From d0f81bbfa644e58e4059c341f1f2048cff4d1a58 Mon Sep 17 00:00:00 2001 From: adrinjalali Date: Sun, 30 Jun 2019 14:53:33 +0200 Subject: [PATCH 01/22] fix cluster and compose --- .../tests/test_affinity_propagation.py | 6 +- sklearn/cluster/tests/test_bicluster.py | 30 ++++---- sklearn/cluster/tests/test_birch.py | 12 ++-- sklearn/cluster/tests/test_dbscan.py | 38 +++++------ sklearn/cluster/tests/test_hierarchical.py | 10 +-- sklearn/cluster/tests/test_k_means.py | 68 +++++++++---------- sklearn/cluster/tests/test_optics.py | 4 +- sklearn/cluster/tests/test_spectral.py | 2 +- .../compose/tests/test_column_transformer.py | 28 ++++---- 9 files changed, 99 insertions(+), 99 deletions(-) diff --git a/sklearn/cluster/tests/test_affinity_propagation.py b/sklearn/cluster/tests/test_affinity_propagation.py index d62df35b2cc55..57ab89cfd6e54 100644 --- a/sklearn/cluster/tests/test_affinity_propagation.py +++ b/sklearn/cluster/tests/test_affinity_propagation.py @@ -37,7 +37,7 @@ def test_affinity_propagation(): n_clusters_ = len(cluster_centers_indices) - assert_equal(n_clusters, n_clusters_) + assert n_clusters == n_clusters_ af = AffinityPropagation(preference=preference, affinity="precomputed") labels_precomputed = af.fit(S).labels_ @@ -50,8 +50,8 @@ def test_affinity_propagation(): cluster_centers_indices = af.cluster_centers_indices_ n_clusters_ = len(cluster_centers_indices) - assert_equal(np.unique(labels).size, n_clusters_) - assert_equal(n_clusters, n_clusters_) + assert np.unique(labels).size == n_clusters_ + assert n_clusters == n_clusters_ # Test also with no copy _, labels_no_copy = affinity_propagation(S, preference=preference, diff --git a/sklearn/cluster/tests/test_bicluster.py b/sklearn/cluster/tests/test_bicluster.py index dd5e91c18c27e..a5b486246c821 100644 --- a/sklearn/cluster/tests/test_bicluster.py +++ b/sklearn/cluster/tests/test_bicluster.py @@ -58,8 +58,8 @@ def _test_shape_indices(model): for i in range(model.n_clusters): m, n = model.get_shape(i) i_ind, j_ind = model.get_indices(i) - assert_equal(len(i_ind), m) - assert_equal(len(j_ind), n) + assert len(i_ind) == m + assert len(j_ind) == n def test_spectral_coclustering(): @@ -82,11 +82,11 @@ def test_spectral_coclustering(): **kwargs) model.fit(mat) - assert_equal(model.rows_.shape, (3, 30)) + assert model.rows_.shape == (3, 30) assert_array_equal(model.rows_.sum(axis=0), np.ones(30)) assert_array_equal(model.columns_.sum(axis=0), np.ones(30)) - assert_equal(consensus_score(model.biclusters_, - (rows, cols)), 1) + assert consensus_score(model.biclusters_, + (rows, cols)) == 1 _test_shape_indices(model) @@ -120,14 +120,14 @@ def test_spectral_biclustering(): else: model.fit(mat) - assert_equal(model.rows_.shape, (9, 30)) - assert_equal(model.columns_.shape, (9, 30)) + assert model.rows_.shape == (9, 30) + assert model.columns_.shape == (9, 30) assert_array_equal(model.rows_.sum(axis=0), np.repeat(3, 30)) assert_array_equal(model.columns_.sum(axis=0), np.repeat(3, 30)) - assert_equal(consensus_score(model.biclusters_, - (rows, cols)), 1) + assert consensus_score(model.biclusters_, + (rows, cols)) == 1 _test_shape_indices(model) @@ -216,20 +216,20 @@ def test_perfect_checkerboard(): S, rows, cols = make_checkerboard((30, 30), 3, noise=0, random_state=0) model.fit(S) - assert_equal(consensus_score(model.biclusters_, - (rows, cols)), 1) + assert consensus_score(model.biclusters_, + (rows, cols)) == 1 S, rows, cols = make_checkerboard((40, 30), 3, noise=0, random_state=0) model.fit(S) - 
assert_equal(consensus_score(model.biclusters_, - (rows, cols)), 1) + assert consensus_score(model.biclusters_, + (rows, cols)) == 1 S, rows, cols = make_checkerboard((30, 40), 3, noise=0, random_state=0) model.fit(S) - assert_equal(consensus_score(model.biclusters_, - (rows, cols)), 1) + assert consensus_score(model.biclusters_, + (rows, cols)) == 1 def test_errors(): diff --git a/sklearn/cluster/tests/test_birch.py b/sklearn/cluster/tests/test_birch.py index d9dfbbd0c3d2b..522b0304a0111 100644 --- a/sklearn/cluster/tests/test_birch.py +++ b/sklearn/cluster/tests/test_birch.py @@ -31,8 +31,8 @@ def test_n_samples_leaves_roots(): n_samples_root = sum([sc.n_samples_ for sc in brc.root_.subclusters_]) n_samples_leaves = sum([sc.n_samples_ for leaf in brc._get_leaves() for sc in leaf.subclusters_]) - assert_equal(n_samples_leaves, X.shape[0]) - assert_equal(n_samples_root, X.shape[0]) + assert n_samples_leaves == X.shape[0] + assert n_samples_root == X.shape[0] def test_partial_fit(): @@ -76,8 +76,8 @@ def test_n_clusters(): X, y = make_blobs(n_samples=100, centers=10) brc1 = Birch(n_clusters=10) brc1.fit(X) - assert_greater(len(brc1.subcluster_centers_), 10) - assert_equal(len(np.unique(brc1.labels_)), 10) + assert len(brc1.subcluster_centers_) > 10 + assert len(np.unique(brc1.labels_)) == 10 # Test that n_clusters = Agglomerative Clustering gives # the same results. @@ -114,7 +114,7 @@ def test_sparse_X(): def check_branching_factor(node, branching_factor): subclusters = node.subclusters_ - assert_greater_equal(branching_factor, len(subclusters)) + assert branching_factor >= len(subclusters) for cluster in subclusters: if cluster.child_: check_branching_factor(cluster.child_, branching_factor) @@ -146,7 +146,7 @@ def check_threshold(birch_instance, threshold): while current_leaf: subclusters = current_leaf.subclusters_ for sc in subclusters: - assert_greater_equal(threshold, sc.radius) + assert threshold >= sc.radius current_leaf = current_leaf.next_leaf_ diff --git a/sklearn/cluster/tests/test_dbscan.py b/sklearn/cluster/tests/test_dbscan.py index 0c4ec6c78179c..02f110e663ec4 100644 --- a/sklearn/cluster/tests/test_dbscan.py +++ b/sklearn/cluster/tests/test_dbscan.py @@ -41,13 +41,13 @@ def test_dbscan_similarity(): # number of clusters, ignoring noise if present n_clusters_1 = len(set(labels)) - (1 if -1 in labels else 0) - assert_equal(n_clusters_1, n_clusters) + assert n_clusters_1 == n_clusters db = DBSCAN(metric="precomputed", eps=eps, min_samples=min_samples) labels = db.fit(D).labels_ n_clusters_2 = len(set(labels)) - int(-1 in labels) - assert_equal(n_clusters_2, n_clusters) + assert n_clusters_2 == n_clusters def test_dbscan_feature(): @@ -64,13 +64,13 @@ def test_dbscan_feature(): # number of clusters, ignoring noise if present n_clusters_1 = len(set(labels)) - int(-1 in labels) - assert_equal(n_clusters_1, n_clusters) + assert n_clusters_1 == n_clusters db = DBSCAN(metric=metric, eps=eps, min_samples=min_samples) labels = db.fit(X).labels_ n_clusters_2 = len(set(labels)) - int(-1 in labels) - assert_equal(n_clusters_2, n_clusters) + assert n_clusters_2 == n_clusters def test_dbscan_sparse(): @@ -123,7 +123,7 @@ def test_dbscan_no_core_samples(): db = DBSCAN(min_samples=6).fit(X_) assert_array_equal(db.components_, np.empty((0, X_.shape[1]))) assert_array_equal(db.labels_, -1) - assert_equal(db.core_sample_indices_.shape, (0,)) + assert db.core_sample_indices_.shape == (0,) def test_dbscan_callable(): @@ -142,14 +142,14 @@ def test_dbscan_callable(): # number of clusters, 
ignoring noise if present n_clusters_1 = len(set(labels)) - int(-1 in labels) - assert_equal(n_clusters_1, n_clusters) + assert n_clusters_1 == n_clusters db = DBSCAN(metric=metric, eps=eps, min_samples=min_samples, algorithm='ball_tree') labels = db.fit(X).labels_ n_clusters_2 = len(set(labels)) - int(-1 in labels) - assert_equal(n_clusters_2, n_clusters) + assert n_clusters_2 == n_clusters def test_dbscan_metric_params(): @@ -191,32 +191,32 @@ def test_dbscan_balltree(): # number of clusters, ignoring noise if present n_clusters_1 = len(set(labels)) - int(-1 in labels) - assert_equal(n_clusters_1, n_clusters) + assert n_clusters_1 == n_clusters db = DBSCAN(p=2.0, eps=eps, min_samples=min_samples, algorithm='ball_tree') labels = db.fit(X).labels_ n_clusters_2 = len(set(labels)) - int(-1 in labels) - assert_equal(n_clusters_2, n_clusters) + assert n_clusters_2 == n_clusters db = DBSCAN(p=2.0, eps=eps, min_samples=min_samples, algorithm='kd_tree') labels = db.fit(X).labels_ n_clusters_3 = len(set(labels)) - int(-1 in labels) - assert_equal(n_clusters_3, n_clusters) + assert n_clusters_3 == n_clusters db = DBSCAN(p=1.0, eps=eps, min_samples=min_samples, algorithm='ball_tree') labels = db.fit(X).labels_ n_clusters_4 = len(set(labels)) - int(-1 in labels) - assert_equal(n_clusters_4, n_clusters) + assert n_clusters_4 == n_clusters db = DBSCAN(leaf_size=20, eps=eps, min_samples=min_samples, algorithm='ball_tree') labels = db.fit(X).labels_ n_clusters_5 = len(set(labels)) - int(-1 in labels) - assert_equal(n_clusters_5, n_clusters) + assert n_clusters_5 == n_clusters def test_input_validation(): @@ -247,18 +247,18 @@ def test_dbscan_badargs(): def test_pickle(): obj = DBSCAN() s = pickle.dumps(obj) - assert_equal(type(pickle.loads(s)), obj.__class__) + assert type(pickle.loads(s)) == obj.__class__ def test_boundaries(): # ensure min_samples is inclusive of core point core, _ = dbscan([[0], [1]], eps=2, min_samples=2) - assert_in(0, core) + assert 0 in core # ensure eps is inclusive of circumference core, _ = dbscan([[0], [1], [1]], eps=1, min_samples=2) - assert_in(0, core) + assert 0 in core core, _ = dbscan([[0], [1], [1]], eps=.99, min_samples=2) - assert_not_in(0, core) + assert 0 not in core def test_weighted_dbscan(): @@ -293,7 +293,7 @@ def test_weighted_dbscan(): rng = np.random.RandomState(42) sample_weight = rng.randint(0, 5, X.shape[0]) core1, label1 = dbscan(X, sample_weight=sample_weight) - assert_equal(len(label1), len(X)) + assert len(label1) == len(X) X_repeated = np.repeat(X, sample_weight, axis=0) core_repeated, label_repeated = dbscan(X_repeated) @@ -364,11 +364,11 @@ def test_dbscan_precomputed_metric_with_degenerate_input_arrays(): # more details X = np.eye(10) labels = DBSCAN(eps=0.5, metric='precomputed').fit(X).labels_ - assert_equal(len(set(labels)), 1) + assert len(set(labels)) == 1 X = np.zeros((10, 10)) labels = DBSCAN(eps=0.5, metric='precomputed').fit(X).labels_ - assert_equal(len(set(labels)), 1) + assert len(set(labels)) == 1 def test_dbscan_precomputed_metric_with_initial_rows_zero(): diff --git a/sklearn/cluster/tests/test_hierarchical.py b/sklearn/cluster/tests/test_hierarchical.py index 00c6813384322..7cd78a3e0ac40 100644 --- a/sklearn/cluster/tests/test_hierarchical.py +++ b/sklearn/cluster/tests/test_hierarchical.py @@ -93,7 +93,7 @@ def test_unstructured_linkage_tree(): children, n_nodes, n_leaves, parent = assert_warns( UserWarning, ward_tree, this_X.T, n_clusters=10) n_nodes = 2 * X.shape[1] - 1 - assert_equal(len(children) + n_leaves, n_nodes) + assert 
len(children) + n_leaves == n_nodes for tree_builder in _TREE_BUILDERS.values(): for this_X in (X, X[0]): @@ -102,7 +102,7 @@ def test_unstructured_linkage_tree(): UserWarning, tree_builder, this_X.T, n_clusters=10) n_nodes = 2 * X.shape[1] - 1 - assert_equal(len(children) + n_leaves, n_nodes) + assert len(children) + n_leaves == n_nodes def test_height_linkage_tree(): @@ -538,7 +538,7 @@ def test_compute_full_tree(): agc.fit(X) n_samples = X.shape[0] n_nodes = agc.children_.shape[0] - assert_equal(n_nodes, n_samples - 1) + assert n_nodes == n_samples - 1 # When n_clusters is large, greater than max of 100 and 0.02 * n_samples. # we should stop when there are n_clusters. @@ -550,7 +550,7 @@ def test_compute_full_tree(): agc.fit(X) n_samples = X.shape[0] n_nodes = agc.children_.shape[0] - assert_equal(n_nodes, n_samples - n_clusters) + assert n_nodes == n_samples - n_clusters def test_n_components(): @@ -562,7 +562,7 @@ def test_n_components(): connectivity = np.eye(5) for linkage_func in _TREE_BUILDERS.values(): - assert_equal(ignore_warnings(linkage_func)(X, connectivity)[1], 5) + assert ignore_warnings(linkage_func)(X, connectivity)[1] == 5 def test_agg_n_clusters(): diff --git a/sklearn/cluster/tests/test_k_means.py b/sklearn/cluster/tests/test_k_means.py index 3788039194520..212c2311a84fd 100644 --- a/sklearn/cluster/tests/test_k_means.py +++ b/sklearn/cluster/tests/test_k_means.py @@ -155,13 +155,13 @@ def test_minibatch_update_consistency(): old_inertia, incremental_diff = _mini_batch_step( X_mb, sample_weight_mb, x_mb_squared_norms, new_centers, weight_sums, buffer, 1, None, random_reassign=False) - assert_greater(old_inertia, 0.0) + assert old_inertia > 0.0 # compute the new inertia on the same batch to check that it decreased labels, new_inertia = _labels_inertia( X_mb, sample_weight_mb, x_mb_squared_norms, new_centers) - assert_greater(new_inertia, 0.0) - assert_less(new_inertia, old_inertia) + assert new_inertia > 0.0 + assert new_inertia < old_inertia # check that the incremental difference computation is matching the # final observed value @@ -172,13 +172,13 @@ def test_minibatch_update_consistency(): old_inertia_csr, incremental_diff_csr = _mini_batch_step( X_mb_csr, sample_weight_mb, x_mb_squared_norms_csr, new_centers_csr, weight_sums_csr, buffer_csr, 1, None, random_reassign=False) - assert_greater(old_inertia_csr, 0.0) + assert old_inertia_csr > 0.0 # compute the new inertia on the same batch to check that it decreased labels_csr, new_inertia_csr = _labels_inertia( X_mb_csr, sample_weight_mb, x_mb_squared_norms_csr, new_centers_csr) - assert_greater(new_inertia_csr, 0.0) - assert_less(new_inertia_csr, old_inertia_csr) + assert new_inertia_csr > 0.0 + assert new_inertia_csr < old_inertia_csr # check that the incremental difference computation is matching the # final observed value @@ -197,14 +197,14 @@ def _check_fitted_model(km): # check that the number of clusters centers and distinct labels match # the expectation centers = km.cluster_centers_ - assert_equal(centers.shape, (n_clusters, n_features)) + assert centers.shape == (n_clusters, n_features) labels = km.labels_ - assert_equal(np.unique(labels).shape[0], n_clusters) + assert np.unique(labels).shape[0] == n_clusters # check that the labels assignment are perfect (up to a permutation) - assert_equal(v_measure_score(true_labels, labels), 1.0) - assert_greater(km.inertia_, 0.0) + assert v_measure_score(true_labels, labels) == 1.0 + assert km.inertia_ > 0.0 # check error on dataset being too small 
assert_raise_message(ValueError, "n_samples=1 should be >= n_clusters=%d" @@ -400,14 +400,14 @@ def test_minibatch_sensible_reassign_fit(): init="random") mb_k_means.fit(zeroed_X) # there should not be too many exact zero cluster centers - assert_greater(mb_k_means.cluster_centers_.any(axis=1).sum(), 10) + assert mb_k_means.cluster_centers_.any(axis=1).sum() > 10 # do the same with batch-size > X.shape[0] (regression test) mb_k_means = MiniBatchKMeans(n_clusters=20, batch_size=201, random_state=42, init="random") mb_k_means.fit(zeroed_X) # there should not be too many exact zero cluster centers - assert_greater(mb_k_means.cluster_centers_.any(axis=1).sum(), 10) + assert mb_k_means.cluster_centers_.any(axis=1).sum() > 10 def test_minibatch_sensible_reassign_partial_fit(): @@ -418,7 +418,7 @@ def test_minibatch_sensible_reassign_partial_fit(): for i in range(100): mb_k_means.partial_fit(zeroed_X) # there should not be too many exact zero cluster centers - assert_greater(mb_k_means.cluster_centers_.any(axis=1).sum(), 10) + assert mb_k_means.cluster_centers_.any(axis=1).sum() > 10 def test_minibatch_reassign(): @@ -445,7 +445,7 @@ def test_minibatch_reassign(): reassignment_ratio=1, verbose=True) finally: sys.stdout = old_stdout - assert_greater(score_before, mb_k_means.score(this_X)) + assert score_before > mb_k_means.score(this_X) # Give a perfect initialization, with a small reassignment_ratio, # no center should be reassigned @@ -510,14 +510,14 @@ def test_mini_batch_k_means_random_init_partial_fit(): # compute the labeling on the complete dataset labels = km.predict(X) - assert_equal(v_measure_score(true_labels, labels), 1.0) + assert v_measure_score(true_labels, labels) == 1.0 def test_minibatch_default_init_size(): mb_k_means = MiniBatchKMeans(init=centers.copy(), n_clusters=n_clusters, batch_size=10, random_state=42, n_init=1).fit(X) - assert_equal(mb_k_means.init_size_, 3 * mb_k_means.batch_size) + assert mb_k_means.init_size_ == 3 * mb_k_means.batch_size _check_fitted_model(mb_k_means) @@ -531,8 +531,8 @@ def test_minibatch_set_init_size(): mb_k_means = MiniBatchKMeans(init=centers.copy(), n_clusters=n_clusters, init_size=666, random_state=42, n_init=1).fit(X) - assert_equal(mb_k_means.init_size, 666) - assert_equal(mb_k_means.init_size_, n_samples) + assert mb_k_means.init_size == 666 + assert mb_k_means.init_size_ == n_samples _check_fitted_model(mb_k_means) @@ -565,7 +565,7 @@ def test_k_means_non_collapsed(): km.fit(my_X) # centers must not been collapsed - assert_equal(len(np.unique(km.labels_)), 3) + assert len(np.unique(km.labels_)) == 3 centers = km.cluster_centers_ assert np.linalg.norm(centers[0] - centers[1]) >= 0.1 @@ -636,7 +636,7 @@ def test_int_input(): ] for km in fitted_models: - assert_equal(km.cluster_centers_.dtype, np.float64) + assert km.cluster_centers_.dtype == np.float64 expected_labels = [0, 1, 1, 0, 0, 1] scores = np.array([v_measure_score(expected_labels, km.labels_) @@ -650,10 +650,10 @@ def test_transform(): X_new = km.transform(km.cluster_centers_) for c in range(n_clusters): - assert_equal(X_new[c, c], 0) + assert X_new[c, c] == 0 for c2 in range(n_clusters): if c != c2: - assert_greater(X_new[c, c2], 0) + assert X_new[c, c2] > 0 def test_fit_transform(): @@ -707,14 +707,14 @@ def test_k_means_function(): finally: sys.stdout = old_stdout centers = cluster_centers - assert_equal(centers.shape, (n_clusters, n_features)) + assert centers.shape == (n_clusters, n_features) labels = labels - assert_equal(np.unique(labels).shape[0], n_clusters) + assert 
np.unique(labels).shape[0] == n_clusters # check that the labels assignment are perfect (up to a permutation) - assert_equal(v_measure_score(true_labels, labels), 1.0) - assert_greater(inertia, 0.0) + assert v_measure_score(true_labels, labels) == 1.0 + assert inertia > 0.0 # check warning when centers are passed assert_warns(RuntimeWarning, k_means, X, n_clusters=n_clusters, @@ -766,18 +766,18 @@ def test_float_precision(Estimator, is_sparse): estimator.fit(X_test) # dtype of cluster centers has to be the dtype of the input # data - assert_equal(estimator.cluster_centers_.dtype, dtype) + assert estimator.cluster_centers_.dtype == dtype inertia[dtype] = estimator.inertia_ X_new[dtype] = estimator.transform(X_test) centers[dtype] = estimator.cluster_centers_ # ensure the extracted row is a 2d array - assert_equal(estimator.predict(X_test[:1]), + assert (estimator.predict(X_test[:1]) == estimator.labels_[0]) if hasattr(estimator, 'partial_fit'): estimator.partial_fit(X_test[0:3]) # dtype of cluster centers has to stay the same after # partial_fit - assert_equal(estimator.cluster_centers_.dtype, dtype) + assert estimator.cluster_centers_.dtype == dtype # compare arrays with low precision since the difference between # 32 and 64 bit sometimes makes a difference up to the 4th decimal @@ -801,8 +801,8 @@ def test_k_means_init_centers(): assert_array_equal(init_centers, init_centers_test) km = KMeans(init=init_centers_test, n_clusters=3, n_init=1) km.fit(X_test) - assert_equal(False, np.may_share_memory(km.cluster_centers_, - init_centers)) + assert False == np.may_share_memory(km.cluster_centers_, + init_centers) @pytest.mark.parametrize("data", [X, X_csr], ids=["dense", "sparse"]) @@ -843,7 +843,7 @@ def test_less_centers_than_unique_points(): # only three distinct points, so only three clusters # can have points assigned to them - assert_equal(set(km.labels_), set(range(3))) + assert set(km.labels_) == set(range(3)) # k_means should warn that fewer labels than cluster # centers have been used @@ -920,9 +920,9 @@ def test_check_sample_weight(): from sklearn.cluster.k_means_ import _check_sample_weight sample_weight = None checked_sample_weight = _check_sample_weight(X, sample_weight) - assert_equal(_num_samples(X), _num_samples(checked_sample_weight)) + assert _num_samples(X) == _num_samples(checked_sample_weight) assert_almost_equal(checked_sample_weight.sum(), _num_samples(X)) - assert_equal(X.dtype, checked_sample_weight.dtype) + assert X.dtype == checked_sample_weight.dtype def test_iter_attribute(): diff --git a/sklearn/cluster/tests/test_optics.py b/sklearn/cluster/tests/test_optics.py index 1e3d99746c9e9..b90d8ee7a1e3d 100644 --- a/sklearn/cluster/tests/test_optics.py +++ b/sklearn/cluster/tests/test_optics.py @@ -151,7 +151,7 @@ def test_correct_number_of_clusters(): clust.fit(X) # number of clusters, ignoring noise if present n_clusters_1 = len(set(clust.labels_)) - int(-1 in clust.labels_) - assert_equal(n_clusters_1, n_clusters) + assert n_clusters_1 == n_clusters # check attribute types and sizes assert clust.labels_.shape == (len(X),) @@ -216,7 +216,7 @@ def test_close_extract(): clust = OPTICS(max_eps=1.0, cluster_method='dbscan', eps=0.3, min_samples=10).fit(X) # Cluster ordering starts at 0; max cluster label = 2 is 3 clusters - assert_equal(max(clust.labels_), 2) + assert max(clust.labels_) == 2 @pytest.mark.parametrize('eps', [0.1, .3, .5]) diff --git a/sklearn/cluster/tests/test_spectral.py b/sklearn/cluster/tests/test_spectral.py index 44d549d3ee743..58cc9f4f1036f 100644 --- 
a/sklearn/cluster/tests/test_spectral.py +++ b/sklearn/cluster/tests/test_spectral.py @@ -137,7 +137,7 @@ def test_affinities(): def histogram(x, y, **kwargs): # Histogram kernel implemented as a callable. - assert_equal(kwargs, {}) # no kernel_params that we didn't ask for + assert kwargs == {} # no kernel_params that we didn't ask for return np.minimum(x, y).sum() sp = SpectralClustering(n_clusters=2, affinity=histogram, random_state=0) diff --git a/sklearn/compose/tests/test_column_transformer.py b/sklearn/compose/tests/test_column_transformer.py index 69df675ac2c25..bcbbcc1c3902e 100644 --- a/sklearn/compose/tests/test_column_transformer.py +++ b/sklearn/compose/tests/test_column_transformer.py @@ -349,7 +349,7 @@ def test_column_transformer_sparse_stacking(): col_trans.fit(X_array) X_trans = col_trans.transform(X_array) assert sparse.issparse(X_trans) - assert_equal(X_trans.shape, (X_trans.shape[0], X_trans.shape[0] + 1)) + assert X_trans.shape == (X_trans.shape[0], X_trans.shape[0] + 1) assert_array_equal(X_trans.toarray()[:, 1:], np.eye(X_trans.shape[0])) assert len(col_trans.transformers_) == 2 assert col_trans.transformers_[-1][0] != 'remainder' @@ -515,9 +515,9 @@ def test_make_column_transformer(): norm = Normalizer() ct = make_column_transformer((scaler, 'first'), (norm, ['second'])) names, transformers, columns = zip(*ct.transformers) - assert_equal(names, ("standardscaler", "normalizer")) - assert_equal(transformers, (scaler, norm)) - assert_equal(columns, ('first', ['second'])) + assert names == ("standardscaler", "normalizer") + assert transformers == (scaler, norm) + assert columns == ('first', ['second']) def test_make_column_transformer_pandas(): @@ -537,11 +537,11 @@ def test_make_column_transformer_kwargs(): ct = make_column_transformer((scaler, 'first'), (norm, ['second']), n_jobs=3, remainder='drop', sparse_threshold=0.5) - assert_equal(ct.transformers, make_column_transformer( - (scaler, 'first'), (norm, ['second'])).transformers) - assert_equal(ct.n_jobs, 3) - assert_equal(ct.remainder, 'drop') - assert_equal(ct.sparse_threshold, 0.5) + assert ct.transformers == make_column_transformer( + (scaler, 'first'), (norm, ['second'])).transformers + assert ct.n_jobs == 3 + assert ct.remainder == 'drop' + assert ct.sparse_threshold == 0.5 # invalid keyword parameters should raise an error message assert_raise_message( TypeError, @@ -579,7 +579,7 @@ def test_column_transformer_get_set_params(): 'transformer_weights': None, 'verbose': False} - assert_dict_equal(ct.get_params(), exp) + assert ct.get_params() == exp ct.set_params(trans1__with_mean=False) assert not ct.get_params()['trans1__with_mean'] @@ -597,7 +597,7 @@ def test_column_transformer_get_set_params(): 'transformer_weights': None, 'verbose': False} - assert_dict_equal(ct.get_params(), exp) + assert ct.get_params() == exp def test_column_transformer_named_estimators(): @@ -613,7 +613,7 @@ def test_column_transformer_named_estimators(): assert isinstance(ct.named_transformers_.trans2, StandardScaler) assert not ct.named_transformers_.trans2.with_std # check it are fitted transformers - assert_equal(ct.named_transformers_.trans1.mean_, 1.) + assert ct.named_transformers_.trans1.mean_ == 1. 
def test_column_transformer_cloning(): @@ -647,7 +647,7 @@ def test_column_transformer_get_feature_names(): ct = ColumnTransformer( [('col' + str(i), DictVectorizer(), i) for i in range(2)]) ct.fit(X) - assert_equal(ct.get_feature_names(), ['col0__a', 'col0__b', 'col1__c']) + assert ct.get_feature_names() == ['col0__a', 'col0__b', 'col1__c'] # passthrough transformers not supported ct = ColumnTransformer([('trans', 'passthrough', [0, 1])]) @@ -667,7 +667,7 @@ def test_column_transformer_get_feature_names(): ct = ColumnTransformer( [('col0', DictVectorizer(), 0), ('col1', 'drop', 1)]) ct.fit(X) - assert_equal(ct.get_feature_names(), ['col0__a', 'col0__b']) + assert ct.get_feature_names() == ['col0__a', 'col0__b'] def test_column_transformer_special_strings(): From 014b864c301ea8295bf4559012faf038173a1150 Mon Sep 17 00:00:00 2001 From: adrinjalali Date: Sun, 30 Jun 2019 15:02:11 +0200 Subject: [PATCH 02/22] fix common tests --- sklearn/tests/test_base.py | 39 +++++---- sklearn/tests/test_calibration.py | 20 ++--- sklearn/tests/test_common.py | 11 +-- sklearn/tests/test_discriminant_analysis.py | 15 ++-- sklearn/tests/test_dummy.py | 24 +++--- sklearn/tests/test_init.py | 2 +- sklearn/tests/test_isotonic.py | 10 +-- sklearn/tests/test_kernel_approximation.py | 22 ++--- sklearn/tests/test_multiclass.py | 72 ++++++++--------- sklearn/tests/test_multioutput.py | 32 ++++---- sklearn/tests/test_naive_bayes.py | 32 ++++---- sklearn/tests/test_pipeline.py | 90 ++++++++++----------- sklearn/tests/test_random_projection.py | 40 ++++----- 13 files changed, 203 insertions(+), 206 deletions(-) diff --git a/sklearn/tests/test_base.py b/sklearn/tests/test_base.py index 316b01ff33415..1ed90ecca42ce 100644 --- a/sklearn/tests/test_base.py +++ b/sklearn/tests/test_base.py @@ -116,7 +116,7 @@ def test_clone(): selector = SelectFpr(f_classif, alpha=0.1) new_selector = clone(selector) assert selector is not new_selector - assert_equal(selector.get_params(), new_selector.get_params()) + assert selector.get_params() == new_selector.get_params() selector = SelectFpr(f_classif, alpha=np.zeros((10, 2))) new_selector = clone(selector) @@ -199,13 +199,12 @@ def test_repr(): my_estimator = MyEstimator() repr(my_estimator) test = T(K(), K()) - assert_equal( - repr(test), - "T(a=K(c=None, d=None), b=K(c=None, d=None))" - ) + assert ( + repr(test) == + "T(a=K(c=None, d=None), b=K(c=None, d=None))") some_est = T(a=["long_params"] * 1000) - assert_equal(len(repr(some_est)), 495) + assert len(repr(some_est)) == 495 def test_str(): @@ -288,11 +287,11 @@ def test_score_sample_weight(): # generate random sample weights sample_weight = rng.randint(1, 10, size=len(ds.target)) # check that the score with and without sample weights are different - assert_not_equal(est.score(ds.data, ds.target), - est.score(ds.data, ds.target, - sample_weight=sample_weight), - msg="Unweighted and weighted scores " - "are unexpectedly equal") + assert_message = ("Unweighted and weighted scores " + "are unexpectedly equal") + assert (est.score(ds.data, ds.target) != + est.score(ds.data, ds.target, + sample_weight=sample_weight)), assert_message def test_clone_pandas_dataframe(): @@ -330,7 +329,7 @@ def transform(self, X): # the test assert (e.df == cloned_e.df).values.all() - assert_equal(e.scalar_param, cloned_e.scalar_param) + assert e.scalar_param == cloned_e.scalar_param def test_pickle_version_warning_is_not_raised_with_matching_version(): @@ -343,7 +342,7 @@ def test_pickle_version_warning_is_not_raised_with_matching_version(): # test that we can 
predict with the restored decision tree classifier score_of_original = tree.score(iris.data, iris.target) score_of_restored = tree_restored.score(iris.data, iris.target) - assert_equal(score_of_original, score_of_restored) + assert score_of_original == score_of_restored class TreeBadVersion(DecisionTreeClassifier): @@ -424,8 +423,8 @@ def test_pickling_when_getstate_is_overwritten_by_mixin(): serialized = pickle.dumps(estimator) estimator_restored = pickle.loads(serialized) - assert_equal(estimator_restored.attribute_pickled, 5) - assert_equal(estimator_restored._attribute_not_pickled, None) + assert estimator_restored.attribute_pickled == 5 + assert estimator_restored._attribute_not_pickled == None assert estimator_restored._restored @@ -438,12 +437,12 @@ def test_pickling_when_getstate_is_overwritten_by_mixin_outside_of_sklearn(): type(estimator).__module__ = "notsklearn" serialized = estimator.__getstate__() - assert_dict_equal(serialized, {'_attribute_not_pickled': None, - 'attribute_pickled': 5}) + assert serialized == {'_attribute_not_pickled': None, + 'attribute_pickled': 5} serialized['attribute_pickled'] = 4 estimator.__setstate__(serialized) - assert_equal(estimator.attribute_pickled, 4) + assert estimator.attribute_pickled == 4 assert estimator._restored finally: type(estimator).__module__ = old_mod @@ -467,8 +466,8 @@ def test_pickling_works_when_getstate_is_overwritten_in_the_child_class(): serialized = pickle.dumps(estimator) estimator_restored = pickle.loads(serialized) - assert_equal(estimator_restored.attribute_pickled, 5) - assert_equal(estimator_restored._attribute_not_pickled, None) + assert estimator_restored.attribute_pickled == 5 + assert estimator_restored._attribute_not_pickled == None def test_tag_inheritance(): diff --git a/sklearn/tests/test_calibration.py b/sklearn/tests/test_calibration.py index a907444de2151..db7ed1920c43e 100644 --- a/sklearn/tests/test_calibration.py +++ b/sklearn/tests/test_calibration.py @@ -58,7 +58,7 @@ def test_calibration(): prob_pos_pc_clf = pc_clf.predict_proba(this_X_test)[:, 1] # Check that brier score has improved after calibration - assert_greater(brier_score_loss(y_test, prob_pos_clf), + assert (brier_score_loss(y_test, prob_pos_clf) > brier_score_loss(y_test, prob_pos_pc_clf)) # Check invariance against relabeling [0, 1] -> [1, 2] @@ -84,7 +84,7 @@ def test_calibration(): else: # Isotonic calibration is not invariant against relabeling # but should improve in both cases - assert_greater(brier_score_loss(y_test, prob_pos_clf), + assert (brier_score_loss(y_test, prob_pos_clf) > brier_score_loss((y_test + 1) % 2, prob_pos_pc_clf_relabeled)) @@ -122,7 +122,7 @@ def test_sample_weight(): probs_without_sw = calibrated_clf.predict_proba(X_test) diff = np.linalg.norm(probs_with_sw - probs_without_sw) - assert_greater(diff, 0.1) + assert diff > 0.1 def test_calibration_multiclass(): @@ -158,7 +158,7 @@ def softmax(y_pred): uncalibrated_log_loss = \ log_loss(y_test, softmax(clf.decision_function(X_test))) calibrated_log_loss = log_loss(y_test, probas) - assert_greater_equal(uncalibrated_log_loss, calibrated_log_loss) + assert uncalibrated_log_loss >= calibrated_log_loss # Test that calibration of a multiclass classifier decreases log-loss # for RandomForestClassifier @@ -177,7 +177,7 @@ def softmax(y_pred): cal_clf.fit(X_train, y_train) cal_clf_probs = cal_clf.predict_proba(X_test) cal_loss = log_loss(y_test, cal_clf_probs) - assert_greater(loss, cal_loss) + assert loss > cal_loss def test_calibration_prefit(): @@ -217,7 +217,7 @@ def 
test_calibration_prefit(): assert_array_equal(y_pred, np.array([0, 1])[np.argmax(y_prob, axis=1)]) - assert_greater(brier_score_loss(y_test, prob_pos_clf), + assert (brier_score_loss(y_test, prob_pos_clf) > brier_score_loss(y_test, prob_pos_pc_clf)) @@ -246,8 +246,8 @@ def test_calibration_curve(): prob_true, prob_pred = calibration_curve(y_true, y_pred, n_bins=2) prob_true_unnormalized, prob_pred_unnormalized = \ calibration_curve(y_true, y_pred * 2, n_bins=2, normalize=True) - assert_equal(len(prob_true), len(prob_pred)) - assert_equal(len(prob_true), 2) + assert len(prob_true) == len(prob_pred) + assert len(prob_true) == 2 assert_almost_equal(prob_true, [0, 1]) assert_almost_equal(prob_pred, [0.1, 0.9]) assert_almost_equal(prob_true, prob_true_unnormalized) @@ -317,8 +317,8 @@ def test_calibration_less_classes(): enumerate(cal_clf.calibrated_classifiers_): proba = calibrated_classifier.predict_proba(X) assert_array_equal(proba[:, i], np.zeros(len(y))) - assert_equal(np.all(np.hstack([proba[:, :i], - proba[:, i + 1:]])), True) + assert np.all(np.hstack([proba[:, :i], + proba[:, i + 1:]])) == True @ignore_warnings(category=(DeprecationWarning, FutureWarning)) diff --git a/sklearn/tests/test_common.py b/sklearn/tests/test_common.py index 02c50cf2c9503..af5ea34e1d604 100644 --- a/sklearn/tests/test_common.py +++ b/sklearn/tests/test_common.py @@ -217,7 +217,7 @@ def test_root_import_all_completeness(): onerror=lambda _: None): if '.' in modname or modname.startswith('_') or modname in EXCEPTIONS: continue - assert_in(modname, sklearn.__all__) + assert modname in sklearn.__all__ def test_all_tests_are_importable(): @@ -236,7 +236,8 @@ def test_all_tests_are_importable(): if ispkg and not HAS_TESTS_EXCEPTIONS.search(name) and name + '.tests' not in lookup] - assert_equal(missing_tests, [], - '{0} do not have `tests` subpackages. Perhaps they require ' - '__init__.py or an add_subpackage directive in the parent ' - 'setup.py'.format(missing_tests)) + assert missing_tests == [], ('{0} do not have `tests` subpackages. ' + 'Perhaps they require ' + '__init__.py or an add_subpackage directive ' + 'in the parent ' + 'setup.py'.format(missing_tests)) diff --git a/sklearn/tests/test_discriminant_analysis.py b/sklearn/tests/test_discriminant_analysis.py index 3428f12b03306..c85227b37a7eb 100644 --- a/sklearn/tests/test_discriminant_analysis.py +++ b/sklearn/tests/test_discriminant_analysis.py @@ -212,10 +212,10 @@ def test_lda_transform(): # Test LDA transform. 
clf = LinearDiscriminantAnalysis(solver="svd", n_components=1) X_transformed = clf.fit(X, y).transform(X) - assert_equal(X_transformed.shape[1], 1) + assert X_transformed.shape[1] == 1 clf = LinearDiscriminantAnalysis(solver="eigen", n_components=1) X_transformed = clf.fit(X, y).transform(X) - assert_equal(X_transformed.shape[1], 1) + assert X_transformed.shape[1] == 1 clf = LinearDiscriminantAnalysis(solver="lsqr", n_components=1) clf.fit(X, y) @@ -236,14 +236,12 @@ def test_lda_explained_variance_ratio(): clf_lda_eigen = LinearDiscriminantAnalysis(solver="eigen") clf_lda_eigen.fit(X, y) assert_almost_equal(clf_lda_eigen.explained_variance_ratio_.sum(), 1.0, 3) - assert_equal(clf_lda_eigen.explained_variance_ratio_.shape, (2,), - "Unexpected length for explained_variance_ratio_") + assert clf_lda_eigen.explained_variance_ratio_.shape == (2,), "Unexpected length for explained_variance_ratio_" clf_lda_svd = LinearDiscriminantAnalysis(solver="svd") clf_lda_svd.fit(X, y) assert_almost_equal(clf_lda_svd.explained_variance_ratio_.sum(), 1.0, 3) - assert_equal(clf_lda_svd.explained_variance_ratio_.shape, (2,), - "Unexpected length for explained_variance_ratio_") + assert clf_lda_svd.explained_variance_ratio_.shape == (2,), "Unexpected length for explained_variance_ratio_" assert_array_almost_equal(clf_lda_svd.explained_variance_ratio_, clf_lda_eigen.explained_variance_ratio_) @@ -296,8 +294,7 @@ def test_lda_scaling(): for solver in ('svd', 'lsqr', 'eigen'): clf = LinearDiscriminantAnalysis(solver=solver) # should be able to separate the data perfectly - assert_equal(clf.fit(x, y).score(x, y), 1.0, - 'using covariance: %s' % solver) + assert clf.fit(x, y).score(x, y) == 1.0, 'using covariance: %s' % solver def test_lda_store_covariance(): @@ -430,7 +427,7 @@ def test_qda_priors(): y_pred = clf.fit(X6, y6).predict(X6) n_pos2 = np.sum(y_pred == 2) - assert_greater(n_pos2, n_pos) + assert n_pos2 > n_pos def test_qda_store_covariance(): diff --git a/sklearn/tests/test_dummy.py b/sklearn/tests/test_dummy.py index 8b641448604fe..dc80b2d073d81 100644 --- a/sklearn/tests/test_dummy.py +++ b/sklearn/tests/test_dummy.py @@ -36,8 +36,8 @@ def _check_predict_proba(clf, X, y): log_proba = [log_proba] for k in range(n_outputs): - assert_equal(proba[k].shape[0], n_samples) - assert_equal(proba[k].shape[1], len(np.unique(y[:, k]))) + assert proba[k].shape[0] == n_samples + assert proba[k].shape[1] == len(np.unique(y[:, k])) assert_array_almost_equal(proba[k].sum(axis=1), np.ones(len(X))) # We know that we can have division by zero assert_array_almost_equal(np.log(proba[k]), log_proba[k]) @@ -50,7 +50,7 @@ def _check_behavior_2d(clf): est = clone(clf) est.fit(X, y) y_pred = est.predict(X) - assert_equal(y.shape, y_pred.shape) + assert y.shape == y_pred.shape # 2d case y = np.array([[1, 0], @@ -60,7 +60,7 @@ def _check_behavior_2d(clf): est = clone(clf) est.fit(X, y) y_pred = est.predict(X) - assert_equal(y.shape, y_pred.shape) + assert y.shape == y_pred.shape def _check_behavior_2d_for_constant(clf): @@ -73,7 +73,7 @@ def _check_behavior_2d_for_constant(clf): est = clone(clf) est.fit(X, y) y_pred = est.predict(X) - assert_equal(y.shape, y_pred.shape) + assert y.shape == y_pred.shape def _check_equality_regressor(statistic, y_learn, y_pred_learn, @@ -231,7 +231,7 @@ def test_string_labels(): def test_classifier_score_with_None(y, y_test): clf = DummyClassifier(strategy="most_frequent") clf.fit(None, y) - assert_equal(clf.score(None, y_test), 0.5) + assert clf.score(None, y_test) == 0.5 
@pytest.mark.parametrize("strategy", [ @@ -472,7 +472,7 @@ def test_y_mean_attribute_regressor(): est = DummyRegressor(strategy='mean') est.fit(X, y) - assert_equal(est.constant_, np.mean(y)) + assert est.constant_ == np.mean(y) def test_unknown_strategey_regressor(): @@ -645,14 +645,14 @@ def test_dummy_regressor_sample_weight(n_samples=10): sample_weight = random_state.rand(n_samples) est = DummyRegressor(strategy="mean").fit(X, y, sample_weight) - assert_equal(est.constant_, np.average(y, weights=sample_weight)) + assert est.constant_ == np.average(y, weights=sample_weight) est = DummyRegressor(strategy="median").fit(X, y, sample_weight) - assert_equal(est.constant_, _weighted_percentile(y, sample_weight, 50.)) + assert est.constant_ == _weighted_percentile(y, sample_weight, 50.) est = DummyRegressor(strategy="quantile", quantile=.95).fit(X, y, sample_weight) - assert_equal(est.constant_, _weighted_percentile(y, sample_weight, 95.)) + assert est.constant_ == _weighted_percentile(y, sample_weight, 95.) def test_dummy_regressor_on_3D_array(): @@ -686,7 +686,7 @@ def test_dummy_regressor_return_std(): cls.fit(X, y) y_pred_list = cls.predict(X, return_std=True) # there should be two elements when return_std is True - assert_equal(len(y_pred_list), 2) + assert len(y_pred_list) == 2 # the second element should be all zeros assert_array_equal(y_pred_list[1], y_std_expected) @@ -704,7 +704,7 @@ def test_dummy_regressor_return_std(): def test_regressor_score_with_None(y, y_test): reg = DummyRegressor() reg.fit(None, y) - assert_equal(reg.score(None, y_test), 1.0) + assert reg.score(None, y_test) == 1.0 @pytest.mark.parametrize("strategy", [ diff --git a/sklearn/tests/test_init.py b/sklearn/tests/test_init.py index 17f12e8da478e..d936ee4e6d2b7 100644 --- a/sklearn/tests/test_init.py +++ b/sklearn/tests/test_init.py @@ -17,4 +17,4 @@ def test_import_skl(): # Test either above import has failed for some reason # "import *" is discouraged outside of the module level, hence we # rely on setting up the variable above - assert_equal(_top_import_error, None) + assert _top_import_error == None diff --git a/sklearn/tests/test_isotonic.py b/sklearn/tests/test_isotonic.py index f35d75ae73c00..b1baa1b60c71c 100644 --- a/sklearn/tests/test_isotonic.py +++ b/sklearn/tests/test_isotonic.py @@ -311,8 +311,8 @@ def test_isotonic_regression_oob_clip(): # Predict from training and test x and check that min/max match. y1 = ir.predict([min(x) - 10, max(x) + 10]) y2 = ir.predict(x) - assert_equal(max(y1), max(y2)) - assert_equal(min(y1), min(y2)) + assert max(y1) == max(y2) + assert min(y1) == min(y2) def test_isotonic_regression_oob_nan(): @@ -326,7 +326,7 @@ def test_isotonic_regression_oob_nan(): # Predict from training and test x and check that we have two NaNs. 
y1 = ir.predict([min(x) - 10, max(x) + 10]) - assert_equal(sum(np.isnan(y1)), 2) + assert sum(np.isnan(y1)) == 2 def test_isotonic_regression_oob_bad(): @@ -478,12 +478,12 @@ def test_isotonic_dtype(): ensure_2d=False).dtype res = isotonic_regression(y_np, sample_weight=sample_weight) - assert_equal(res.dtype, expected_dtype) + assert res.dtype == expected_dtype X = np.arange(len(y)).astype(dtype) reg.fit(X, y_np, sample_weight=sample_weight) res = reg.predict(X) - assert_equal(res.dtype, expected_dtype) + assert res.dtype == expected_dtype def test_make_unique_dtype(): diff --git a/sklearn/tests/test_kernel_approximation.py b/sklearn/tests/test_kernel_approximation.py index 2780981d536c8..af589010d6769 100644 --- a/sklearn/tests/test_kernel_approximation.py +++ b/sklearn/tests/test_kernel_approximation.py @@ -65,19 +65,19 @@ def test_additive_chi2_sampler(): # test that the sample_interval is initialized correctly transform = AdditiveChi2Sampler(sample_steps=sample_steps) - assert_equal(transform.sample_interval, None) + assert transform.sample_interval == None # test that the sample_interval is changed in the fit method transform.fit(X) - assert_not_equal(transform.sample_interval_, None) + assert transform.sample_interval_ != None # test that the sample_interval is set correctly sample_interval = 0.3 transform = AdditiveChi2Sampler(sample_steps=4, sample_interval=sample_interval) - assert_equal(transform.sample_interval, sample_interval) + assert transform.sample_interval == sample_interval transform.fit(X) - assert_equal(transform.sample_interval_, sample_interval) + assert transform.sample_interval_ == sample_interval def test_skewed_chi2_sampler(): @@ -133,10 +133,10 @@ def test_rbf_sampler(): kernel_approx = np.dot(X_trans, Y_trans.T) error = kernel - kernel_approx - assert_less_equal(np.abs(np.mean(error)), 0.01) # close to unbiased + assert np.abs(np.mean(error)) <= 0.01 # close to unbiased np.abs(error, out=error) - assert_less_equal(np.max(error), 0.1) # nothing too far off - assert_less_equal(np.mean(error), 0.05) # mean is fairly close + assert np.max(error) <= 0.1 # nothing too far off + assert np.mean(error) <= 0.05 # mean is fairly close def test_input_validation(): @@ -163,21 +163,21 @@ def test_nystroem_approximation(): trans = Nystroem(n_components=2, random_state=rnd) X_transformed = trans.fit(X).transform(X) - assert_equal(X_transformed.shape, (X.shape[0], 2)) + assert X_transformed.shape == (X.shape[0], 2) # test callable kernel def linear_kernel(X, Y): return np.dot(X, Y.T) trans = Nystroem(n_components=2, kernel=linear_kernel, random_state=rnd) X_transformed = trans.fit(X).transform(X) - assert_equal(X_transformed.shape, (X.shape[0], 2)) + assert X_transformed.shape == (X.shape[0], 2) # test that available kernels fit and transform kernels_available = kernel_metrics() for kern in kernels_available: trans = Nystroem(n_components=2, kernel=kern, random_state=rnd) X_transformed = trans.fit(X).transform(X) - assert_equal(X_transformed.shape, (X.shape[0], 2)) + assert X_transformed.shape == (X.shape[0], 2) def test_nystroem_default_parameters(): @@ -244,7 +244,7 @@ def logging_histogram_kernel(x, y, log): Nystroem(kernel=logging_histogram_kernel, n_components=(n_samples - 1), kernel_params={'log': kernel_log}).fit(X) - assert_equal(len(kernel_log), n_samples * (n_samples - 1) / 2) + assert len(kernel_log) == n_samples * (n_samples - 1) / 2 def linear_kernel(X, Y): return np.dot(X, Y.T) diff --git a/sklearn/tests/test_multiclass.py b/sklearn/tests/test_multiclass.py index 
7816656400b61..61b34a7509200 100644 --- a/sklearn/tests/test_multiclass.py +++ b/sklearn/tests/test_multiclass.py @@ -66,16 +66,16 @@ def test_ovr_fit_predict(): # A classifier which implements decision_function. ovr = OneVsRestClassifier(LinearSVC(random_state=0)) pred = ovr.fit(iris.data, iris.target).predict(iris.data) - assert_equal(len(ovr.estimators_), n_classes) + assert len(ovr.estimators_) == n_classes clf = LinearSVC(random_state=0) pred2 = clf.fit(iris.data, iris.target).predict(iris.data) - assert_equal(np.mean(iris.target == pred), np.mean(iris.target == pred2)) + assert np.mean(iris.target == pred) == np.mean(iris.target == pred2) # A classifier which implements predict_proba. ovr = OneVsRestClassifier(MultinomialNB()) pred = ovr.fit(iris.data, iris.target).predict(iris.data) - assert_greater(np.mean(iris.target == pred), 0.65) + assert np.mean(iris.target == pred) > 0.65 # 0.23. warning about tol not having its correct default value. @@ -91,8 +91,8 @@ def test_ovr_partial_fit(): pred2 = ovr2.fit(X, y).predict(X) assert_almost_equal(pred, pred2) - assert_equal(len(ovr.estimators_), len(np.unique(y))) - assert_greater(np.mean(y == pred), 0.65) + assert len(ovr.estimators_) == len(np.unique(y)) + assert np.mean(y == pred) > 0.65 # Test when mini batches doesn't have all classes # with SGDClassifier @@ -107,7 +107,7 @@ def test_ovr_partial_fit(): ovr1 = OneVsRestClassifier(SGDClassifier(max_iter=1, tol=None, shuffle=False, random_state=0)) pred1 = ovr1.fit(X, y).predict(X) - assert_equal(np.mean(pred == y), np.mean(pred1 == y)) + assert np.mean(pred == y) == np.mean(pred1 == y) # test partial_fit only exists if estimator has it: ovr = OneVsRestClassifier(SVC()) @@ -132,17 +132,17 @@ def test_ovr_ovo_regressor(): # function ovr = OneVsRestClassifier(DecisionTreeRegressor()) pred = ovr.fit(iris.data, iris.target).predict(iris.data) - assert_equal(len(ovr.estimators_), n_classes) + assert len(ovr.estimators_) == n_classes assert_array_equal(np.unique(pred), [0, 1, 2]) # we are doing something sensible - assert_greater(np.mean(pred == iris.target), .9) + assert np.mean(pred == iris.target) > .9 ovr = OneVsOneClassifier(DecisionTreeRegressor()) pred = ovr.fit(iris.data, iris.target).predict(iris.data) - assert_equal(len(ovr.estimators_), n_classes * (n_classes - 1) / 2) + assert len(ovr.estimators_) == n_classes * (n_classes - 1) / 2 assert_array_equal(np.unique(pred), [0, 1, 2]) # we are doing something sensible - assert_greater(np.mean(pred == iris.target), .9) + assert np.mean(pred == iris.target) > .9 def test_ovr_fit_predict_sparse(): @@ -204,7 +204,7 @@ def test_ovr_always_present(): y_pred = ovr.predict(X) assert_array_equal(np.array(y_pred), np.array(y)) y_pred = ovr.decision_function(X) - assert_equal(np.unique(y_pred[:, -2:]), 1) + assert np.unique(y_pred[:, -2:]) == 1 y_pred = ovr.predict_proba(X) assert_array_equal(y_pred[:, -1], np.ones(X.shape[0])) @@ -233,7 +233,7 @@ def test_ovr_multiclass(): LinearRegression(), Ridge(), ElasticNet()): clf = OneVsRestClassifier(base_clf).fit(X, y) - assert_equal(set(clf.classes_), classes) + assert set(clf.classes_) == classes y_pred = clf.predict(np.array([[0, 0, 4]]))[0] assert_array_equal(y_pred, ["eggs"]) @@ -253,24 +253,24 @@ def test_ovr_binary(): def conduct_test(base_clf, test_predict_proba=False): clf = OneVsRestClassifier(base_clf).fit(X, y) - assert_equal(set(clf.classes_), classes) + assert set(clf.classes_) == classes y_pred = clf.predict(np.array([[0, 0, 4]]))[0] assert_array_equal(y_pred, ["eggs"]) if hasattr(base_clf, 
'decision_function'): dec = clf.decision_function(X) - assert_equal(dec.shape, (5,)) + assert dec.shape == (5,) if test_predict_proba: X_test = np.array([[0, 0, 4]]) probabilities = clf.predict_proba(X_test) - assert_equal(2, len(probabilities[0])) - assert_equal(clf.classes_[np.argmax(probabilities, axis=1)], + assert 2 == len(probabilities[0]) + assert (clf.classes_[np.argmax(probabilities, axis=1)] == clf.predict(X_test)) # test input as label indicator matrix clf = OneVsRestClassifier(base_clf).fit(X, Y) y_pred = clf.predict([[3, 0, 0]])[0] - assert_equal(y_pred, 1) + assert y_pred == 1 for base_clf in (LinearSVC(random_state=0), LinearRegression(), Ridge(), ElasticNet()): @@ -302,8 +302,8 @@ def test_ovr_multilabel(): def test_ovr_fit_predict_svc(): ovr = OneVsRestClassifier(svm.SVC()) ovr.fit(iris.data, iris.target) - assert_equal(len(ovr.estimators_), 3) - assert_greater(ovr.score(iris.data, iris.target), .9) + assert len(ovr.estimators_) == 3 + assert ovr.score(iris.data, iris.target) > .9 def test_ovr_multilabel_dataset(): @@ -450,10 +450,10 @@ def test_ovr_coef_(): # test with dense and sparse coef ovr.fit(X, iris.target) shape = ovr.coef_.shape - assert_equal(shape[0], n_classes) - assert_equal(shape[1], iris.data.shape[1]) + assert shape[0] == n_classes + assert shape[1] == iris.data.shape[1] # don't densify sparse coefficients - assert_equal(sp.issparse(ovr.estimators_[0].coef_), + assert (sp.issparse(ovr.estimators_[0].coef_) == sp.issparse(ovr.coef_)) @@ -489,12 +489,12 @@ def test_ovo_fit_predict(): # A classifier which implements decision_function. ovo = OneVsOneClassifier(LinearSVC(random_state=0)) ovo.fit(iris.data, iris.target).predict(iris.data) - assert_equal(len(ovo.estimators_), n_classes * (n_classes - 1) / 2) + assert len(ovo.estimators_) == n_classes * (n_classes - 1) / 2 # A classifier which implements predict_proba. 
ovo = OneVsOneClassifier(MultinomialNB()) ovo.fit(iris.data, iris.target).predict(iris.data) - assert_equal(len(ovo.estimators_), n_classes * (n_classes - 1) / 2) + assert len(ovo.estimators_) == n_classes * (n_classes - 1) / 2 def test_ovo_partial_fit_predict(): @@ -508,8 +508,8 @@ def test_ovo_partial_fit_predict(): ovo2 = OneVsOneClassifier(MultinomialNB()) ovo2.fit(X, y) pred2 = ovo2.predict(X) - assert_equal(len(ovo1.estimators_), n_classes * (n_classes - 1) / 2) - assert_greater(np.mean(y == pred1), 0.65) + assert len(ovo1.estimators_) == n_classes * (n_classes - 1) / 2 + assert np.mean(y == pred1) > 0.65 assert_almost_equal(pred1, pred2) # Test when mini-batches have binary target classes @@ -521,8 +521,8 @@ def test_ovo_partial_fit_predict(): pred2 = ovo2.fit(X, y).predict(X) assert_almost_equal(pred1, pred2) - assert_equal(len(ovo1.estimators_), len(np.unique(y))) - assert_greater(np.mean(y == pred1), 0.65) + assert len(ovo1.estimators_) == len(np.unique(y)) + assert np.mean(y == pred1) > 0.65 ovo = OneVsOneClassifier(MultinomialNB()) X = np.random.rand(14, 2) @@ -555,13 +555,13 @@ def test_ovo_decision_function(): # first binary ovo_clf.fit(iris.data, iris.target == 0) decisions = ovo_clf.decision_function(iris.data) - assert_equal(decisions.shape, (n_samples,)) + assert decisions.shape == (n_samples,) # then multi-class ovo_clf.fit(iris.data, iris.target) decisions = ovo_clf.decision_function(iris.data) - assert_equal(decisions.shape, (n_samples, n_classes)) + assert decisions.shape == (n_samples, n_classes) assert_array_equal(decisions.argmax(axis=1), ovo_clf.predict(iris.data)) # Compute the votes @@ -592,7 +592,7 @@ def test_ovo_decision_function(): # to compute the aggregate decision function. The iris dataset # has 150 samples with a couple of duplicates. The OvO decisions # can resolve most of the ties: - assert_greater(len(np.unique(decisions[:, class_idx])), 146) + assert len(np.unique(decisions[:, class_idx])) > 146 def test_ovo_gridsearch(): @@ -628,7 +628,7 @@ def test_ovo_ties(): # For the rest, there is no tie and the prediction is the argmax assert_array_equal(np.argmax(votes[1:], axis=1), ovo_prediction[1:]) # For the tie, the prediction is the class with the highest score - assert_equal(ovo_prediction[0], normalized_confidences[0].argmax()) + assert ovo_prediction[0] == normalized_confidences[0].argmax() # 0.23. warning about tol not having its correct default value. @@ -644,7 +644,7 @@ def test_ovo_ties2(): multi_clf = OneVsOneClassifier(Perceptron(shuffle=False, max_iter=4, tol=None)) ovo_prediction = multi_clf.fit(X, y).predict(X) - assert_equal(ovo_prediction[0], i % 3) + assert ovo_prediction[0] == i % 3 def test_ovo_string_y(): @@ -685,12 +685,12 @@ def test_ecoc_fit_predict(): ecoc = OutputCodeClassifier(LinearSVC(random_state=0), code_size=2, random_state=0) ecoc.fit(iris.data, iris.target).predict(iris.data) - assert_equal(len(ecoc.estimators_), n_classes * 2) + assert len(ecoc.estimators_) == n_classes * 2 # A classifier which implements predict_proba. 
ecoc = OutputCodeClassifier(MultinomialNB(), code_size=2, random_state=0) ecoc.fit(iris.data, iris.target).predict(iris.data) - assert_equal(len(ecoc.estimators_), n_classes * 2) + assert len(ecoc.estimators_) == n_classes * 2 def test_ecoc_gridsearch(): @@ -727,7 +727,7 @@ def test_pairwise_indices(): precomputed_indices = ovr_false.pairwise_indices_ for idx in precomputed_indices: - assert_equal(idx.shape[0] * n_estimators / (n_estimators - 1), + assert (idx.shape[0] * n_estimators / (n_estimators - 1) == linear_kernel.shape[0]) diff --git a/sklearn/tests/test_multioutput.py b/sklearn/tests/test_multioutput.py index 65bc2a97246f7..35a739fee122b 100644 --- a/sklearn/tests/test_multioutput.py +++ b/sklearn/tests/test_multioutput.py @@ -130,7 +130,7 @@ def test_multi_target_sample_weight_partial_fit(): rgr = MultiOutputRegressor(SGDRegressor(random_state=0, max_iter=5)) rgr.partial_fit(X, y, w) - assert_not_equal(rgr.predict(X)[0][0], rgr_w.predict(X)[0][0]) + assert rgr.predict(X)[0][0] != rgr_w.predict(X)[0][0] def test_multi_target_sample_weights(): @@ -220,11 +220,11 @@ def test_multi_output_classification_partial_fit(): X[:half_index], y[:half_index], classes=classes) first_predictions = multi_target_linear.predict(X) - assert_equal((n_samples, n_outputs), first_predictions.shape) + assert (n_samples, n_outputs) == first_predictions.shape multi_target_linear.partial_fit(X[half_index:], y[half_index:]) second_predictions = multi_target_linear.predict(X) - assert_equal((n_samples, n_outputs), second_predictions.shape) + assert (n_samples, n_outputs) == second_predictions.shape # train the linear classification with each column and assert that # predictions are equal after first partial_fit and second partial_fit @@ -259,13 +259,13 @@ def test_multi_output_classification(): multi_target_forest.fit(X, y) predictions = multi_target_forest.predict(X) - assert_equal((n_samples, n_outputs), predictions.shape) + assert (n_samples, n_outputs) == predictions.shape predict_proba = multi_target_forest.predict_proba(X) assert len(predict_proba) == n_outputs for class_probabilities in predict_proba: - assert_equal((n_samples, n_classes), class_probabilities.shape) + assert (n_samples, n_classes) == class_probabilities.shape assert_array_equal(np.argmax(np.dstack(predict_proba), axis=1), predictions) @@ -274,7 +274,7 @@ def test_multi_output_classification(): for i in range(3): forest_ = clone(forest) # create a clone with the same state forest_.fit(X, y[:, i]) - assert_equal(list(forest_.predict(X)), list(predictions[:, i])) + assert list(forest_.predict(X)) == list(predictions[:, i]) assert_array_equal(list(forest_.predict_proba(X)), list(predict_proba[i])) @@ -288,13 +288,13 @@ def test_multiclass_multioutput_estimator(): multi_target_svc.fit(X, y) predictions = multi_target_svc.predict(X) - assert_equal((n_samples, n_outputs), predictions.shape) + assert (n_samples, n_outputs) == predictions.shape # train the forest with each column and assert that predictions are equal for i in range(3): multi_class_svc_ = clone(multi_class_svc) # create a clone multi_class_svc_.fit(X, y[:, i]) - assert_equal(list(multi_class_svc_.predict(X)), + assert (list(multi_class_svc_.predict(X)) == list(predictions[:, i])) @@ -413,7 +413,7 @@ def test_classifier_chain_fit_and_predict_with_linear_svc(): classifier_chain.fit(X, Y) Y_pred = classifier_chain.predict(X) - assert_equal(Y_pred.shape, Y.shape) + assert Y_pred.shape == Y.shape Y_decision = classifier_chain.decision_function(X) @@ -456,7 +456,7 @@ def 
test_classifier_chain_vs_independent_models(): chain.fit(X_train, Y_train) Y_pred_chain = chain.predict(X_test) - assert_greater(jaccard_score(Y_test, Y_pred_chain, average='samples'), + assert (jaccard_score(Y_test, Y_pred_chain, average='samples') > jaccard_score(Y_test, Y_pred_ovr, average='samples')) @@ -468,8 +468,8 @@ def test_base_chain_fit_and_predict(): for chain in chains: chain.fit(X, Y) Y_pred = chain.predict(X) - assert_equal(Y_pred.shape, Y.shape) - assert_equal([c.coef_.size for c in chain.estimators_], + assert Y_pred.shape == Y.shape + assert ([c.coef_.size for c in chain.estimators_] == list(range(X.shape[1], X.shape[1] + Y.shape[1]))) Y_prob = chains[1].predict_proba(X) @@ -488,7 +488,7 @@ def test_base_chain_fit_and_predict_with_sparse_data_and_cv(): for chain in base_chains: chain.fit(X_sparse, Y) Y_pred = chain.predict(X_sparse) - assert_equal(Y_pred.shape, Y.shape) + assert Y_pred.shape == Y.shape def test_base_chain_random_order(): @@ -501,9 +501,9 @@ def test_base_chain_random_order(): chain_fixed = clone(chain).set_params(order=chain_random.order_) chain_fixed.fit(X, Y) assert_array_equal(chain_fixed.order_, chain_random.order_) - assert_not_equal(list(chain_random.order), list(range(4))) - assert_equal(len(chain_random.order_), 4) - assert_equal(len(set(chain_random.order_)), 4) + assert list(chain_random.order) != list(range(4)) + assert len(chain_random.order_) == 4 + assert len(set(chain_random.order_)) == 4 # Randomly ordered chain should behave identically to a fixed order # chain with the same order. for est1, est2 in zip(chain_random.estimators_, diff --git a/sklearn/tests/test_naive_bayes.py b/sklearn/tests/test_naive_bayes.py index 77ebb0125529f..018860d96fa84 100644 --- a/sklearn/tests/test_naive_bayes.py +++ b/sklearn/tests/test_naive_bayes.py @@ -143,7 +143,7 @@ def test_gnb_prior_large_bias(): """Test if good prediction when class prior favor largely one class""" clf = GaussianNB(priors=np.array([0.01, 0.99])) clf.fit(X, y) - assert_equal(clf.predict([[-0.1, -0.1]]), np.array([2])) + assert clf.predict([[-0.1, -0.1]]) == np.array([2]) def test_gnb_check_update_with_no_data(): @@ -155,8 +155,8 @@ def test_gnb_check_update_with_no_data(): x_empty = np.empty((0, X.shape[1])) tmean, tvar = GaussianNB._update_mean_variance(prev_points, mean, var, x_empty) - assert_equal(tmean, mean) - assert_equal(tvar, var) + assert tmean == mean + assert tvar == var def test_gnb_pfit_wrong_nb_features(): @@ -289,8 +289,8 @@ def test_discretenb_predict_proba(): for cls, X in zip([BernoulliNB, MultinomialNB], [X_bernoulli, X_multinomial]): clf = cls().fit(X, y) - assert_equal(clf.predict(X[-1:]), 2) - assert_equal(clf.predict_proba([X[0]]).shape, (1, 2)) + assert clf.predict(X[-1:]) == 2 + assert clf.predict_proba([X[0]]).shape == (1, 2) assert_array_almost_equal(clf.predict_proba(X[:2]).sum(axis=1), np.array([1., 1.]), 6) @@ -299,8 +299,8 @@ def test_discretenb_predict_proba(): for cls, X in zip([BernoulliNB, MultinomialNB], [X_bernoulli, X_multinomial]): clf = cls().fit(X, y) - assert_equal(clf.predict_proba(X[0:1]).shape, (1, 3)) - assert_equal(clf.predict_proba(X[:2]).shape, (2, 3)) + assert clf.predict_proba(X[0:1]).shape == (1, 3) + assert clf.predict_proba(X[:2]).shape == (2, 3) assert_almost_equal(np.sum(clf.predict_proba([X[1]])), 1) assert_almost_equal(np.sum(clf.predict_proba([X[-1]])), 1) assert_almost_equal(np.sum(np.exp(clf.class_log_prior_)), 1) @@ -387,8 +387,8 @@ def test_discretenb_coef_intercept_shape(cls): clf = cls() clf.fit(X, y) - 
assert_equal(clf.coef_.shape, (1, 3)) - assert_equal(clf.intercept_.shape, (1,)) + assert clf.coef_.shape == (1, 3) + assert clf.intercept_.shape == (1,) @pytest.mark.parametrize('kind', ('dense', 'sparse')) @@ -716,24 +716,24 @@ def test_check_accuracy_on_digits(): # Multinomial NB scores = cross_val_score(MultinomialNB(alpha=10), X, y, cv=10) - assert_greater(scores.mean(), 0.86) + assert scores.mean() > 0.86 scores = cross_val_score(MultinomialNB(alpha=10), X_3v8, y_3v8, cv=10) - assert_greater(scores.mean(), 0.94) + assert scores.mean() > 0.94 # Bernoulli NB scores = cross_val_score(BernoulliNB(alpha=10), X > 4, y, cv=10) - assert_greater(scores.mean(), 0.83) + assert scores.mean() > 0.83 scores = cross_val_score(BernoulliNB(alpha=10), X_3v8 > 4, y_3v8, cv=10) - assert_greater(scores.mean(), 0.92) + assert scores.mean() > 0.92 # Gaussian NB scores = cross_val_score(GaussianNB(), X, y, cv=10) - assert_greater(scores.mean(), 0.77) + assert scores.mean() > 0.77 scores = cross_val_score(GaussianNB(var_smoothing=0.1), X, y, cv=10) - assert_greater(scores.mean(), 0.89) + assert scores.mean() > 0.89 scores = cross_val_score(GaussianNB(), X_3v8, y_3v8, cv=10) - assert_greater(scores.mean(), 0.86) + assert scores.mean() > 0.86 diff --git a/sklearn/tests/test_pipeline.py b/sklearn/tests/test_pipeline.py index b40ca7778f2fa..e064f0ba39572 100644 --- a/sklearn/tests/test_pipeline.py +++ b/sklearn/tests/test_pipeline.py @@ -170,14 +170,14 @@ def test_pipeline_init(): # Smoke test with only an estimator clf = NoTrans() pipe = Pipeline([('svc', clf)]) - assert_equal(pipe.get_params(deep=True), + assert (pipe.get_params(deep=True) == dict(svc__a=None, svc__b=None, svc=clf, **pipe.get_params(deep=False))) # Check that params are set pipe.set_params(svc__a=0.1) - assert_equal(clf.a, 0.1) - assert_equal(clf.b, None) + assert clf.a == 0.1 + assert clf.b == None # Smoke test the repr: repr(pipe) @@ -199,7 +199,7 @@ def test_pipeline_init(): # Check that params are set pipe.set_params(svc__C=0.1) - assert_equal(clf.C, 0.1) + assert clf.C == 0.1 # Smoke test the repr: repr(pipe) @@ -225,7 +225,7 @@ def test_pipeline_init(): params.pop('anova') params2.pop('svc') params2.pop('anova') - assert_equal(params, params2) + assert params == params2 def test_pipeline_init_tuple(): @@ -278,10 +278,10 @@ def test_pipeline_sample_weight_supported(): X = np.array([[1, 2]]) pipe = Pipeline([('transf', Transf()), ('clf', FitParamT())]) pipe.fit(X, y=None) - assert_equal(pipe.score(X), 3) - assert_equal(pipe.score(X, y=None), 3) - assert_equal(pipe.score(X, y=None, sample_weight=None), 3) - assert_equal(pipe.score(X, sample_weight=np.array([2, 3])), 8) + assert pipe.score(X) == 3 + assert pipe.score(X, y=None) == 3 + assert pipe.score(X, y=None, sample_weight=None) == 3 + assert pipe.score(X, sample_weight=np.array([2, 3])) == 8 def test_pipeline_sample_weight_unsupported(): @@ -289,8 +289,8 @@ def test_pipeline_sample_weight_unsupported(): X = np.array([[1, 2]]) pipe = Pipeline([('transf', Transf()), ('clf', Mult())]) pipe.fit(X, y=None) - assert_equal(pipe.score(X), 3) - assert_equal(pipe.score(X, sample_weight=None), 3) + assert pipe.score(X) == 3 + assert pipe.score(X, sample_weight=None) == 3 assert_raise_message( TypeError, "score() got an unexpected keyword argument 'sample_weight'", @@ -382,16 +382,16 @@ def test_pipeline_methods_preprocessing_svm(): # check shapes of various prediction functions predict = pipe.predict(X) - assert_equal(predict.shape, (n_samples,)) + assert predict.shape == (n_samples,) proba = 
pipe.predict_proba(X) - assert_equal(proba.shape, (n_samples, n_classes)) + assert proba.shape == (n_samples, n_classes) log_proba = pipe.predict_log_proba(X) - assert_equal(log_proba.shape, (n_samples, n_classes)) + assert log_proba.shape == (n_samples, n_classes) decision_function = pipe.decision_function(X) - assert_equal(decision_function.shape, (n_samples, n_classes)) + assert decision_function.shape == (n_samples, n_classes) pipe.score(X, y) @@ -467,7 +467,7 @@ def test_feature_union(): fs = FeatureUnion([("svd", svd), ("select", select)]) fs.fit(X, y) X_transformed = fs.transform(X) - assert_equal(X_transformed.shape, (X.shape[0], 3)) + assert X_transformed.shape == (X.shape[0], 3) # check if it does the expected thing assert_array_almost_equal(X_transformed[:, :-1], svd.fit_transform(X)) @@ -487,12 +487,12 @@ def test_feature_union(): # test setting parameters fs.set_params(select__k=2) - assert_equal(fs.fit_transform(X, y).shape, (X.shape[0], 4)) + assert fs.fit_transform(X, y).shape == (X.shape[0], 4) # test it works with transformers missing fit_transform fs = FeatureUnion([("mock", Transf()), ("svd", svd), ("select", select)]) X_transformed = fs.fit_transform(X, y) - assert_equal(X_transformed.shape, (X.shape[0], 8)) + assert X_transformed.shape == (X.shape[0], 8) # test error if some elements do not support transform assert_raises_regex(TypeError, @@ -511,16 +511,16 @@ def test_make_union(): mock = Transf() fu = make_union(pca, mock) names, transformers = zip(*fu.transformer_list) - assert_equal(names, ("pca", "transf")) - assert_equal(transformers, (pca, mock)) + assert names == ("pca", "transf") + assert transformers == (pca, mock) def test_make_union_kwargs(): pca = PCA(svd_solver='full') mock = Transf() fu = make_union(pca, mock, n_jobs=3) - assert_equal(fu.transformer_list, make_union(pca, mock).transformer_list) - assert_equal(3, fu.n_jobs) + assert fu.transformer_list == make_union(pca, mock).transformer_list + assert 3 == fu.n_jobs # invalid keyword parameters should raise an error message assert_raise_message( TypeError, @@ -596,15 +596,15 @@ def test_set_pipeline_steps(): pipeline.steps = [('mock2', transf2)] assert 'mock' not in pipeline.named_steps assert pipeline.named_steps['mock2'] is transf2 - assert_equal([('mock2', transf2)], pipeline.steps) + assert [('mock2', transf2)] == pipeline.steps # Using set_params pipeline.set_params(steps=[('mock', transf1)]) - assert_equal([('mock', transf1)], pipeline.steps) + assert [('mock', transf1)] == pipeline.steps # Using set_params to replace single step pipeline.set_params(mock=transf2) - assert_equal([('mock', transf2)], pipeline.steps) + assert [('mock', transf2)] == pipeline.steps # With invalid data pipeline.set_params(steps=[('junk', ())]) @@ -673,7 +673,7 @@ def make(): assert_array_equal([[exp]], pipeline.fit_transform(X, y)) assert_array_equal([exp], pipeline.fit(X).predict(X)) assert_array_equal(X, pipeline.inverse_transform([[exp]])) - assert_dict_equal(pipeline.get_params(deep=True), + assert (pipeline.get_params(deep=True) == {'steps': pipeline.steps, 'm2': mult2, 'm3': passthrough, @@ -755,14 +755,14 @@ def test_make_pipeline(): t2 = Transf() pipe = make_pipeline(t1, t2) assert isinstance(pipe, Pipeline) - assert_equal(pipe.steps[0][0], "transf-1") - assert_equal(pipe.steps[1][0], "transf-2") + assert pipe.steps[0][0] == "transf-1" + assert pipe.steps[1][0] == "transf-2" pipe = make_pipeline(t1, t2, FitParamT()) assert isinstance(pipe, Pipeline) - assert_equal(pipe.steps[0][0], "transf-1") - 
assert_equal(pipe.steps[1][0], "transf-2") - assert_equal(pipe.steps[2][0], "fitparamt") + assert pipe.steps[0][0] == "transf-1" + assert pipe.steps[1][0] == "transf-2" + assert pipe.steps[2][0] == "fitparamt" assert_raise_message( TypeError, @@ -801,7 +801,7 @@ def test_feature_union_weights(): 10 * pca.fit_transform(X)) assert_array_equal(X_fit_transformed[:, -1], select.fit_transform(X, y).ravel()) - assert_equal(X_fit_transformed_wo_method.shape, (X.shape[0], 7)) + assert X_fit_transformed_wo_method.shape == (X.shape[0], 7) def test_feature_union_parallel(): @@ -825,11 +825,11 @@ def test_feature_union_parallel(): fs.fit(X) X_transformed = fs.transform(X) - assert_equal(X_transformed.shape[0], len(X)) + assert X_transformed.shape[0] == len(X) fs_parallel.fit(X) X_transformed_parallel = fs_parallel.transform(X) - assert_equal(X_transformed.shape, X_transformed_parallel.shape) + assert X_transformed.shape == X_transformed_parallel.shape assert_array_equal( X_transformed.toarray(), X_transformed_parallel.toarray() @@ -858,7 +858,7 @@ def test_feature_union_feature_names(): feature_names = ft.get_feature_names() for feat in feature_names: assert "chars__" in feat or "words__" in feat - assert_equal(len(feature_names), 35) + assert len(feature_names) == 35 ft = FeatureUnion([("tr1", Transf())]).fit([[1]]) assert_raise_message(AttributeError, @@ -891,22 +891,22 @@ def test_set_feature_union_steps(): ft = FeatureUnion([('m2', mult2), ('m3', mult3)]) assert_array_equal([[2, 3]], ft.transform(np.asarray([[1]]))) - assert_equal(['m2__x2', 'm3__x3'], ft.get_feature_names()) + assert ['m2__x2', 'm3__x3'] == ft.get_feature_names() # Directly setting attr ft.transformer_list = [('m5', mult5)] assert_array_equal([[5]], ft.transform(np.asarray([[1]]))) - assert_equal(['m5__x5'], ft.get_feature_names()) + assert ['m5__x5'] == ft.get_feature_names() # Using set_params ft.set_params(transformer_list=[('mock', mult3)]) assert_array_equal([[3]], ft.transform(np.asarray([[1]]))) - assert_equal(['mock__x3'], ft.get_feature_names()) + assert ['mock__x3'] == ft.get_feature_names() # Using set_params to replace single step ft.set_params(mock=mult5) assert_array_equal([[5]], ft.transform(np.asarray([[1]]))) - assert_equal(['mock__x5'], ft.get_feature_names()) + assert ['mock__x5'] == ft.get_feature_names() @pytest.mark.parametrize('drop', ['drop', None]) @@ -920,17 +920,17 @@ def test_set_feature_union_step_drop(drop): ft = FeatureUnion([('m2', mult2), ('m3', mult3)]) assert_array_equal([[2, 3]], ft.fit(X).transform(X)) assert_array_equal([[2, 3]], ft.fit_transform(X)) - assert_equal(['m2__x2', 'm3__x3'], ft.get_feature_names()) + assert ['m2__x2', 'm3__x3'] == ft.get_feature_names() ft.set_params(m2=drop) assert_array_equal([[3]], ft.fit(X).transform(X)) assert_array_equal([[3]], ft.fit_transform(X)) - assert_equal(['m3__x3'], ft.get_feature_names()) + assert ['m3__x3'] == ft.get_feature_names() ft.set_params(m3=drop) assert_array_equal([[]], ft.fit(X).transform(X)) assert_array_equal([[]], ft.fit_transform(X)) - assert_equal([], ft.get_feature_names()) + assert [] == ft.get_feature_names() # check we can change back ft.set_params(m3=mult3) @@ -940,7 +940,7 @@ def test_set_feature_union_step_drop(drop): ft = FeatureUnion([('m2', drop), ('m3', mult3)]) assert_array_equal([[3]], ft.fit(X).transform(X)) assert_array_equal([[3]], ft.fit_transform(X)) - assert_equal(['m3__x3'], ft.get_feature_names()) + assert ['m3__x3'] == ft.get_feature_names() def test_step_name_validation(): @@ -1066,7 +1066,7 @@ def 
test_pipeline_memory(): assert_array_equal(pipe.score(X, y), cached_pipe.score(X, y)) assert_array_equal(pipe.named_steps['transf'].means_, cached_pipe.named_steps['transf'].means_) - assert_equal(ts, cached_pipe.named_steps['transf'].timestamp_) + assert ts == cached_pipe.named_steps['transf'].timestamp_ # Create a new pipeline with cloned estimators # Check that even changing the name step does not affect the cache hit clf_2 = SVC(probability=True, random_state=0) @@ -1084,7 +1084,7 @@ def test_pipeline_memory(): assert_array_equal(pipe.score(X, y), cached_pipe_2.score(X, y)) assert_array_equal(pipe.named_steps['transf'].means_, cached_pipe_2.named_steps['transf_2'].means_) - assert_equal(ts, cached_pipe_2.named_steps['transf_2'].timestamp_) + assert ts == cached_pipe_2.named_steps['transf_2'].timestamp_ finally: shutil.rmtree(cachedir) diff --git a/sklearn/tests/test_random_projection.py b/sklearn/tests/test_random_projection.py index a1205610cdb9f..93d22ba0e88ef 100644 --- a/sklearn/tests/test_random_projection.py +++ b/sklearn/tests/test_random_projection.py @@ -91,10 +91,10 @@ def check_input_size_random_matrix(random_matrix): def check_size_generated(random_matrix): - assert_equal(random_matrix(1, 5).shape, (1, 5)) - assert_equal(random_matrix(5, 1).shape, (5, 1)) - assert_equal(random_matrix(5, 5).shape, (5, 5)) - assert_equal(random_matrix(1, 1).shape, (1, 1)) + assert random_matrix(1, 5).shape == (1, 5) + assert random_matrix(5, 1).shape == (5, 1) + assert random_matrix(5, 5).shape == (5, 5) + assert random_matrix(1, 1).shape == (1, 1) def check_zero_mean_and_unit_norm(random_matrix): @@ -162,14 +162,14 @@ def test_sparse_random_matrix(): # Check possible values values = np.unique(A) - assert_in(np.sqrt(s) / np.sqrt(n_components), values) - assert_in(- np.sqrt(s) / np.sqrt(n_components), values) + assert np.sqrt(s) / np.sqrt(n_components) in values + assert - np.sqrt(s) / np.sqrt(n_components) in values if density == 1.0: - assert_equal(np.size(values), 2) + assert np.size(values) == 2 else: - assert_in(0., values) - assert_equal(np.size(values), 3) + assert 0. in values + assert np.size(values) == 3 # Check that the random matrix follow the proper distribution. 
# Let's say that each element of a_{ij} of A is taken from @@ -263,8 +263,8 @@ def test_random_projection_embedding_quality(): # check that the automatically tuned values for the density respect the # contract for eps: pairwise distances are preserved according to the # Johnson-Lindenstrauss lemma - assert_less(distances_ratio.max(), 1 + eps) - assert_less(1 - eps, distances_ratio.min()) + assert distances_ratio.max() < 1 + eps + assert 1 - eps < distances_ratio.min() def test_SparseRandomProjection_output_representation(): @@ -298,17 +298,17 @@ def test_correct_RandomProjection_dimensions_embedding(): # the number of components is adjusted from the shape of the training # set - assert_equal(rp.n_components, 'auto') - assert_equal(rp.n_components_, 110) + assert rp.n_components == 'auto' + assert rp.n_components_ == 110 if RandomProjection in all_SparseRandomProjection: - assert_equal(rp.density, 'auto') + assert rp.density == 'auto' assert_almost_equal(rp.density_, 0.03, 2) - assert_equal(rp.components_.shape, (110, n_features)) + assert rp.components_.shape == (110, n_features) projected_1 = rp.transform(data) - assert_equal(projected_1.shape, (n_samples, 110)) + assert projected_1.shape == (n_samples, 110) # once the RP is 'fitted' the projection is always the same projected_2 = rp.transform(data) @@ -328,10 +328,10 @@ def test_correct_RandomProjection_dimensions_embedding(): rp = RandomProjection(n_components=100, density=0.001, random_state=0) projected = rp.fit_transform(data) - assert_equal(projected.shape, (n_samples, 100)) - assert_equal(rp.components_.shape, (100, n_features)) - assert_less(rp.components_.nnz, 115) # close to 1% density - assert_less(85, rp.components_.nnz) # close to 1% density + assert projected.shape == (n_samples, 100) + assert rp.components_.shape == (100, n_features) + assert rp.components_.nnz < 115 # close to 1% density + assert 85 < rp.components_.nnz # close to 1% density def test_warning_n_components_greater_than_n_features(): From e9ad46e91aca526060be30277be7680eeeed993a Mon Sep 17 00:00:00 2001 From: adrinjalali Date: Sun, 30 Jun 2019 15:28:15 +0200 Subject: [PATCH 03/22] fix covariance, cross_decomposition, datasets --- sklearn/covariance/tests/test_covariance.py | 2 +- sklearn/cross_decomposition/tests/test_pls.py | 2 +- sklearn/datasets/tests/test_20news.py | 34 ++--- sklearn/datasets/tests/test_base.py | 92 +++++------ sklearn/datasets/tests/test_covtype.py | 10 +- sklearn/datasets/tests/test_kddcup99.py | 24 +-- sklearn/datasets/tests/test_lfw.py | 10 +- sklearn/datasets/tests/test_rcv1.py | 14 +- .../datasets/tests/test_samples_generator.py | 143 +++++++++--------- .../datasets/tests/test_svmlight_format.py | 70 ++++----- 10 files changed, 199 insertions(+), 202 deletions(-) diff --git a/sklearn/covariance/tests/test_covariance.py b/sklearn/covariance/tests/test_covariance.py index bf4449004ae0d..d7e6428ee27fb 100644 --- a/sklearn/covariance/tests/test_covariance.py +++ b/sklearn/covariance/tests/test_covariance.py @@ -43,7 +43,7 @@ def test_covariance(): cov.error_norm, emp_cov, norm='foo') # Mahalanobis distances computation test mahal_dist = cov.mahalanobis(X) - assert_greater(np.amin(mahal_dist), 0) + assert np.amin(mahal_dist) > 0 # test with n_features = 1 X_1d = X[:, 0].reshape((-1, 1)) diff --git a/sklearn/cross_decomposition/tests/test_pls.py b/sklearn/cross_decomposition/tests/test_pls.py index 3d408443e1563..abb305aefdb37 100644 --- a/sklearn/cross_decomposition/tests/test_pls.py +++ b/sklearn/cross_decomposition/tests/test_pls.py @@ 
-281,7 +281,7 @@ def test_PLSSVD(): for clf in [pls_.PLSSVD, pls_.PLSRegression, pls_.PLSCanonical]: pls = clf(n_components=n_components) pls.fit(X, Y) - assert_equal(n_components, pls.y_scores_.shape[1]) + assert n_components == pls.y_scores_.shape[1] def test_univariate_pls_regression(): diff --git a/sklearn/datasets/tests/test_20news.py b/sklearn/datasets/tests/test_20news.py index 90b09614b7a3a..5b171999433db 100644 --- a/sklearn/datasets/tests/test_20news.py +++ b/sklearn/datasets/tests/test_20news.py @@ -22,14 +22,14 @@ def test_20news(): subset='all', categories=data.target_names[-1:-3:-1], shuffle=False) # Check that the ordering of the target_names is the same # as the ordering in the full dataset - assert_equal(data2cats.target_names, + assert (data2cats.target_names == data.target_names[-2:]) # Assert that we have only 0 and 1 as labels - assert_equal(np.unique(data2cats.target).tolist(), [0, 1]) + assert np.unique(data2cats.target).tolist() == [0, 1] # Check that the number of filenames is consistent with data/target - assert_equal(len(data2cats.filenames), len(data2cats.target)) - assert_equal(len(data2cats.filenames), len(data2cats.data)) + assert len(data2cats.filenames) == len(data2cats.target) + assert len(data2cats.filenames) == len(data2cats.data) # Check that the first entry of the reduced dataset corresponds to # the first entry of the corresponding category in the full dataset @@ -37,7 +37,7 @@ def test_20news(): category = data2cats.target_names[data2cats.target[0]] label = data.target_names.index(category) entry2 = data.data[np.where(data.target == label)[0][0]] - assert_equal(entry1, entry2) + assert entry1 == entry2 def test_20news_length_consistency(): @@ -52,9 +52,9 @@ def test_20news_length_consistency(): raise SkipTest("Download 20 newsgroups to run this test") # Extract the full dataset data = datasets.fetch_20newsgroups(subset='all') - assert_equal(len(data['data']), len(data.data)) - assert_equal(len(data['target']), len(data.target)) - assert_equal(len(data['filenames']), len(data.filenames)) + assert len(data['data']) == len(data.data) + assert len(data['target']) == len(data.target) + assert len(data['filenames']) == len(data.filenames) def test_20news_vectorized(): @@ -67,16 +67,16 @@ def test_20news_vectorized(): # test subset = train bunch = datasets.fetch_20newsgroups_vectorized(subset="train") assert sp.isspmatrix_csr(bunch.data) - assert_equal(bunch.data.shape, (11314, 130107)) - assert_equal(bunch.target.shape[0], 11314) - assert_equal(bunch.data.dtype, np.float64) + assert bunch.data.shape == (11314, 130107) + assert bunch.target.shape[0] == 11314 + assert bunch.data.dtype == np.float64 # test subset = test bunch = datasets.fetch_20newsgroups_vectorized(subset="test") assert sp.isspmatrix_csr(bunch.data) - assert_equal(bunch.data.shape, (7532, 130107)) - assert_equal(bunch.target.shape[0], 7532) - assert_equal(bunch.data.dtype, np.float64) + assert bunch.data.shape == (7532, 130107) + assert bunch.target.shape[0] == 7532 + assert bunch.data.dtype == np.float64 # test return_X_y option fetch_func = partial(datasets.fetch_20newsgroups_vectorized, subset='test') @@ -85,6 +85,6 @@ def test_20news_vectorized(): # test subset = all bunch = datasets.fetch_20newsgroups_vectorized(subset='all') assert sp.isspmatrix_csr(bunch.data) - assert_equal(bunch.data.shape, (11314 + 7532, 130107)) - assert_equal(bunch.target.shape[0], 11314 + 7532) - assert_equal(bunch.data.dtype, np.float64) + assert bunch.data.shape == (11314 + 7532, 130107) + assert 
bunch.target.shape[0] == 11314 + 7532 + assert bunch.data.dtype == np.float64 diff --git a/sklearn/datasets/tests/test_base.py b/sklearn/datasets/tests/test_base.py index 676cb00fd16f8..ef802d0c588a6 100644 --- a/sklearn/datasets/tests/test_base.py +++ b/sklearn/datasets/tests/test_base.py @@ -72,7 +72,7 @@ def test_category_dir_2(load_files_root): def test_data_home(data_home): # get_data_home will point to a pre-existing folder data_home = get_data_home(data_home=data_home) - assert_equal(data_home, data_home) + assert data_home == data_home assert os.path.exists(data_home) # clear_data_home will delete both the content and the folder it-self @@ -86,9 +86,9 @@ def test_data_home(data_home): def test_default_empty_load_files(load_files_root): res = load_files(load_files_root) - assert_equal(len(res.filenames), 0) - assert_equal(len(res.target_names), 0) - assert_equal(res.DESCR, None) + assert len(res.filenames) == 0 + assert len(res.target_names) == 0 + assert res.DESCR == None def test_default_load_files(test_category_dir_1, test_category_dir_2, @@ -96,10 +96,10 @@ def test_default_load_files(test_category_dir_1, test_category_dir_2, if IS_PYPY: pytest.xfail('[PyPy] fails due to string containing NUL characters') res = load_files(load_files_root) - assert_equal(len(res.filenames), 1) - assert_equal(len(res.target_names), 2) - assert_equal(res.DESCR, None) - assert_equal(res.data, [b"Hello World!\n"]) + assert len(res.filenames) == 1 + assert len(res.target_names) == 2 + assert res.DESCR == None + assert res.data == [b"Hello World!\n"] def test_load_files_w_categories_desc_and_encoding( @@ -109,26 +109,26 @@ def test_load_files_w_categories_desc_and_encoding( category = os.path.abspath(test_category_dir_1).split('/').pop() res = load_files(load_files_root, description="test", categories=category, encoding="utf-8") - assert_equal(len(res.filenames), 1) - assert_equal(len(res.target_names), 1) - assert_equal(res.DESCR, "test") - assert_equal(res.data, ["Hello World!\n"]) + assert len(res.filenames) == 1 + assert len(res.target_names) == 1 + assert res.DESCR == "test" + assert res.data == ["Hello World!\n"] def test_load_files_wo_load_content( test_category_dir_1, test_category_dir_2, load_files_root): res = load_files(load_files_root, load_content=False) - assert_equal(len(res.filenames), 1) - assert_equal(len(res.target_names), 2) - assert_equal(res.DESCR, None) - assert_equal(res.get('data'), None) + assert len(res.filenames) == 1 + assert len(res.target_names) == 2 + assert res.DESCR == None + assert res.get('data') == None def test_load_sample_images(): try: res = load_sample_images() - assert_equal(len(res.images), 2) - assert_equal(len(res.filenames), 2) + assert len(res.images) == 2 + assert len(res.filenames) == 2 images = res.images # assert is china image @@ -144,8 +144,8 @@ def test_load_sample_images(): def test_load_digits(): digits = load_digits() - assert_equal(digits.data.shape, (1797, 64)) - assert_equal(numpy.unique(digits.target).size, 10) + assert digits.data.shape == (1797, 64) + assert numpy.unique(digits.target).size == 10 # test return_X_y option check_return_X_y(digits, partial(load_digits)) @@ -153,15 +153,15 @@ def test_load_digits(): def test_load_digits_n_class_lt_10(): digits = load_digits(9) - assert_equal(digits.data.shape, (1617, 64)) - assert_equal(numpy.unique(digits.target).size, 9) + assert digits.data.shape == (1617, 64) + assert numpy.unique(digits.target).size == 9 def test_load_sample_image(): try: china = load_sample_image('china.jpg') - 
assert_equal(china.dtype, 'uint8') - assert_equal(china.shape, (427, 640, 3)) + assert china.dtype == 'uint8' + assert china.shape == (427, 640, 3) except ImportError: warnings.warn("Could not load sample images, PIL is not available.") @@ -176,9 +176,9 @@ def test_load_missing_sample_image_error(): def test_load_diabetes(): res = load_diabetes() - assert_equal(res.data.shape, (442, 10)) + assert res.data.shape == (442, 10) assert res.target.size, 442 - assert_equal(len(res.feature_names), 10) + assert len(res.feature_names) == 10 assert res.DESCR # test return_X_y option @@ -187,9 +187,9 @@ def test_load_diabetes(): def test_load_linnerud(): res = load_linnerud() - assert_equal(res.data.shape, (20, 3)) - assert_equal(res.target.shape, (20, 3)) - assert_equal(len(res.target_names), 3) + assert res.data.shape == (20, 3) + assert res.target.shape == (20, 3) + assert len(res.target_names) == 3 assert res.DESCR assert os.path.exists(res.data_filename) assert os.path.exists(res.target_filename) @@ -200,9 +200,9 @@ def test_load_linnerud(): def test_load_iris(): res = load_iris() - assert_equal(res.data.shape, (150, 4)) - assert_equal(res.target.size, 150) - assert_equal(res.target_names.size, 3) + assert res.data.shape == (150, 4) + assert res.target.size == 150 + assert res.target_names.size == 3 assert res.DESCR assert os.path.exists(res.filename) @@ -212,9 +212,9 @@ def test_load_iris(): def test_load_wine(): res = load_wine() - assert_equal(res.data.shape, (178, 13)) - assert_equal(res.target.size, 178) - assert_equal(res.target_names.size, 3) + assert res.data.shape == (178, 13) + assert res.target.size == 178 + assert res.target_names.size == 3 assert res.DESCR # test return_X_y option @@ -223,9 +223,9 @@ def test_load_wine(): def test_load_breast_cancer(): res = load_breast_cancer() - assert_equal(res.data.shape, (569, 30)) - assert_equal(res.target.size, 569) - assert_equal(res.target_names.size, 2) + assert res.data.shape == (569, 30) + assert res.target.size == 569 + assert res.target_names.size == 2 assert res.DESCR assert os.path.exists(res.filename) @@ -235,9 +235,9 @@ def test_load_breast_cancer(): def test_load_boston(): res = load_boston() - assert_equal(res.data.shape, (506, 13)) - assert_equal(res.target.size, 506) - assert_equal(res.feature_names.size, 13) + assert res.data.shape == (506, 13) + assert res.target.size == 506 + assert res.feature_names.size == 13 assert res.DESCR assert os.path.exists(res.filename) @@ -249,7 +249,7 @@ def test_loads_dumps_bunch(): bunch = Bunch(x="x") bunch_from_pkl = loads(dumps(bunch)) bunch_from_pkl.x = "y" - assert_equal(bunch_from_pkl['x'], bunch_from_pkl.x) + assert bunch_from_pkl['x'] == bunch_from_pkl.x def test_bunch_pickle_generated_with_0_16_and_read_with_0_17(): @@ -264,13 +264,13 @@ def test_bunch_pickle_generated_with_0_16_and_read_with_0_17(): bunch.__dict__['key'] = 'set from __dict__' bunch_from_pkl = loads(dumps(bunch)) # After loading from pickle the __dict__ should have been ignored - assert_equal(bunch_from_pkl.key, 'original') - assert_equal(bunch_from_pkl['key'], 'original') + assert bunch_from_pkl.key == 'original' + assert bunch_from_pkl['key'] == 'original' # Making sure that changing the attr does change the value # associated with __getitem__ as well bunch_from_pkl.key = 'changed' - assert_equal(bunch_from_pkl.key, 'changed') - assert_equal(bunch_from_pkl['key'], 'changed') + assert bunch_from_pkl.key == 'changed' + assert bunch_from_pkl['key'] == 'changed' def test_bunch_dir(): diff --git 
a/sklearn/datasets/tests/test_covtype.py b/sklearn/datasets/tests/test_covtype.py index 449382f824525..0c30a0c7d5b18 100644 --- a/sklearn/datasets/tests/test_covtype.py +++ b/sklearn/datasets/tests/test_covtype.py @@ -22,14 +22,14 @@ def test_fetch(): data2 = fetch(shuffle=True, random_state=37) X1, X2 = data1['data'], data2['data'] - assert_equal((581012, 54), X1.shape) - assert_equal(X1.shape, X2.shape) + assert (581012, 54) == X1.shape + assert X1.shape == X2.shape - assert_equal(X1.sum(), X2.sum()) + assert X1.sum() == X2.sum() y1, y2 = data1['target'], data2['target'] - assert_equal((X1.shape[0],), y1.shape) - assert_equal((X1.shape[0],), y2.shape) + assert (X1.shape[0],) == y1.shape + assert (X1.shape[0],) == y2.shape # test return_X_y option fetch_func = partial(fetch) diff --git a/sklearn/datasets/tests/test_kddcup99.py b/sklearn/datasets/tests/test_kddcup99.py index ce7096f3863b8..6efb23c6dfd26 100644 --- a/sklearn/datasets/tests/test_kddcup99.py +++ b/sklearn/datasets/tests/test_kddcup99.py @@ -18,28 +18,28 @@ def test_percent10(): except IOError: raise SkipTest("kddcup99 dataset can not be loaded.") - assert_equal(data.data.shape, (494021, 41)) - assert_equal(data.target.shape, (494021,)) + assert data.data.shape == (494021, 41) + assert data.target.shape == (494021,) data_shuffled = fetch_kddcup99(shuffle=True, random_state=0) - assert_equal(data.data.shape, data_shuffled.data.shape) - assert_equal(data.target.shape, data_shuffled.target.shape) + assert data.data.shape == data_shuffled.data.shape + assert data.target.shape == data_shuffled.target.shape data = fetch_kddcup99('SA') - assert_equal(data.data.shape, (100655, 41)) - assert_equal(data.target.shape, (100655,)) + assert data.data.shape == (100655, 41) + assert data.target.shape == (100655,) data = fetch_kddcup99('SF') - assert_equal(data.data.shape, (73237, 4)) - assert_equal(data.target.shape, (73237,)) + assert data.data.shape == (73237, 4) + assert data.target.shape == (73237,) data = fetch_kddcup99('http') - assert_equal(data.data.shape, (58725, 3)) - assert_equal(data.target.shape, (58725,)) + assert data.data.shape == (58725, 3) + assert data.target.shape == (58725,) data = fetch_kddcup99('smtp') - assert_equal(data.data.shape, (9571, 3)) - assert_equal(data.target.shape, (9571,)) + assert data.data.shape == (9571, 3) + assert data.target.shape == (9571,) fetch_func = partial(fetch_kddcup99, 'smtp') check_return_X_y(data, fetch_func) diff --git a/sklearn/datasets/tests/test_lfw.py b/sklearn/datasets/tests/test_lfw.py index 1afd09084371c..11211e803f93d 100644 --- a/sklearn/datasets/tests/test_lfw.py +++ b/sklearn/datasets/tests/test_lfw.py @@ -117,8 +117,8 @@ def test_load_fake_lfw_people(): # The data is croped around the center as a rectangular bounding box # around the face. 
Colors are converted to gray levels: - assert_equal(lfw_people.images.shape, (10, 62, 47)) - assert_equal(lfw_people.data.shape, (10, 2914)) + assert lfw_people.images.shape == (10, 62, 47) + assert lfw_people.data.shape == (10, 2914) # the target is array of person integer ids assert_array_equal(lfw_people.target, [2, 0, 1, 0, 2, 0, 2, 1, 1, 2]) @@ -132,7 +132,7 @@ def test_load_fake_lfw_people(): lfw_people = fetch_lfw_people(data_home=SCIKIT_LEARN_DATA, resize=None, slice_=None, color=True, download_if_missing=False) - assert_equal(lfw_people.images.shape, (17, 250, 250, 3)) + assert lfw_people.images.shape == (17, 250, 250, 3) # the ids and class names are the same as previously assert_array_equal(lfw_people.target, @@ -166,7 +166,7 @@ def test_load_fake_lfw_pairs(): # The data is croped around the center as a rectangular bounding box # around the face. Colors are converted to gray levels: - assert_equal(lfw_pairs_train.pairs.shape, (10, 2, 62, 47)) + assert lfw_pairs_train.pairs.shape == (10, 2, 62, 47) # the target is whether the person is the same or not assert_array_equal(lfw_pairs_train.target, [1, 1, 1, 1, 1, 0, 0, 0, 0, 0]) @@ -180,7 +180,7 @@ def test_load_fake_lfw_pairs(): lfw_pairs_train = fetch_lfw_pairs(data_home=SCIKIT_LEARN_DATA, resize=None, slice_=None, color=True, download_if_missing=False) - assert_equal(lfw_pairs_train.pairs.shape, (10, 2, 250, 250, 3)) + assert lfw_pairs_train.pairs.shape == (10, 2, 250, 250, 3) # the ids and class names are the same as previously assert_array_equal(lfw_pairs_train.target, [1, 1, 1, 1, 1, 0, 0, 0, 0, 0]) diff --git a/sklearn/datasets/tests/test_rcv1.py b/sklearn/datasets/tests/test_rcv1.py index ea12c9f8e3a12..aa747bd5d74fe 100644 --- a/sklearn/datasets/tests/test_rcv1.py +++ b/sklearn/datasets/tests/test_rcv1.py @@ -28,14 +28,14 @@ def test_fetch_rcv1(): # test sparsity assert sp.issparse(X1) assert sp.issparse(Y1) - assert_equal(60915113, X1.data.size) - assert_equal(2606875, Y1.data.size) + assert 60915113 == X1.data.size + assert 2606875 == Y1.data.size # test shapes - assert_equal((804414, 47236), X1.shape) - assert_equal((804414, 103), Y1.shape) - assert_equal((804414,), s1.shape) - assert_equal(103, len(cat_list)) + assert (804414, 47236) == X1.shape + assert (804414, 103) == Y1.shape + assert (804414,) == s1.shape + assert 103 == len(cat_list) # test ordering of categories first_categories = ['C11', 'C12', 'C13', 'C14', 'C15', 'C151'] @@ -46,7 +46,7 @@ def test_fetch_rcv1(): number_non_zero_in_cat = (5, 1206, 381327) for num, cat in zip(number_non_zero_in_cat, some_categories): j = cat_list.index(cat) - assert_equal(num, Y1[:, j].data.size) + assert num == Y1[:, j].data.size # test shuffling and subset data2 = fetch_rcv1(shuffle=True, subset='train', random_state=77, diff --git a/sklearn/datasets/tests/test_samples_generator.py b/sklearn/datasets/tests/test_samples_generator.py index c66a056a5a0aa..f3e0e20b7dea8 100644 --- a/sklearn/datasets/tests/test_samples_generator.py +++ b/sklearn/datasets/tests/test_samples_generator.py @@ -44,24 +44,23 @@ def test_make_classification(): shift=None, scale=None, weights=weights, random_state=0) - assert_equal(weights, [0.1, 0.25]) - assert_equal(X.shape, (100, 20), "X shape mismatch") - assert_equal(y.shape, (100,), "y shape mismatch") - assert_equal(np.unique(y).shape, (3,), "Unexpected number of classes") - assert_equal(sum(y == 0), 10, "Unexpected number of samples in class #0") - assert_equal(sum(y == 1), 25, "Unexpected number of samples in class #1") - assert_equal(sum(y == 2), 
65, "Unexpected number of samples in class #2") + assert weights == [0.1, 0.25] + assert X.shape == (100, 20), "X shape mismatch" + assert y.shape == (100,), "y shape mismatch" + assert np.unique(y).shape == (3,), "Unexpected number of classes" + assert sum(y == 0) == 10, "Unexpected number of samples in class #0" + assert sum(y == 1) == 25, "Unexpected number of samples in class #1" + assert sum(y == 2) == 65, "Unexpected number of samples in class #2" # Test for n_features > 30 X, y = make_classification(n_samples=2000, n_features=31, n_informative=31, n_redundant=0, n_repeated=0, hypercube=True, scale=0.5, random_state=0) - assert_equal(X.shape, (2000, 31), "X shape mismatch") - assert_equal(y.shape, (2000,), "y shape mismatch") - assert_equal(np.unique(X.view([('', X.dtype)]*X.shape[1])).view(X.dtype) - .reshape(-1, X.shape[1]).shape[0], 2000, - "Unexpected number of unique rows") + assert X.shape == (2000, 31), "X shape mismatch" + assert y.shape == (2000,), "y shape mismatch" + assert (np.unique(X.view([('', X.dtype)]*X.shape[1])).view(X.dtype) + .reshape(-1, X.shape[1]).shape[0] == 2000), "Unexpected number of unique rows" def test_make_classification_informative_features(): @@ -95,8 +94,8 @@ def test_make_classification_informative_features(): n_clusters_per_class=n_clusters_per_class, hypercube=hypercube, random_state=0) - assert_equal(X.shape, (n_samples, n_informative)) - assert_equal(y.shape, (n_samples,)) + assert X.shape == (n_samples, n_informative) + assert y.shape == (n_samples,) # Cluster by sign, viewed as strings to allow uniquing signs = np.sign(X) @@ -104,18 +103,18 @@ def test_make_classification_informative_features(): unique_signs, cluster_index = np.unique(signs, return_inverse=True) - assert_equal(len(unique_signs), n_clusters, - "Wrong number of clusters, or not in distinct " - "quadrants") + assert_message = ("Wrong number of clusters, or not in distinct " + "quadrants") + assert len(unique_signs) == n_clusters, assert_message clusters_by_class = defaultdict(set) for cluster, cls in zip(cluster_index, y): clusters_by_class[cls].add(cluster) for clusters in clusters_by_class.values(): - assert_equal(len(clusters), n_clusters_per_class, - "Wrong number of clusters per class") - assert_equal(len(clusters_by_class), n_classes, - "Wrong number of classes") + assert_message = "Wrong number of clusters per class" + assert len(clusters) == n_clusters_per_class, assert_message + assert (len(clusters_by_class) + == n_classes), "Wrong number of classes" assert_array_almost_equal(np.bincount(y) / len(y) // weights, [1] * n_classes, @@ -153,10 +152,10 @@ def test_make_multilabel_classification_return_sequences(): n_classes=3, random_state=0, return_indicator=False, allow_unlabeled=allow_unlabeled) - assert_equal(X.shape, (100, 20), "X shape mismatch") + assert X.shape == (100, 20), "X shape mismatch" if not allow_unlabeled: - assert_equal(max([max(y) for y in Y]), 2) - assert_equal(min([len(y) for y in Y]), min_length) + assert max([max(y) for y in Y]) == 2 + assert min([len(y) for y in Y]) == min_length assert max([len(y) for y in Y]) <= 3 @@ -165,8 +164,8 @@ def test_make_multilabel_classification_return_indicator(): X, Y = make_multilabel_classification(n_samples=25, n_features=20, n_classes=3, random_state=0, allow_unlabeled=allow_unlabeled) - assert_equal(X.shape, (25, 20), "X shape mismatch") - assert_equal(Y.shape, (25, 3), "Y shape mismatch") + assert X.shape == (25, 20), "X shape mismatch" + assert Y.shape == (25, 3), "Y shape mismatch" assert np.all(np.sum(Y, 
axis=0) > min_length) # Also test return_distributions and return_indicator with True @@ -176,9 +175,9 @@ def test_make_multilabel_classification_return_indicator(): assert_array_almost_equal(X, X2) assert_array_equal(Y, Y2) - assert_equal(p_c.shape, (3,)) + assert p_c.shape == (3,) assert_almost_equal(p_c.sum(), 1) - assert_equal(p_w_c.shape, (20, 3)) + assert p_w_c.shape == (20, 3) assert_almost_equal(p_w_c.sum(axis=0), [1] * 3) @@ -188,16 +187,16 @@ def test_make_multilabel_classification_return_indicator_sparse(): n_classes=3, random_state=0, return_indicator='sparse', allow_unlabeled=allow_unlabeled) - assert_equal(X.shape, (25, 20), "X shape mismatch") - assert_equal(Y.shape, (25, 3), "Y shape mismatch") + assert X.shape == (25, 20), "X shape mismatch" + assert Y.shape == (25, 3), "Y shape mismatch" assert sp.issparse(Y) def test_make_hastie_10_2(): X, y = make_hastie_10_2(n_samples=100, random_state=0) - assert_equal(X.shape, (100, 10), "X shape mismatch") - assert_equal(y.shape, (100,), "y shape mismatch") - assert_equal(np.unique(y).shape, (2,), "Unexpected number of classes") + assert X.shape == (100, 10), "X shape mismatch" + assert y.shape == (100,), "y shape mismatch" + assert np.unique(y).shape == (2,), "Unexpected number of classes" def test_make_regression(): @@ -205,26 +204,26 @@ def test_make_regression(): effective_rank=5, coef=True, bias=0.0, noise=1.0, random_state=0) - assert_equal(X.shape, (100, 10), "X shape mismatch") - assert_equal(y.shape, (100,), "y shape mismatch") - assert_equal(c.shape, (10,), "coef shape mismatch") - assert_equal(sum(c != 0.0), 3, "Unexpected number of informative features") + assert X.shape == (100, 10), "X shape mismatch" + assert y.shape == (100,), "y shape mismatch" + assert c.shape == (10,), "coef shape mismatch" + assert sum(c != 0.0) == 3, "Unexpected number of informative features" # Test that y ~= np.dot(X, c) + bias + N(0, 1.0). assert_almost_equal(np.std(y - np.dot(X, c)), 1.0, decimal=1) # Test with small number of features. 
X, y = make_regression(n_samples=100, n_features=1) # n_informative=3 - assert_equal(X.shape, (100, 1)) + assert X.shape == (100, 1) def test_make_regression_multitarget(): X, y, c = make_regression(n_samples=100, n_features=10, n_informative=3, n_targets=3, coef=True, noise=1., random_state=0) - assert_equal(X.shape, (100, 10), "X shape mismatch") - assert_equal(y.shape, (100, 3), "y shape mismatch") - assert_equal(c.shape, (10, 3), "coef shape mismatch") + assert X.shape == (100, 10), "X shape mismatch" + assert y.shape == (100, 3), "y shape mismatch" + assert c.shape == (10, 3), "coef shape mismatch" assert_array_equal(sum(c != 0.0), 3, "Unexpected number of informative features") @@ -240,7 +239,7 @@ def test_make_blobs(): assert X.shape == (50, 2), "X shape mismatch" assert y.shape == (50,), "y shape mismatch" - assert_equal(np.unique(y).shape, (3,), "Unexpected number of blobs") + assert np.unique(y).shape == (3,), "Unexpected number of blobs" for i, (ctr, std) in enumerate(zip(cluster_centers, cluster_stds)): assert_almost_equal((X[y == i] - ctr).std(), std, 1, "Unexpected std") @@ -308,8 +307,8 @@ def test_make_friedman1(): X, y = make_friedman1(n_samples=5, n_features=10, noise=0.0, random_state=0) - assert_equal(X.shape, (5, 10), "X shape mismatch") - assert_equal(y.shape, (5,), "y shape mismatch") + assert X.shape == (5, 10), "X shape mismatch" + assert y.shape == (5,), "y shape mismatch" assert_array_almost_equal(y, 10 * np.sin(np.pi * X[:, 0] * X[:, 1]) @@ -320,8 +319,8 @@ def test_make_friedman1(): def test_make_friedman2(): X, y = make_friedman2(n_samples=5, noise=0.0, random_state=0) - assert_equal(X.shape, (5, 4), "X shape mismatch") - assert_equal(y.shape, (5,), "y shape mismatch") + assert X.shape == (5, 4), "X shape mismatch" + assert y.shape == (5,), "y shape mismatch" assert_array_almost_equal(y, (X[:, 0] ** 2 @@ -332,8 +331,8 @@ def test_make_friedman2(): def test_make_friedman3(): X, y = make_friedman3(n_samples=5, noise=0.0, random_state=0) - assert_equal(X.shape, (5, 4), "X shape mismatch") - assert_equal(y.shape, (5,), "y shape mismatch") + assert X.shape == (5, 4), "X shape mismatch" + assert y.shape == (5,), "y shape mismatch" assert_array_almost_equal(y, np.arctan((X[:, 1] * X[:, 2] - 1 / (X[:, 1] * X[:, 3])) @@ -344,22 +343,22 @@ def test_make_low_rank_matrix(): X = make_low_rank_matrix(n_samples=50, n_features=25, effective_rank=5, tail_strength=0.01, random_state=0) - assert_equal(X.shape, (50, 25), "X shape mismatch") + assert X.shape == (50, 25), "X shape mismatch" from numpy.linalg import svd u, s, v = svd(X) - assert_less(sum(s) - 5, 0.1, "X rank is not approximately 5") + assert sum(s) - 5 < 0.1, "X rank is not approximately 5" def test_make_sparse_coded_signal(): Y, D, X = make_sparse_coded_signal(n_samples=5, n_components=8, n_features=10, n_nonzero_coefs=3, random_state=0) - assert_equal(Y.shape, (10, 5), "Y shape mismatch") - assert_equal(D.shape, (10, 8), "D shape mismatch") - assert_equal(X.shape, (8, 5), "X shape mismatch") + assert Y.shape == (10, 5), "Y shape mismatch" + assert D.shape == (10, 8), "D shape mismatch" + assert X.shape == (8, 5), "X shape mismatch" for col in X.T: - assert_equal(len(np.flatnonzero(col)), 3, 'Non-zero coefs mismatch') + assert len(np.flatnonzero(col)) == 3, 'Non-zero coefs mismatch' assert_array_almost_equal(np.dot(D, X), Y) assert_array_almost_equal(np.sqrt((D ** 2).sum(axis=0)), np.ones(D.shape[1])) @@ -368,14 +367,14 @@ def test_make_sparse_coded_signal(): def test_make_sparse_uncorrelated(): X, y = 
make_sparse_uncorrelated(n_samples=5, n_features=10, random_state=0) - assert_equal(X.shape, (5, 10), "X shape mismatch") - assert_equal(y.shape, (5,), "y shape mismatch") + assert X.shape == (5, 10), "X shape mismatch" + assert y.shape == (5,), "y shape mismatch" def test_make_spd_matrix(): X = make_spd_matrix(n_dim=5, random_state=0) - assert_equal(X.shape, (5, 5), "X shape mismatch") + assert X.shape == (5, 5), "X shape mismatch" assert_array_almost_equal(X, X.T) from numpy.linalg import eig @@ -387,8 +386,8 @@ def test_make_spd_matrix(): def test_make_swiss_roll(): X, t = make_swiss_roll(n_samples=5, noise=0.0, random_state=0) - assert_equal(X.shape, (5, 3), "X shape mismatch") - assert_equal(t.shape, (5,), "t shape mismatch") + assert X.shape == (5, 3), "X shape mismatch" + assert t.shape == (5,), "t shape mismatch" assert_array_almost_equal(X[:, 0], t * np.cos(t)) assert_array_almost_equal(X[:, 2], t * np.sin(t)) @@ -396,8 +395,8 @@ def test_make_swiss_roll(): def test_make_s_curve(): X, t = make_s_curve(n_samples=5, noise=0.0, random_state=0) - assert_equal(X.shape, (5, 3), "X shape mismatch") - assert_equal(t.shape, (5,), "t shape mismatch") + assert X.shape == (5, 3), "X shape mismatch" + assert t.shape == (5,), "t shape mismatch" assert_array_almost_equal(X[:, 0], np.sin(t)) assert_array_almost_equal(X[:, 2], np.sign(t) * (np.cos(t) - 1)) @@ -405,9 +404,9 @@ def test_make_s_curve(): def test_make_biclusters(): X, rows, cols = make_biclusters( shape=(100, 100), n_clusters=4, shuffle=True, random_state=0) - assert_equal(X.shape, (100, 100), "X shape mismatch") - assert_equal(rows.shape, (4, 100), "rows shape mismatch") - assert_equal(cols.shape, (4, 100,), "columns shape mismatch") + assert X.shape == (100, 100), "X shape mismatch" + assert rows.shape == (4, 100), "rows shape mismatch" + assert cols.shape == (4, 100,), "columns shape mismatch" assert_all_finite(X) assert_all_finite(rows) assert_all_finite(cols) @@ -421,9 +420,9 @@ def test_make_checkerboard(): X, rows, cols = make_checkerboard( shape=(100, 100), n_clusters=(20, 5), shuffle=True, random_state=0) - assert_equal(X.shape, (100, 100), "X shape mismatch") - assert_equal(rows.shape, (100, 100), "rows shape mismatch") - assert_equal(cols.shape, (100, 100,), "columns shape mismatch") + assert X.shape == (100, 100), "X shape mismatch" + assert rows.shape == (100, 100), "rows shape mismatch" + assert cols.shape == (100, 100,), "columns shape mismatch" X, rows, cols = make_checkerboard( shape=(100, 100), n_clusters=2, shuffle=True, random_state=0) @@ -455,8 +454,8 @@ def test_make_circles(): # created an even number of samples. X, y = make_circles(n_samples, shuffle=False, noise=None, factor=factor) - assert_equal(X.shape, (n_samples, 2), "X shape mismatch") - assert_equal(y.shape, (n_samples,), "y shape mismatch") + assert X.shape == (n_samples, 2), "X shape mismatch" + assert y.shape == (n_samples,), "y shape mismatch" center = [0.0, 0.0] for x, label in zip(X, y): dist_sqr = ((x - center) ** 2).sum() @@ -464,10 +463,8 @@ def test_make_circles(): assert_almost_equal(dist_sqr, dist_exp, err_msg="Point is not on expected circle") - assert_equal(X[y == 0].shape, (n_outer, 2), - "Samples not correctly distributed across circles.") - assert_equal(X[y == 1].shape, (n_inner, 2), - "Samples not correctly distributed across circles.") + assert X[y == 0].shape == (n_outer, 2), "Samples not correctly distributed across circles." + assert X[y == 1].shape == (n_inner, 2), "Samples not correctly distributed across circles." 
assert_raises(ValueError, make_circles, factor=-0.01) assert_raises(ValueError, make_circles, factor=1.) diff --git a/sklearn/datasets/tests/test_svmlight_format.py b/sklearn/datasets/tests/test_svmlight_format.py index c25344e4acfcf..bec67a7aa3819 100644 --- a/sklearn/datasets/tests/test_svmlight_format.py +++ b/sklearn/datasets/tests/test_svmlight_format.py @@ -34,28 +34,28 @@ def test_load_svmlight_file(): X, y = load_svmlight_file(datafile) # test X's shape - assert_equal(X.indptr.shape[0], 7) - assert_equal(X.shape[0], 6) - assert_equal(X.shape[1], 21) - assert_equal(y.shape[0], 6) + assert X.indptr.shape[0] == 7 + assert X.shape[0] == 6 + assert X.shape[1] == 21 + assert y.shape[0] == 6 # test X's non-zero values for i, j, val in ((0, 2, 2.5), (0, 10, -5.2), (0, 15, 1.5), (1, 5, 1.0), (1, 12, -3), (2, 20, 27)): - assert_equal(X[i, j], val) + assert X[i, j] == val # tests X's zero values - assert_equal(X[0, 3], 0) - assert_equal(X[0, 5], 0) - assert_equal(X[1, 8], 0) - assert_equal(X[1, 16], 0) - assert_equal(X[2, 18], 0) + assert X[0, 3] == 0 + assert X[0, 5] == 0 + assert X[1, 8] == 0 + assert X[1, 16] == 0 + assert X[2, 18] == 0 # test can change X's values X[0, 2] *= 2 - assert_equal(X[0, 2], 5) + assert X[0, 2] == 5 # test y assert_array_equal(y, [1, 2, 3, 4, 1, 2]) @@ -76,7 +76,7 @@ def test_load_svmlight_file_fd(): def test_load_svmlight_file_multilabel(): X, y = load_svmlight_file(multifile, multilabel=True) - assert_equal(y, [(0, 1), (2,), (), (1, 2)]) + assert y == [(0, 1), (2,), (), (1, 2)] def test_load_svmlight_files(): @@ -84,29 +84,29 @@ def test_load_svmlight_files(): dtype=np.float32) assert_array_equal(X_train.toarray(), X_test.toarray()) assert_array_almost_equal(y_train, y_test) - assert_equal(X_train.dtype, np.float32) - assert_equal(X_test.dtype, np.float32) + assert X_train.dtype == np.float32 + assert X_test.dtype == np.float32 X1, y1, X2, y2, X3, y3 = load_svmlight_files([datafile] * 3, dtype=np.float64) - assert_equal(X1.dtype, X2.dtype) - assert_equal(X2.dtype, X3.dtype) - assert_equal(X3.dtype, np.float64) + assert X1.dtype == X2.dtype + assert X2.dtype == X3.dtype + assert X3.dtype == np.float64 def test_load_svmlight_file_n_features(): X, y = load_svmlight_file(datafile, n_features=22) # test X'shape - assert_equal(X.indptr.shape[0], 7) - assert_equal(X.shape[0], 6) - assert_equal(X.shape[1], 22) + assert X.indptr.shape[0] == 7 + assert X.shape[0] == 6 + assert X.shape[1] == 22 # test X's non-zero values for i, j, val in ((0, 2, 2.5), (0, 10, -5.2), (1, 5, 1.0), (1, 12, -3)): - assert_equal(X[i, j], val) + assert X[i, j] == val # 21 features in file assert_raises(ValueError, load_svmlight_file, datafile, n_features=20) @@ -159,13 +159,13 @@ def test_load_zero_based_auto(): f1 = BytesIO(data1) X, y = load_svmlight_file(f1, zero_based="auto") - assert_equal(X.shape, (1, 3)) + assert X.shape == (1, 3) f1 = BytesIO(data1) f2 = BytesIO(data2) X1, y1, X2, y2 = load_svmlight_files([f1, f2], zero_based="auto") - assert_equal(X1.shape, (1, 4)) - assert_equal(X2.shape, (1, 4)) + assert X1.shape == (1, 4) + assert X2.shape == (1, 4) def test_load_with_qid(): @@ -250,16 +250,16 @@ def test_dump(): comment = f.readline() comment = str(comment, "utf-8") - assert_in("scikit-learn %s" % sklearn.__version__, comment) + assert "scikit-learn %s" % sklearn.__version__ in comment comment = f.readline() comment = str(comment, "utf-8") - assert_in(["one", "zero"][zero_based] + "-based", comment) + assert ["one", "zero"][zero_based] + "-based" in comment X2, y2 = 
load_svmlight_file(f, dtype=dtype, zero_based=zero_based) - assert_equal(X2.dtype, dtype) + assert X2.dtype == dtype assert_array_equal(X2.sorted_indices().indices, X2.indices) X2_dense = X2.toarray() @@ -293,9 +293,9 @@ def test_dump_multilabel(): dump_svmlight_file(X, y, f, multilabel=True) f.seek(0) # make sure it dumps multilabel correctly - assert_equal(f.readline(), b"1 0:1 2:3 4:5\n") - assert_equal(f.readline(), b"0,2 \n") - assert_equal(f.readline(), b"0,1 1:5 3:1\n") + assert f.readline() == b"1 0:1 2:3 4:5\n" + assert f.readline() == b"0,2 \n" + assert f.readline() == b"0,1 1:5 3:1\n" def test_dump_concise(): @@ -315,12 +315,12 @@ def test_dump_concise(): dump_svmlight_file(X, y, f) f.seek(0) # make sure it's using the most concise format possible - assert_equal(f.readline(), + assert (f.readline() == b"1 0:1 1:2.1 2:3.01 3:1.000000000000001 4:1\n") - assert_equal(f.readline(), b"2.1 0:1000000000 1:2e+18 2:3e+27\n") - assert_equal(f.readline(), b"3.01 \n") - assert_equal(f.readline(), b"1.000000000000001 \n") - assert_equal(f.readline(), b"1 \n") + assert f.readline() == b"2.1 0:1000000000 1:2e+18 2:3e+27\n" + assert f.readline() == b"3.01 \n" + assert f.readline() == b"1.000000000000001 \n" + assert f.readline() == b"1 \n" f.seek(0) # make sure it's correct too :) X2, y2 = load_svmlight_file(f) From 2065724da6f77da51c654705fa904ae3228adbab Mon Sep 17 00:00:00 2001 From: adrinjalali Date: Sun, 30 Jun 2019 15:31:04 +0200 Subject: [PATCH 04/22] fix decomposition --- .../decomposition/tests/test_dict_learning.py | 24 ++++++++--------- .../tests/test_factor_analysis.py | 6 ++--- sklearn/decomposition/tests/test_fastica.py | 20 +++++++------- .../decomposition/tests/test_kernel_pca.py | 26 +++++++++---------- sklearn/decomposition/tests/test_nmf.py | 12 ++++----- .../decomposition/tests/test_online_lda.py | 10 +++---- .../decomposition/tests/test_sparse_pca.py | 16 ++++++------ 7 files changed, 57 insertions(+), 57 deletions(-) diff --git a/sklearn/decomposition/tests/test_dict_learning.py b/sklearn/decomposition/tests/test_dict_learning.py index f0bd4bedfe5b1..cbe4c822cb5ab 100644 --- a/sklearn/decomposition/tests/test_dict_learning.py +++ b/sklearn/decomposition/tests/test_dict_learning.py @@ -37,18 +37,18 @@ def test_sparse_encode_shapes_omp(): for algorithm, n_jobs in itertools.product(algorithms, [1, 3]): code = sparse_encode(X_, dictionary, algorithm=algorithm, n_jobs=n_jobs) - assert_equal(code.shape, (n_samples, n_components)) + assert code.shape == (n_samples, n_components) def test_dict_learning_shapes(): n_components = 5 dico = DictionaryLearning(n_components, random_state=0).fit(X) - assert_equal(dico.components_.shape, (n_components, n_features)) + assert dico.components_.shape == (n_components, n_features) n_components = 1 dico = DictionaryLearning(n_components, random_state=0).fit(X) - assert_equal(dico.components_.shape, (n_components, n_features)) - assert_equal(dico.transform(X).shape, (X.shape[0], n_components)) + assert dico.components_.shape == (n_components, n_features) + assert dico.transform(X).shape == (X.shape[0], n_components) def test_dict_learning_overcomplete(): @@ -166,7 +166,7 @@ def test_dict_learning_nonzero_coefs(): dico.set_params(transform_algorithm='omp') code = dico.transform(X[np.newaxis, 1]) - assert_equal(len(np.flatnonzero(code)), 3) + assert len(np.flatnonzero(code)) == 3 def test_dict_learning_unknown_fit_algorithm(): @@ -192,9 +192,9 @@ def test_dict_learning_online_shapes(): n_components = 8 code, dictionary = dict_learning_online(X, 
n_components=n_components, alpha=1, random_state=rng) - assert_equal(code.shape, (n_samples, n_components)) - assert_equal(dictionary.shape, (n_components, n_features)) - assert_equal(np.dot(code, dictionary).shape, X.shape) + assert code.shape == (n_samples, n_components) + assert dictionary.shape == (n_components, n_features) + assert np.dot(code, dictionary).shape == X.shape def test_dict_learning_online_lars_positive_parameter(): @@ -352,7 +352,7 @@ def test_sparse_encode_shapes(): V /= np.sum(V ** 2, axis=1)[:, np.newaxis] for algo in ('lasso_lars', 'lasso_cd', 'lars', 'omp', 'threshold'): code = sparse_encode(X, V, algorithm=algo) - assert_equal(code.shape, (n_samples, n_components)) + assert code.shape == (n_samples, n_components) @pytest.mark.parametrize("algo", [ @@ -404,7 +404,7 @@ def test_sparse_encode_error(): V /= np.sum(V ** 2, axis=1)[:, np.newaxis] code = sparse_encode(X, V, alpha=0.001) assert not np.all(code == 0) - assert_less(np.sqrt(np.sum((np.dot(code, V) - X) ** 2)), 0.1) + assert np.sqrt(np.sum((np.dot(code, V) - X) ** 2)) < 0.1 def test_sparse_encode_error_default_sparsity(): @@ -413,7 +413,7 @@ def test_sparse_encode_error_default_sparsity(): D = rng.randn(2, 64) code = ignore_warnings(sparse_encode)(X, D, algorithm='omp', n_nonzero_coefs=None) - assert_equal(code.shape, (100, 2)) + assert code.shape == (100, 2) def test_unknown_method(): @@ -431,7 +431,7 @@ def test_sparse_coder_estimator(): code = SparseCoder(dictionary=V, transform_algorithm='lasso_lars', transform_alpha=0.001).transform(X) assert not np.all(code == 0) - assert_less(np.sqrt(np.sum((np.dot(code, V) - X) ** 2)), 0.1) + assert np.sqrt(np.sum((np.dot(code, V) - X) ** 2)) < 0.1 def test_sparse_coder_parallel_mmap(): diff --git a/sklearn/decomposition/tests/test_factor_analysis.py b/sklearn/decomposition/tests/test_factor_analysis.py index f039ef2abc865..8547a3c0f6bff 100644 --- a/sklearn/decomposition/tests/test_factor_analysis.py +++ b/sklearn/decomposition/tests/test_factor_analysis.py @@ -46,13 +46,13 @@ def test_factor_analysis(): fas.append(fa) X_t = fa.transform(X) - assert_equal(X_t.shape, (n_samples, n_components)) + assert X_t.shape == (n_samples, n_components) assert_almost_equal(fa.loglike_[-1], fa.score_samples(X).sum()) assert_almost_equal(fa.score_samples(X).mean(), fa.score(X)) diff = np.all(np.diff(fa.loglike_)) - assert_greater(diff, 0., 'Log likelihood dif not increase') + assert diff > 0., 'Log likelihood dif not increase' # Sample Covariance scov = np.cov(X, rowvar=0., bias=1.) 
@@ -60,7 +60,7 @@ def test_factor_analysis(): # Model Covariance mcov = fa.get_covariance() diff = np.sum(np.abs(scov - mcov)) / W.size - assert_less(diff, 0.1, "Mean absolute difference is %f" % diff) + assert diff < 0.1, "Mean absolute difference is %f" % diff fa = FactorAnalysis(n_components=n_components, noise_variance_init=np.ones(n_features)) assert_raises(ValueError, fa.fit, X[:, :2]) diff --git a/sklearn/decomposition/tests/test_fastica.py b/sklearn/decomposition/tests/test_fastica.py index 40299ac2aac3a..04ef5d6f86fba 100644 --- a/sklearn/decomposition/tests/test_fastica.py +++ b/sklearn/decomposition/tests/test_fastica.py @@ -44,11 +44,11 @@ def test_gs(): W, _, _ = np.linalg.svd(rng.randn(10, 10)) w = rng.randn(10) _gs_decorrelation(w, W, 10) - assert_less((w ** 2).sum(), 1.e-10) + assert (w ** 2).sum() < 1.e-10 w = rng.randn(10) u = _gs_decorrelation(w, W, 5) tmp = np.dot(u, W.T) - assert_less((tmp[:5] ** 2).sum(), 1.e-10) + assert (tmp[:5] ** 2).sum() < 1.e-10 @pytest.mark.parametrize("add_noise", [True, False]) @@ -123,13 +123,13 @@ def g_test(x): random_state=seed) ica = FastICA(fun=nl, algorithm=algo, random_state=seed) sources = ica.fit_transform(m.T) - assert_equal(ica.components_.shape, (2, 2)) - assert_equal(sources.shape, (1000, 2)) + assert ica.components_.shape == (2, 2) + assert sources.shape == (1000, 2) assert_array_almost_equal(sources_fun, sources) assert_array_almost_equal(sources, ica.transform(m.T)) - assert_equal(ica.mixing_.shape, (2, 2)) + assert ica.mixing_.shape == (2, 2) for fn in [np.tanh, "exp(-.5(x^2))"]: ica = FastICA(fun=fn, algorithm=algo) @@ -225,12 +225,12 @@ def test_fit_transform(): ica = FastICA(n_components=n_components, whiten=whiten, random_state=0) Xt = ica.fit_transform(X) - assert_equal(ica.components_.shape, (n_components_, 10)) - assert_equal(Xt.shape, (100, n_components_)) + assert ica.components_.shape == (n_components_, 10) + assert Xt.shape == (100, n_components_) ica = FastICA(n_components=n_components, whiten=whiten, random_state=0) ica.fit(X) - assert_equal(ica.components_.shape, (n_components_, 10)) + assert ica.components_.shape == (n_components_, 10) Xt2 = ica.transform(X) assert_array_almost_equal(Xt, Xt2) @@ -257,9 +257,9 @@ def test_inverse_transform(): # catch "n_components ignored" warning Xt = ica.fit_transform(X) expected_shape = expected[(whiten, n_components_)] - assert_equal(ica.mixing_.shape, expected_shape) + assert ica.mixing_.shape == expected_shape X2 = ica.inverse_transform(Xt) - assert_equal(X.shape, X2.shape) + assert X.shape == X2.shape # reversibility test in non-reduction case if n_components == X.shape[1]: diff --git a/sklearn/decomposition/tests/test_kernel_pca.py b/sklearn/decomposition/tests/test_kernel_pca.py index 2073875e76c15..c5ac24b3423f7 100644 --- a/sklearn/decomposition/tests/test_kernel_pca.py +++ b/sklearn/decomposition/tests/test_kernel_pca.py @@ -21,7 +21,7 @@ def test_kernel_pca(): def histogram(x, y, **kwargs): # Histogram kernel implemented as a callable. 
- assert_equal(kwargs, {}) # no kernel_params that we didn't ask for + assert kwargs == {} # no kernel_params that we didn't ask for return np.minimum(x, y).sum() for eigen_solver in ("auto", "dense", "arpack"): @@ -40,17 +40,17 @@ def histogram(x, y, **kwargs): # non-regression test: previously, gamma would be 0 by default, # forcing all eigenvalues to 0 under the poly kernel - assert_not_equal(X_fit_transformed.size, 0) + assert X_fit_transformed.size != 0 # transform new data X_pred_transformed = kpca.transform(X_pred) - assert_equal(X_pred_transformed.shape[1], + assert (X_pred_transformed.shape[1] == X_fit_transformed.shape[1]) # inverse transform if inv: X_pred2 = kpca.inverse_transform(X_pred_transformed) - assert_equal(X_pred2.shape, X_pred.shape) + assert X_pred2.shape == X_pred.shape def test_kernel_pca_invalid_parameters(): @@ -103,7 +103,7 @@ def test_kernel_pca_sparse(): # transform new data X_pred_transformed = kpca.transform(X_pred) - assert_equal(X_pred_transformed.shape[1], + assert (X_pred_transformed.shape[1] == X_fit_transformed.shape[1]) # inverse transform @@ -135,7 +135,7 @@ def test_kernel_pca_n_components(): kpca = KernelPCA(n_components=c, eigen_solver=eigen_solver) shape = kpca.fit(X_fit).transform(X_pred).shape - assert_equal(shape, (2, c)) + assert shape == (2, c) def test_remove_zero_eig(): @@ -144,15 +144,15 @@ def test_remove_zero_eig(): # n_components=None (default) => remove_zero_eig is True kpca = KernelPCA() Xt = kpca.fit_transform(X) - assert_equal(Xt.shape, (3, 0)) + assert Xt.shape == (3, 0) kpca = KernelPCA(n_components=2) Xt = kpca.fit_transform(X) - assert_equal(Xt.shape, (3, 2)) + assert Xt.shape == (3, 2) kpca = KernelPCA(n_components=2, remove_zero_eig=True) Xt = kpca.fit_transform(X) - assert_equal(Xt.shape, (3, 0)) + assert Xt.shape == (3, 0) def test_leave_zero_eig(): @@ -227,7 +227,7 @@ def test_gridsearch_pipeline(): param_grid = dict(kernel_pca__gamma=2. ** np.arange(-2, 2)) grid_search = GridSearchCV(pipeline, cv=3, param_grid=param_grid) grid_search.fit(X, y) - assert_equal(grid_search.best_score_, 1) + assert grid_search.best_score_ == 1 # 0.23. warning about tol not having its correct default value. @@ -244,7 +244,7 @@ def test_gridsearch_pipeline_precomputed(): grid_search = GridSearchCV(pipeline, cv=3, param_grid=param_grid) X_kernel = rbf_kernel(X, gamma=2.) grid_search.fit(X_kernel, y) - assert_equal(grid_search.best_score_, 1) + assert grid_search.best_score_ == 1 # 0.23. warning about tol not having its correct default value. @@ -256,7 +256,7 @@ def test_nested_circles(): # 2D nested circles are not linearly separable train_score = Perceptron(max_iter=5).fit(X, y).score(X, y) - assert_less(train_score, 0.8) + assert train_score < 0.8 # Project the circles data into the first 2 components of a RBF Kernel # PCA model. 
@@ -269,4 +269,4 @@ def test_nested_circles(): # The data is perfectly linearly separable in that space train_score = Perceptron(max_iter=5).fit(X_kpca, y).score(X_kpca, y) - assert_equal(train_score, 1.0) + assert train_score == 1.0 diff --git a/sklearn/decomposition/tests/test_nmf.py b/sklearn/decomposition/tests/test_nmf.py index f2f41ecc52f9a..b6703f0c24c0c 100644 --- a/sklearn/decomposition/tests/test_nmf.py +++ b/sklearn/decomposition/tests/test_nmf.py @@ -111,7 +111,7 @@ def test_nmf_fit_close(solver): pnmf = NMF(5, solver=solver, init='nndsvdar', random_state=0, max_iter=600) X = np.abs(rng.randn(6, 5)) - assert_less(pnmf.fit(X).reconstruction_err_, 0.1) + assert pnmf.fit(X).reconstruction_err_ < 0.1 @pytest.mark.parametrize('solver', ('cd', 'mu')) @@ -438,8 +438,8 @@ def test_nmf_regularization(): H_regul_n_zeros = H_regul[H_regul == 0].size H_model_n_zeros = H_model[H_model == 0].size - assert_greater(W_regul_n_zeros, W_model_n_zeros) - assert_greater(H_regul_n_zeros, H_model_n_zeros) + assert W_regul_n_zeros > W_model_n_zeros + assert H_regul_n_zeros > H_model_n_zeros # L2 regularization should decrease the mean of the coefficients l1_ratio = 0. @@ -455,8 +455,8 @@ def test_nmf_regularization(): H_regul = regul.components_ H_model = model.components_ - assert_greater(W_model.mean(), W_regul.mean()) - assert_greater(H_model.mean(), H_regul.mean()) + assert W_model.mean() > W_regul.mean() + assert H_model.mean() > H_regul.mean() @ignore_warnings(category=ConvergenceWarning) @@ -493,7 +493,7 @@ def test_nmf_decreasing(): loss = nmf._beta_divergence(X, W, H, beta_loss) if previous_loss is not None: - assert_greater(previous_loss, loss) + assert previous_loss > loss previous_loss = loss diff --git a/sklearn/decomposition/tests/test_online_lda.py b/sklearn/decomposition/tests/test_online_lda.py index c163a6d58ea65..1c13c890c2ea6 100644 --- a/sklearn/decomposition/tests/test_online_lda.py +++ b/sklearn/decomposition/tests/test_online_lda.py @@ -274,11 +274,11 @@ def test_lda_perplexity(method): lda_2.fit(X) perp_2 = lda_2.perplexity(X, sub_sampling=False) - assert_greater_equal(perp_1, perp_2) + assert perp_1 >= perp_2 perp_1_subsampling = lda_1.perplexity(X, sub_sampling=True) perp_2_subsampling = lda_2.perplexity(X, sub_sampling=True) - assert_greater_equal(perp_1_subsampling, perp_2_subsampling) + assert perp_1_subsampling >= perp_2_subsampling @pytest.mark.parametrize('method', ('online', 'batch')) @@ -297,7 +297,7 @@ def test_lda_score(method): lda_2.fit_transform(X) score_2 = lda_2.score(X) - assert_greater_equal(score_2, score_1) + assert score_2 >= score_1 def test_perplexity_input_format(): @@ -384,8 +384,8 @@ def check_verbosity(verbose, evaluate_every, expected_lines, n_lines = out.getvalue().count('\n') n_perplexity = out.getvalue().count('perplexity') - assert_equal(expected_lines, n_lines) - assert_equal(expected_perplexities, n_perplexity) + assert expected_lines == n_lines + assert expected_perplexities == n_perplexity @pytest.mark.parametrize( diff --git a/sklearn/decomposition/tests/test_sparse_pca.py b/sklearn/decomposition/tests/test_sparse_pca.py index 621738f969d1e..8440dd17717bc 100644 --- a/sklearn/decomposition/tests/test_sparse_pca.py +++ b/sklearn/decomposition/tests/test_sparse_pca.py @@ -45,13 +45,13 @@ def test_correct_shapes(): X = rng.randn(12, 10) spca = SparsePCA(n_components=8, random_state=rng) U = spca.fit_transform(X) - assert_equal(spca.components_.shape, (8, 10)) - assert_equal(U.shape, (12, 8)) + assert spca.components_.shape == (8, 10) + assert 
U.shape == (12, 8) # test overcomplete decomposition spca = SparsePCA(n_components=13, random_state=rng) U = spca.fit_transform(X) - assert_equal(spca.components_.shape, (13, 10)) - assert_equal(U.shape, (12, 13)) + assert spca.components_.shape == (13, 10) + assert U.shape == (12, 13) def test_fit_transform(): @@ -122,13 +122,13 @@ def test_mini_batch_correct_shapes(): X = rng.randn(12, 10) pca = MiniBatchSparsePCA(n_components=8, random_state=rng) U = pca.fit_transform(X) - assert_equal(pca.components_.shape, (8, 10)) - assert_equal(U.shape, (12, 8)) + assert pca.components_.shape == (8, 10) + assert U.shape == (12, 8) # test overcomplete decomposition pca = MiniBatchSparsePCA(n_components=13, random_state=rng) U = pca.fit_transform(X) - assert_equal(pca.components_.shape, (13, 10)) - assert_equal(U.shape, (12, 13)) + assert pca.components_.shape == (13, 10) + assert U.shape == (12, 13) # XXX: test always skipped From 99ee74a661a738973353a5ab2448c5ea73a64f76 Mon Sep 17 00:00:00 2001 From: adrinjalali Date: Sun, 30 Jun 2019 15:35:59 +0200 Subject: [PATCH 05/22] fix ensemble --- sklearn/ensemble/tests/test_bagging.py | 32 ++--- sklearn/ensemble/tests/test_base.py | 18 +-- sklearn/ensemble/tests/test_forest.py | 110 +++++++------- .../ensemble/tests/test_gradient_boosting.py | 134 +++++++++--------- .../test_gradient_boosting_loss_functions.py | 6 +- sklearn/ensemble/tests/test_iforest.py | 14 +- sklearn/ensemble/tests/test_voting.py | 22 +-- .../ensemble/tests/test_weight_boosting.py | 38 ++--- 8 files changed, 185 insertions(+), 189 deletions(-) diff --git a/sklearn/ensemble/tests/test_bagging.py b/sklearn/ensemble/tests/test_bagging.py index 8afa0e5c68ec0..f4bda051816ee 100644 --- a/sklearn/ensemble/tests/test_bagging.py +++ b/sklearn/ensemble/tests/test_bagging.py @@ -241,7 +241,7 @@ def test_bootstrap_samples(): bootstrap=False, random_state=rng).fit(X_train, y_train) - assert_equal(base_estimator.score(X_train, y_train), + assert (base_estimator.score(X_train, y_train) == ensemble.score(X_train, y_train)) # with bootstrap, trees are no longer perfect on the training set @@ -250,7 +250,7 @@ def test_bootstrap_samples(): bootstrap=True, random_state=rng).fit(X_train, y_train) - assert_greater(base_estimator.score(X_train, y_train), + assert (base_estimator.score(X_train, y_train) > ensemble.score(X_train, y_train)) # check that each sampling correspond to a complete bootstrap resample. 
@@ -278,7 +278,7 @@ def test_bootstrap_features(): random_state=rng).fit(X_train, y_train) for features in ensemble.estimators_features_: - assert_equal(boston.data.shape[1], np.unique(features).shape[0]) + assert boston.data.shape[1] == np.unique(features).shape[0] ensemble = BaggingRegressor(base_estimator=DecisionTreeRegressor(), max_features=1.0, @@ -286,7 +286,7 @@ def test_bootstrap_features(): random_state=rng).fit(X_train, y_train) for features in ensemble.estimators_features_: - assert_greater(boston.data.shape[1], np.unique(features).shape[0]) + assert boston.data.shape[1] > np.unique(features).shape[0] def test_probability(): @@ -338,7 +338,7 @@ def test_oob_score_classification(): test_score = clf.score(X_test, y_test) - assert_less(abs(test_score - clf.oob_score_), 0.1) + assert abs(test_score - clf.oob_score_) < 0.1 # Test with few estimators assert_warns(UserWarning, @@ -367,7 +367,7 @@ def test_oob_score_regression(): test_score = clf.score(X_test, y_test) - assert_less(abs(test_score - clf.oob_score_), 0.1) + assert abs(test_score - clf.oob_score_) < 0.1 # Test with few estimators assert_warns(UserWarning, @@ -616,13 +616,13 @@ def test_warm_start(random_state=42): else: clf_ws.set_params(n_estimators=n_estimators) clf_ws.fit(X, y) - assert_equal(len(clf_ws), n_estimators) + assert len(clf_ws) == n_estimators clf_no_ws = BaggingClassifier(n_estimators=10, random_state=random_state, warm_start=False) clf_no_ws.fit(X, y) - assert_equal(set([tree.random_state for tree in clf_ws]), + assert (set([tree.random_state for tree in clf_ws]) == set([tree.random_state for tree in clf_no_ws])) @@ -700,7 +700,7 @@ def test_oob_score_consistency(): bagging = BaggingClassifier(KNeighborsClassifier(), max_samples=0.5, max_features=0.5, oob_score=True, random_state=1) - assert_equal(bagging.fit(X, y).oob_score_, bagging.fit(X, y).oob_score_) + assert bagging.fit(X, y).oob_score_ == bagging.fit(X, y).oob_score_ def test_estimators_samples(): @@ -719,9 +719,9 @@ def test_estimators_samples(): estimators = bagging.estimators_ # Test for correct formatting - assert_equal(len(estimators_samples), len(estimators)) - assert_equal(len(estimators_samples[0]), len(X) // 2) - assert_equal(estimators_samples[0].dtype.kind, 'i') + assert len(estimators_samples) == len(estimators) + assert len(estimators_samples[0]) == len(X) // 2 + assert estimators_samples[0].dtype.kind == 'i' # Re-fit single estimator to test for consistent sampling estimator_index = 0 @@ -776,7 +776,7 @@ def test_max_samples_consistency(): max_samples=max_samples, max_features=0.5, random_state=1) bagging.fit(X, y) - assert_equal(bagging._max_samples, max_samples) + assert bagging._max_samples == max_samples def test_set_oob_score_label_encoding(): @@ -793,7 +793,7 @@ def test_set_oob_score_label_encoding(): random_state=random_state).fit(X, Y2).oob_score_ x3 = BaggingClassifier(oob_score=True, random_state=random_state).fit(X, Y3).oob_score_ - assert_equal([x1, x2], [x3, x3]) + assert [x1, x2] == [x3, x3] def replace(X): @@ -829,7 +829,7 @@ def test_bagging_regressor_with_missing_inputs(): pipeline.fit(X, y).predict(X) bagging_regressor = BaggingRegressor(pipeline) y_hat = bagging_regressor.fit(X, y).predict(X) - assert_equal(y.shape, y_hat.shape) + assert y.shape == y_hat.shape # Verify that exceptions can be raised by wrapper regressor regressor = DecisionTreeRegressor() @@ -857,7 +857,7 @@ def test_bagging_classifier_with_missing_inputs(): bagging_classifier = BaggingClassifier(pipeline) bagging_classifier.fit(X, y) y_hat = 
bagging_classifier.predict(X) - assert_equal(y.shape, y_hat.shape) + assert y.shape == y_hat.shape bagging_classifier.predict_log_proba(X) bagging_classifier.predict_proba(X) diff --git a/sklearn/ensemble/tests/test_base.py b/sklearn/ensemble/tests/test_base.py index d283aadf65d73..7cd2124359e72 100644 --- a/sklearn/ensemble/tests/test_base.py +++ b/sklearn/ensemble/tests/test_base.py @@ -36,14 +36,14 @@ def test_base(): ensemble._make_estimator(random_state=random_state) ensemble._make_estimator(append=False) - assert_equal(3, len(ensemble)) - assert_equal(3, len(ensemble.estimators_)) + assert 3 == len(ensemble) + assert 3 == len(ensemble.estimators_) assert isinstance(ensemble[0], Perceptron) - assert_equal(ensemble[0].random_state, None) + assert ensemble[0].random_state == None assert isinstance(ensemble[1].random_state, int) assert isinstance(ensemble[2].random_state, int) - assert_not_equal(ensemble[1].random_state, ensemble[2].random_state) + assert ensemble[1].random_state != ensemble[2].random_state np_int_ensemble = BaggingClassifier(base_estimator=Perceptron(tol=1e-3), n_estimators=np.int32(3)) @@ -82,7 +82,7 @@ def test_set_random_states(): _set_random_states(LinearDiscriminantAnalysis(), random_state=17) clf1 = Perceptron(tol=1e-3, random_state=None) - assert_equal(clf1.random_state, None) + assert clf1.random_state == None # check random_state is None still sets _set_random_states(clf1, None) assert isinstance(clf1.random_state, int) @@ -92,7 +92,7 @@ def test_set_random_states(): assert isinstance(clf1.random_state, int) clf2 = Perceptron(tol=1e-3, random_state=None) _set_random_states(clf2, 3) - assert_equal(clf1.random_state, clf2.random_state) + assert clf1.random_state == clf2.random_state # nested random_state @@ -105,7 +105,7 @@ def make_steps(): _set_random_states(est1, 3) assert isinstance(est1.steps[0][1].estimator.random_state, int) assert isinstance(est1.steps[1][1].random_state, int) - assert_not_equal(est1.get_params()['sel__estimator__random_state'], + assert (est1.get_params()['sel__estimator__random_state'] != est1.get_params()['clf__random_state']) # ensure multiple random_state parameters are invariant to get_params() @@ -124,7 +124,7 @@ def get_params(self, *args, **kwargs): for cls in [AlphaParamPipeline, RevParamPipeline]: est2 = cls(make_steps()) _set_random_states(est2, 3) - assert_equal(est1.get_params()['sel__estimator__random_state'], + assert (est1.get_params()['sel__estimator__random_state'] == est2.get_params()['sel__estimator__random_state']) - assert_equal(est1.get_params()['clf__random_state'], + assert (est1.get_params()['clf__random_state'] == est2.get_params()['clf__random_state']) diff --git a/sklearn/ensemble/tests/test_forest.py b/sklearn/ensemble/tests/test_forest.py index e23d812611681..93b3309ba5a1a 100644 --- a/sklearn/ensemble/tests/test_forest.py +++ b/sklearn/ensemble/tests/test_forest.py @@ -119,16 +119,16 @@ def check_classification_toy(name): clf = ForestClassifier(n_estimators=10, random_state=1) clf.fit(X, y) assert_array_equal(clf.predict(T), true_result) - assert_equal(10, len(clf)) + assert 10 == len(clf) clf = ForestClassifier(n_estimators=10, max_features=1, random_state=1) clf.fit(X, y) assert_array_equal(clf.predict(T), true_result) - assert_equal(10, len(clf)) + assert 10 == len(clf) # also test apply leaf_indices = clf.apply(X) - assert_equal(leaf_indices.shape, (len(X), clf.n_estimators)) + assert leaf_indices.shape == (len(X), clf.n_estimators) @pytest.mark.parametrize('name', FOREST_CLASSIFIERS) @@ -144,15 
+144,15 @@ def check_iris_criterion(name, criterion): random_state=1) clf.fit(iris.data, iris.target) score = clf.score(iris.data, iris.target) - assert_greater(score, 0.9, "Failed with criterion %s and score = %f" - % (criterion, score)) + assert score > 0.9, ("Failed with criterion %s and score = %f" + % (criterion, score)) clf = ForestClassifier(n_estimators=10, criterion=criterion, max_features=2, random_state=1) clf.fit(iris.data, iris.target) score = clf.score(iris.data, iris.target) - assert_greater(score, 0.5, "Failed with criterion %s and score = %f" - % (criterion, score)) + assert score > 0.5, ("Failed with criterion %s and score = %f" + % (criterion, score)) @pytest.mark.parametrize('name', FOREST_CLASSIFIERS) @@ -169,15 +169,15 @@ def check_boston_criterion(name, criterion): random_state=1) clf.fit(boston.data, boston.target) score = clf.score(boston.data, boston.target) - assert_greater(score, 0.94, "Failed with max_features=None, criterion %s " - "and score = %f" % (criterion, score)) + assert score > 0.94, ("Failed with max_features=None, criterion %s " + "and score = %f" % (criterion, score)) clf = ForestRegressor(n_estimators=5, criterion=criterion, max_features=6, random_state=1) clf.fit(boston.data, boston.target) score = clf.score(boston.data, boston.target) - assert_greater(score, 0.95, "Failed with max_features=6, criterion %s " - "and score = %f" % (criterion, score)) + assert score > 0.95, ("Failed with max_features=6, criterion %s " + "and score = %f" % (criterion, score)) @pytest.mark.parametrize('name', FOREST_REGRESSORS) @@ -235,8 +235,8 @@ def check_importances(name, criterion, dtype, tolerance): # The forest estimator can detect that only the first 3 features of the # dataset are informative: n_important = np.sum(importances > 0.1) - assert_equal(importances.shape[0], 10) - assert_equal(n_important, 3) + assert importances.shape[0] == 10 + assert n_important == 3 assert np.all(importances[:3] > 0.1) # Check with parallel @@ -257,7 +257,7 @@ def check_importances(name, criterion, dtype, tolerance): criterion=criterion) est.fit(X, y, sample_weight=scale * sample_weight) importances_bis = est.feature_importances_ - assert_less(np.abs(importances - importances_bis).mean(), tolerance) + assert np.abs(importances - importances_bis).mean() < tolerance @pytest.mark.parametrize('dtype', (np.float64, np.float32)) @@ -364,7 +364,7 @@ def mdi_importance(X_m, X, y): # Check correctness assert_almost_equal(entropy(y), sum(importances)) - assert_less(np.abs(true_importances - importances).mean(), 0.01) + assert np.abs(true_importances - importances).mean() < 0.01 @pytest.mark.parametrize('name', FOREST_ESTIMATORS) @@ -387,10 +387,10 @@ def check_oob_score(name, X, y, n_estimators=20): test_score = est.score(X[n_samples // 2:, :], y[n_samples // 2:]) if name in FOREST_CLASSIFIERS: - assert_less(abs(test_score - est.oob_score_), 0.1) + assert abs(test_score - est.oob_score_) < 0.1 else: - assert_greater(test_score, est.oob_score_) - assert_greater(est.oob_score_, .8) + assert test_score > est.oob_score_ + assert est.oob_score_ > .8 # Check warning if not enough estimators with np.errstate(divide="ignore", invalid="ignore"): @@ -464,7 +464,7 @@ def check_parallel(name, X, y): forest = ForestEstimator(n_estimators=10, n_jobs=3, random_state=0) forest.fit(X, y) - assert_equal(len(forest), 10) + assert len(forest) == 10 forest.set_params(n_jobs=1) y1 = forest.predict(X) @@ -493,9 +493,9 @@ def check_pickle(name, X, y): pickle_object = pickle.dumps(obj) obj2 = 
pickle.loads(pickle_object) - assert_equal(type(obj2), obj.__class__) + assert type(obj2) == obj.__class__ score2 = obj2.score(X, y) - assert_equal(score, score2) + assert score == score2 @pytest.mark.parametrize('name', FOREST_CLASSIFIERS_REGRESSORS) @@ -577,7 +577,7 @@ def check_classes_shape(name): # Classification, single output clf = ForestClassifier(random_state=0).fit(X, y) - assert_equal(clf.n_classes_, 2) + assert clf.n_classes_ == 2 assert_array_equal(clf.classes_, [-1, 1]) # Classification, multi-output @@ -603,7 +603,7 @@ def test_random_trees_dense_type(): X_transformed = hasher.fit_transform(X) # Assert that type is ndarray, not scipy.sparse.csr.csr_matrix - assert_equal(type(X_transformed), np.ndarray) + assert type(X_transformed) == np.ndarray def test_random_trees_dense_equal(): @@ -640,13 +640,13 @@ def test_random_hasher(): X_transformed.toarray()) # one leaf active per data point per forest - assert_equal(X_transformed.shape[0], X.shape[0]) + assert X_transformed.shape[0] == X.shape[0] assert_array_equal(X_transformed.sum(axis=1), hasher.n_estimators) svd = TruncatedSVD(n_components=2) X_reduced = svd.fit_transform(X_transformed) linear_clf = LinearSVC() linear_clf.fit(X_reduced, y) - assert_equal(linear_clf.score(X_reduced, y), 1.) + assert linear_clf.score(X_reduced, y) == 1. def test_random_hasher_sparse_data(): @@ -700,13 +700,13 @@ def test_distribution(): # are 5 ways to build a random tree. The more compact (0,1/0,0/--0,2/--) of # them has probability 1/3 while the 4 others have probability 1/6. - assert_equal(len(uniques), 5) - assert_greater(0.20, uniques[0][0]) # Rough approximation of 1/6. - assert_greater(0.20, uniques[1][0]) - assert_greater(0.20, uniques[2][0]) - assert_greater(0.20, uniques[3][0]) - assert_greater(uniques[4][0], 0.3) - assert_equal(uniques[4][1], "0,1/0,0/--0,2/--") + assert len(uniques) == 5 + assert 0.20 > uniques[0][0] # Rough approximation of 1/6. 
+ assert 0.20 > uniques[1][0] + assert 0.20 > uniques[2][0] + assert 0.20 > uniques[3][0] + assert uniques[4][0] > 0.3 + assert uniques[4][1] == "0,1/0,0/--0,2/--" # Two variables, one with 2 values, one with 3 values X = np.empty((1000, 2)) @@ -725,7 +725,7 @@ def test_distribution(): uniques[tree] += 1 uniques = [(count, tree) for tree, count in uniques.items()] - assert_equal(len(uniques), 8) + assert len(uniques) == 8 def check_max_leaf_nodes_max_depth(name): @@ -735,11 +735,11 @@ def check_max_leaf_nodes_max_depth(name): ForestEstimator = FOREST_ESTIMATORS[name] est = ForestEstimator(max_depth=1, max_leaf_nodes=4, n_estimators=1, random_state=0).fit(X, y) - assert_equal(est.estimators_[0].get_depth(), 1) + assert est.estimators_[0].get_depth() == 1 est = ForestEstimator(max_depth=1, n_estimators=1, random_state=0).fit(X, y) - assert_equal(est.estimators_[0].get_depth(), 1) + assert est.estimators_[0].get_depth() == 1 @pytest.mark.parametrize('name', FOREST_ESTIMATORS) @@ -764,8 +764,7 @@ def check_min_samples_split(name): node_idx = est.estimators_[0].tree_.children_left != -1 node_samples = est.estimators_[0].tree_.n_node_samples[node_idx] - assert_greater(np.min(node_samples), len(X) * 0.5 - 1, - "Failed with {0}".format(name)) + assert np.min(node_samples) > len(X) * 0.5 - 1, "Failed with {0}".format(name) est = ForestEstimator(min_samples_split=0.5, n_estimators=1, random_state=0) @@ -773,8 +772,7 @@ def check_min_samples_split(name): node_idx = est.estimators_[0].tree_.children_left != -1 node_samples = est.estimators_[0].tree_.n_node_samples[node_idx] - assert_greater(np.min(node_samples), len(X) * 0.5 - 1, - "Failed with {0}".format(name)) + assert np.min(node_samples) > len(X) * 0.5 - 1, "Failed with {0}".format(name) @pytest.mark.parametrize('name', FOREST_ESTIMATORS) @@ -800,8 +798,7 @@ def check_min_samples_leaf(name): node_counts = np.bincount(out) # drop inner nodes leaf_count = node_counts[node_counts != 0] - assert_greater(np.min(leaf_count), 4, - "Failed with {0}".format(name)) + assert np.min(leaf_count) > 4, "Failed with {0}".format(name) est = ForestEstimator(min_samples_leaf=0.25, n_estimators=1, random_state=0) @@ -810,8 +807,7 @@ def check_min_samples_leaf(name): node_counts = np.bincount(out) # drop inner nodes leaf_count = node_counts[node_counts != 0] - assert_greater(np.min(leaf_count), len(X) * 0.25 - 1, - "Failed with {0}".format(name)) + assert np.min(leaf_count) > len(X) * 0.25 - 1, "Failed with {0}".format(name) @pytest.mark.parametrize('name', FOREST_ESTIMATORS) @@ -842,12 +838,12 @@ def check_min_weight_fraction_leaf(name): node_weights = np.bincount(out, weights=weights) # drop inner nodes leaf_weights = node_weights[node_weights != 0] - assert_greater_equal( - np.min(leaf_weights), - total_weight * est.min_weight_fraction_leaf, - "Failed with {0} " - "min_weight_fraction_leaf={1}".format( - name, est.min_weight_fraction_leaf)) + assert_message = ("Failed with {0} " + "min_weight_fraction_leaf={1}".format( + name, est.min_weight_fraction_leaf)) + assert ( + np.min(leaf_weights) >= + total_weight * est.min_weight_fraction_leaf), assert_message @pytest.mark.parametrize('name', FOREST_ESTIMATORS) @@ -1084,13 +1080,13 @@ def check_warm_start(name, random_state=42): else: clf_ws.set_params(n_estimators=n_estimators) clf_ws.fit(X, y) - assert_equal(len(clf_ws), n_estimators) + assert len(clf_ws) == n_estimators clf_no_ws = ForestEstimator(n_estimators=10, random_state=random_state, warm_start=False) clf_no_ws.fit(X, y) - assert_equal(set([tree.random_state 
for tree in clf_ws]), + assert (set([tree.random_state for tree in clf_ws]) == set([tree.random_state for tree in clf_no_ws])) assert_array_equal(clf_ws.apply(X), clf_no_ws.apply(X), @@ -1182,7 +1178,7 @@ def check_warm_start_oob(name): clf_2.fit(X, y) assert hasattr(clf_2, 'oob_score_') - assert_equal(clf.oob_score_, clf_2.oob_score_) + assert clf.oob_score_ == clf_2.oob_score_ # Test that oob_score is computed even if we don't need to train # additional trees. @@ -1194,7 +1190,7 @@ def check_warm_start_oob(name): clf_3.set_params(oob_score=True) ignore_warnings(clf_3.fit)(X, y) - assert_equal(clf.oob_score_, clf_3.oob_score_) + assert clf.oob_score_ == clf_3.oob_score_ @pytest.mark.parametrize('name', FOREST_CLASSIFIERS_REGRESSORS) @@ -1222,8 +1218,8 @@ def check_decision_path(name): est.fit(X, y) indicator, n_nodes_ptr = est.decision_path(X) - assert_equal(indicator.shape[1], n_nodes_ptr[-1]) - assert_equal(indicator.shape[0], n_samples) + assert indicator.shape[1] == n_nodes_ptr[-1] + assert indicator.shape[0] == n_samples assert_array_equal(np.diff(n_nodes_ptr), [e.tree_.node_count for e in est.estimators_]) @@ -1252,7 +1248,7 @@ def test_min_impurity_split(): est = assert_warns_message(DeprecationWarning, "min_impurity_decrease", est.fit, X, y) for tree in est.estimators_: - assert_equal(tree.min_impurity_split, 0.1) + assert tree.min_impurity_split == 0.1 def test_min_impurity_decrease(): @@ -1266,7 +1262,7 @@ def test_min_impurity_decrease(): for tree in est.estimators_: # Simply check if the parameter is passed on correctly. Tree tests # will suffice for the actual working of this param - assert_equal(tree.min_impurity_decrease, 0.1) + assert tree.min_impurity_decrease == 0.1 class MyBackend(DEFAULT_JOBLIB_BACKEND): diff --git a/sklearn/ensemble/tests/test_gradient_boosting.py b/sklearn/ensemble/tests/test_gradient_boosting.py index 1f32d1afbb371..aa041073157a4 100644 --- a/sklearn/ensemble/tests/test_gradient_boosting.py +++ b/sklearn/ensemble/tests/test_gradient_boosting.py @@ -77,13 +77,13 @@ def check_classification_toy(presort, loss): clf.fit(X, y) assert_array_equal(clf.predict(T), true_result) - assert_equal(10, len(clf.estimators_)) + assert 10 == len(clf.estimators_) deviance_decrease = (clf.train_score_[:-1] - clf.train_score_[1:]) assert np.any(deviance_decrease >= 0.0) leaves = clf.apply(X) - assert_equal(leaves.shape, (6, 10, 1)) + assert leaves.shape == (6, 10, 1) @pytest.mark.parametrize('presort', ('auto', True, False)) @@ -212,7 +212,7 @@ def check_classification_synthetic(presort, loss): learning_rate=1.0, random_state=0) gbrt.fit(X_train, y_train) error_rate = (1.0 - gbrt.score(X_test, y_test)) - assert_less(error_rate, 0.09) + assert error_rate < 0.09 gbrt = GradientBoostingClassifier(n_estimators=200, min_samples_split=2, max_depth=1, loss=loss, @@ -221,7 +221,7 @@ def check_classification_synthetic(presort, loss): presort=presort) gbrt.fit(X_train, y_train) error_rate = (1.0 - gbrt.score(X_test, y_test)) - assert_less(error_rate, 0.08) + assert error_rate < 0.08 @pytest.mark.parametrize('presort', ('auto', True, False)) @@ -248,11 +248,11 @@ def check_boston(presort, loss, subsample): clf.fit(boston.data, boston.target, sample_weight=sample_weight) leaves = clf.apply(boston.data) - assert_equal(leaves.shape, (506, 100)) + assert leaves.shape == (506, 100) y_pred = clf.predict(boston.data) mse = mean_squared_error(boston.target, y_pred) - assert_less(mse, 6.0) + assert mse < 6.0 if last_y_pred is not None: assert_array_almost_equal(last_y_pred, y_pred) @@ 
-276,10 +276,10 @@ def check_iris(presort, subsample, sample_weight): presort=presort) clf.fit(iris.data, iris.target, sample_weight=sample_weight) score = clf.score(iris.data, iris.target) - assert_greater(score, 0.9) + assert score > 0.9 leaves = clf.apply(iris.data) - assert_equal(leaves.shape, (150, 100, 3)) + assert leaves.shape == (150, 100, 3) @pytest.mark.parametrize('presort', ('auto', True, False)) @@ -310,7 +310,7 @@ def test_regression_synthetic(): clf = GradientBoostingRegressor(presort=presort) clf.fit(X_train, y_train) mse = mean_squared_error(y_test, clf.predict(X_test)) - assert_less(mse, 5.0) + assert mse < 5.0 # Friedman2 X, y = datasets.make_friedman2(n_samples=1200, random_state=random_state) @@ -322,7 +322,7 @@ def test_regression_synthetic(): clf = GradientBoostingRegressor(**regression_params) clf.fit(X_train, y_train) mse = mean_squared_error(y_test, clf.predict(X_test)) - assert_less(mse, 1700.0) + assert mse < 1700.0 # Friedman3 X, y = datasets.make_friedman3(n_samples=1200, random_state=random_state) @@ -334,7 +334,7 @@ def test_regression_synthetic(): clf = GradientBoostingRegressor(**regression_params) clf.fit(X_train, y_train) mse = mean_squared_error(y_test, clf.predict(X_test)) - assert_less(mse, 0.015) + assert mse < 0.015 def test_feature_importances(): @@ -499,28 +499,28 @@ def test_max_feature_auto(): gbrt = GradientBoostingClassifier(n_estimators=1, max_features='auto') gbrt.fit(X_train, y_train) - assert_equal(gbrt.max_features_, int(np.sqrt(n_features))) + assert gbrt.max_features_ == int(np.sqrt(n_features)) gbrt = GradientBoostingRegressor(n_estimators=1, max_features='auto') gbrt.fit(X_train, y_train) - assert_equal(gbrt.max_features_, n_features) + assert gbrt.max_features_ == n_features gbrt = GradientBoostingRegressor(n_estimators=1, max_features=0.3) gbrt.fit(X_train, y_train) - assert_equal(gbrt.max_features_, int(n_features * 0.3)) + assert gbrt.max_features_ == int(n_features * 0.3) gbrt = GradientBoostingRegressor(n_estimators=1, max_features='sqrt') gbrt.fit(X_train, y_train) - assert_equal(gbrt.max_features_, int(np.sqrt(n_features))) + assert gbrt.max_features_ == int(np.sqrt(n_features)) gbrt = GradientBoostingRegressor(n_estimators=1, max_features='log2') gbrt.fit(X_train, y_train) - assert_equal(gbrt.max_features_, int(np.log2(n_features))) + assert gbrt.max_features_ == int(np.log2(n_features)) gbrt = GradientBoostingRegressor(n_estimators=1, max_features=0.01 / X.shape[1]) gbrt.fit(X_train, y_train) - assert_equal(gbrt.max_features_, 1) + assert gbrt.max_features_ == 1 def test_staged_predict(): @@ -540,7 +540,7 @@ def test_staged_predict(): # test if prediction for last stage equals ``predict`` for y in clf.staged_predict(X_test): - assert_equal(y.shape, y_pred.shape) + assert y.shape == y_pred.shape assert_array_almost_equal(y_pred, y) @@ -561,14 +561,14 @@ def test_staged_predict_proba(): # test if prediction for last stage equals ``predict`` for y_pred in clf.staged_predict(X_test): - assert_equal(y_test.shape, y_pred.shape) + assert y_test.shape == y_pred.shape assert_array_equal(clf.predict(X_test), y_pred) # test if prediction for last stage equals ``predict_proba`` for staged_proba in clf.staged_predict_proba(X_test): - assert_equal(y_test.shape[0], staged_proba.shape[0]) - assert_equal(2, staged_proba.shape[1]) + assert y_test.shape[0] == staged_proba.shape[0] + assert 2 == staged_proba.shape[1] assert_array_almost_equal(clf.predict_proba(X_test), staged_proba) @@ -598,7 +598,7 @@ def test_serialization(): clf.fit(X, y) 
assert_array_equal(clf.predict(T), true_result) - assert_equal(100, len(clf.estimators_)) + assert 100 == len(clf.estimators_) try: import cPickle as pickle @@ -609,7 +609,7 @@ def test_serialization(): clf = None clf = pickle.loads(serialized_clf) assert_array_equal(clf.predict(T), true_result) - assert_equal(100, len(clf.estimators_)) + assert 100 == len(clf.estimators_) def test_degenerate_targets(): @@ -651,7 +651,7 @@ def test_symbol_labels(): clf.fit(X, symbol_y) assert_array_equal(clf.predict(T), tosequence(map(str, true_result))) - assert_equal(100, len(clf.estimators_)) + assert 100 == len(clf.estimators_) def test_float_class_labels(): @@ -663,7 +663,7 @@ def test_float_class_labels(): clf.fit(X, float_y) assert_array_equal(clf.predict(T), np.asarray(true_result, dtype=np.float32)) - assert_equal(100, len(clf.estimators_)) + assert 100 == len(clf.estimators_) def test_shape_y(): @@ -678,7 +678,7 @@ def test_shape_y(): # later tests, and the tests that check for this warning fail assert_warns(DataConversionWarning, clf.fit, X, y_) assert_array_equal(clf.predict(T), true_result) - assert_equal(100, len(clf.estimators_)) + assert 100 == len(clf.estimators_) def test_mem_layout(): @@ -687,27 +687,27 @@ def test_mem_layout(): clf = GradientBoostingClassifier(n_estimators=100, random_state=1) clf.fit(X_, y) assert_array_equal(clf.predict(T), true_result) - assert_equal(100, len(clf.estimators_)) + assert 100 == len(clf.estimators_) X_ = np.ascontiguousarray(X) clf = GradientBoostingClassifier(n_estimators=100, random_state=1) clf.fit(X_, y) assert_array_equal(clf.predict(T), true_result) - assert_equal(100, len(clf.estimators_)) + assert 100 == len(clf.estimators_) y_ = np.asarray(y, dtype=np.int32) y_ = np.ascontiguousarray(y_) clf = GradientBoostingClassifier(n_estimators=100, random_state=1) clf.fit(X, y_) assert_array_equal(clf.predict(T), true_result) - assert_equal(100, len(clf.estimators_)) + assert 100 == len(clf.estimators_) y_ = np.asarray(y, dtype=np.int32) y_ = np.asfortranarray(y_) clf = GradientBoostingClassifier(n_estimators=100, random_state=1) clf.fit(X, y_) assert_array_equal(clf.predict(T), true_result) - assert_equal(100, len(clf.estimators_)) + assert 100 == len(clf.estimators_) def test_oob_improvement(): @@ -715,7 +715,7 @@ def test_oob_improvement(): clf = GradientBoostingClassifier(n_estimators=100, random_state=1, subsample=0.5) clf.fit(X, y) - assert_equal(clf.oob_improvement_.shape[0], 100) + assert clf.oob_improvement_.shape[0] == 100 # hard-coded regression test - change if modification in OOB computation assert_array_almost_equal(clf.oob_improvement_[:5], np.array([0.19, 0.15, 0.12, -0.12, -0.11]), @@ -736,8 +736,8 @@ def test_oob_multilcass_iris(): random_state=1, subsample=0.5) clf.fit(iris.data, iris.target) score = clf.score(iris.data, iris.target) - assert_greater(score, 0.9) - assert_equal(clf.oob_improvement_.shape[0], clf.n_estimators) + assert score > 0.9 + assert clf.oob_improvement_.shape[0] == clf.n_estimators # hard-coded regression test - change if modification in OOB computation # FIXME: the following snippet does not yield the same results on 32 bits # assert_array_almost_equal(clf.oob_improvement_[:5], @@ -764,11 +764,11 @@ def test_verbose_output(): # with OOB true_header = ' '.join(['%10s'] + ['%16s'] * 3) % ( 'Iter', 'Train Loss', 'OOB Improve', 'Remaining Time') - assert_equal(true_header, header) + assert true_header == header n_lines = sum(1 for l in verbose_output.readlines()) # one for 1-10 and then 9 for 20-100 - assert_equal(10 
+ 9, n_lines) + assert 10 + 9 == n_lines def test_more_verbose_output(): @@ -789,11 +789,11 @@ def test_more_verbose_output(): # no OOB true_header = ' '.join(['%10s'] + ['%16s'] * 2) % ( 'Iter', 'Train Loss', 'Remaining Time') - assert_equal(true_header, header) + assert true_header == header n_lines = sum(1 for l in verbose_output.readlines()) # 100 lines for n_estimators==100 - assert_equal(100, n_lines) + assert 100 == n_lines @pytest.mark.parametrize('Cls', GRADIENT_BOOSTING_ESTIMATORS) @@ -843,9 +843,9 @@ def test_warm_start_max_depth(Cls): est.fit(X, y) # last 10 trees have different depth - assert_equal(est.estimators_[0, 0].max_depth, 1) + assert est.estimators_[0, 0].max_depth == 1 for i in range(1, 11): - assert_equal(est.estimators_[-i, 0].max_depth, 2) + assert est.estimators_[-i, 0].max_depth == 2 @pytest.mark.parametrize('Cls', GRADIENT_BOOSTING_ESTIMATORS) @@ -994,33 +994,33 @@ def test_monitor_early_stopping(Cls): est = Cls(n_estimators=20, max_depth=1, random_state=1, subsample=0.5) est.fit(X, y, monitor=early_stopping_monitor) - assert_equal(est.n_estimators, 20) # this is not altered - assert_equal(est.estimators_.shape[0], 10) - assert_equal(est.train_score_.shape[0], 10) - assert_equal(est.oob_improvement_.shape[0], 10) + assert est.n_estimators == 20 # this is not altered + assert est.estimators_.shape[0] == 10 + assert est.train_score_.shape[0] == 10 + assert est.oob_improvement_.shape[0] == 10 # try refit est.set_params(n_estimators=30) est.fit(X, y) - assert_equal(est.n_estimators, 30) - assert_equal(est.estimators_.shape[0], 30) - assert_equal(est.train_score_.shape[0], 30) + assert est.n_estimators == 30 + assert est.estimators_.shape[0] == 30 + assert est.train_score_.shape[0] == 30 est = Cls(n_estimators=20, max_depth=1, random_state=1, subsample=0.5, warm_start=True) est.fit(X, y, monitor=early_stopping_monitor) - assert_equal(est.n_estimators, 20) - assert_equal(est.estimators_.shape[0], 10) - assert_equal(est.train_score_.shape[0], 10) - assert_equal(est.oob_improvement_.shape[0], 10) + assert est.n_estimators == 20 + assert est.estimators_.shape[0] == 10 + assert est.train_score_.shape[0] == 10 + assert est.oob_improvement_.shape[0] == 10 # try refit est.set_params(n_estimators=30, warm_start=False) est.fit(X, y) - assert_equal(est.n_estimators, 30) - assert_equal(est.train_score_.shape[0], 30) - assert_equal(est.estimators_.shape[0], 30) - assert_equal(est.oob_improvement_.shape[0], 30) + assert est.n_estimators == 30 + assert est.train_score_.shape[0] == 30 + assert est.estimators_.shape[0] == 30 + assert est.oob_improvement_.shape[0] == 30 def test_complete_classification(): @@ -1034,8 +1034,8 @@ def test_complete_classification(): est.fit(X, y) tree = est.estimators_[0, 0].tree_ - assert_equal(tree.max_depth, k) - assert_equal(tree.children_left[tree.children_left == TREE_LEAF].shape[0], + assert tree.max_depth == k + assert (tree.children_left[tree.children_left == TREE_LEAF].shape[0] == k + 1) @@ -1049,7 +1049,7 @@ def test_complete_regression(): est.fit(boston.data, boston.target) tree = est.estimators_[-1, 0].tree_ - assert_equal(tree.children_left[tree.children_left == TREE_LEAF].shape[0], + assert (tree.children_left[tree.children_left == TREE_LEAF].shape[0] == k + 1) @@ -1077,7 +1077,7 @@ def test_zero_estimator_clf(): random_state=1, init='zero') est.fit(X, y) - assert_greater(est.score(X, y), 0.96) + assert est.score(X, y) > 0.96 # binary clf mask = y != 0 @@ -1086,7 +1086,7 @@ def test_zero_estimator_clf(): est = 
GradientBoostingClassifier(n_estimators=20, max_depth=1, random_state=1, init='zero') est.fit(X, y) - assert_greater(est.score(X, y), 0.96) + assert est.score(X, y) > 0.96 est = GradientBoostingClassifier(n_estimators=20, max_depth=1, random_state=1, init='foobar') @@ -1102,11 +1102,11 @@ def test_max_leaf_nodes_max_depth(GBEstimator): est = GBEstimator(max_depth=1, max_leaf_nodes=k).fit(X, y) tree = est.estimators_[0, 0].tree_ - assert_equal(tree.max_depth, 1) + assert tree.max_depth == 1 est = GBEstimator(max_depth=1).fit(X, y) tree = est.estimators_[0, 0].tree_ - assert_equal(tree.max_depth, 1) + assert tree.max_depth == 1 @pytest.mark.parametrize('GBEstimator', GRADIENT_BOOSTING_ESTIMATORS) @@ -1119,7 +1119,7 @@ def test_min_impurity_split(GBEstimator): est = assert_warns_message(DeprecationWarning, "min_impurity_decrease", est.fit, X, y) for tree in est.estimators_.flat: - assert_equal(tree.min_impurity_split, 0.1) + assert tree.min_impurity_split == 0.1 @pytest.mark.parametrize('GBEstimator', GRADIENT_BOOSTING_ESTIMATORS) @@ -1131,7 +1131,7 @@ def test_min_impurity_decrease(GBEstimator): for tree in est.estimators_.flat: # Simply check if the parameter is passed on correctly. Tree tests # will suffice for the actual working of this param - assert_equal(tree.min_impurity_decrease, 0.1) + assert tree.min_impurity_decrease == 0.1 def test_warm_start_wo_nestimators_change(): @@ -1139,9 +1139,9 @@ def test_warm_start_wo_nestimators_change(): # Regression test for #3513. clf = GradientBoostingClassifier(n_estimators=10, warm_start=True) clf.fit([[0, 1], [2, 3]], [0, 1]) - assert_equal(clf.estimators_.shape[0], 10) + assert clf.estimators_.shape[0] == 10 clf.fit([[0, 1], [2, 3]], [0, 1]) - assert_equal(clf.estimators_.shape[0], 10) + assert clf.estimators_.shape[0] == 10 def test_probability_exponential(): @@ -1178,7 +1178,7 @@ def test_non_uniform_weights_toy_edge_case_reg(): gb = GradientBoostingRegressor(learning_rate=1.0, n_estimators=2, loss=loss) gb.fit(X, y, sample_weight=sample_weight) - assert_greater(gb.predict([[1, 0]])[0], 0.5) + assert gb.predict([[1, 0]])[0] > 0.5 def test_non_uniform_weights_toy_edge_case_clf(): @@ -1271,7 +1271,7 @@ def test_gradient_boosting_early_stopping(): (gbr, 1e-3, 28)): est.set_params(tol=tol) est.fit(X_train, y_train) - assert_equal(est.n_estimators_, early_stop_n_estimators) + assert est.n_estimators_ == early_stop_n_estimators assert est.score(X_test, y_test) > 0.7 # Without early stopping diff --git a/sklearn/ensemble/tests/test_gradient_boosting_loss_functions.py b/sklearn/ensemble/tests/test_gradient_boosting_loss_functions.py index a82dbab4e7464..d7fbc4a986469 100644 --- a/sklearn/ensemble/tests/test_gradient_boosting_loss_functions.py +++ b/sklearn/ensemble/tests/test_gradient_boosting_loss_functions.py @@ -27,7 +27,7 @@ def test_binomial_deviance(): bd = BinomialDeviance(2) # pred has the same BD for y in {0, 1} - assert_equal(bd(np.array([0.0]), np.array([0.0])), + assert (bd(np.array([0.0]), np.array([0.0])) == bd(np.array([1.0]), np.array([0.0]))) assert_almost_equal(bd(np.array([1.0, 1.0, 1.0]), @@ -93,12 +93,12 @@ def test_sample_weight_init_estimators(): init_est = loss.init_estimator() init_est.fit(X, y) out = loss.get_init_raw_predictions(X, init_est) - assert_equal(out.shape, (y.shape[0], 1)) + assert out.shape == (y.shape[0], 1) sw_init_est = loss.init_estimator() sw_init_est.fit(X, y, sample_weight=sample_weight) sw_out = loss.get_init_raw_predictions(X, sw_init_est) - assert_equal(sw_out.shape, (y.shape[0], 1)) + assert 
sw_out.shape == (y.shape[0], 1) # check if predictions match assert_allclose(out, sw_out, rtol=1e-2) diff --git a/sklearn/ensemble/tests/test_iforest.py b/sklearn/ensemble/tests/test_iforest.py index bd44421b2782c..298e0e422cce5 100644 --- a/sklearn/ensemble/tests/test_iforest.py +++ b/sklearn/ensemble/tests/test_iforest.py @@ -137,22 +137,22 @@ def test_recalculate_max_depth(): X = iris.data clf = IsolationForest().fit(X) for est in clf.estimators_: - assert_equal(est.max_depth, int(np.ceil(np.log2(X.shape[0])))) + assert est.max_depth == int(np.ceil(np.log2(X.shape[0]))) def test_max_samples_attribute(): X = iris.data clf = IsolationForest().fit(X) - assert_equal(clf.max_samples_, X.shape[0]) + assert clf.max_samples_ == X.shape[0] clf = IsolationForest(max_samples=500) assert_warns_message(UserWarning, "max_samples will be set to n_samples for estimation", clf.fit, X) - assert_equal(clf.max_samples_, X.shape[0]) + assert clf.max_samples_ == X.shape[0] clf = IsolationForest(max_samples=0.4).fit(X) - assert_equal(clf.max_samples_, 0.4*X.shape[0]) + assert clf.max_samples_ == 0.4*X.shape[0] def test_iforest_parallel_regression(): @@ -200,7 +200,7 @@ def test_iforest_performance(): y_pred = - clf.decision_function(X_test) # check that there is at most 6 errors (false positive or false negative) - assert_greater(roc_auc_score(y_test, y_pred), 0.98) + assert roc_auc_score(y_test, y_pred) > 0.98 @pytest.mark.parametrize("contamination", [0.25, "auto"]) @@ -214,7 +214,7 @@ def test_iforest_works(contamination): decision_func = -clf.decision_function(X) pred = clf.predict(X) # assert detect outliers: - assert_greater(np.min(decision_func[-2:]), np.max(decision_func[:-2])) + assert np.min(decision_func[-2:]) > np.max(decision_func[:-2]) assert_array_equal(pred, 6 * [1] + 2 * [-1]) @@ -222,7 +222,7 @@ def test_max_samples_consistency(): # Make sure validated max_samples in iforest and BaseBagging are identical X = iris.data clf = IsolationForest().fit(X) - assert_equal(clf.max_samples_, clf._max_samples) + assert clf.max_samples_ == clf._max_samples def test_iforest_subsampled_features(): diff --git a/sklearn/ensemble/tests/test_voting.py b/sklearn/ensemble/tests/test_voting.py index 0122692202938..be29d1fbcff64 100644 --- a/sklearn/ensemble/tests/test_voting.py +++ b/sklearn/ensemble/tests/test_voting.py @@ -109,9 +109,9 @@ def test_tie_situation(): clf2 = RandomForestClassifier(random_state=123) eclf = VotingClassifier(estimators=[('lr', clf1), ('rf', clf2)], voting='hard') - assert_equal(clf1.fit(X, y).predict(X)[73], 2) - assert_equal(clf2.fit(X, y).predict(X)[73], 1) - assert_equal(eclf.fit(X, y).predict(X)[73], 1) + assert clf1.fit(X, y).predict(X)[73] == 2 + assert clf2.fit(X, y).predict(X)[73] == 1 + assert eclf.fit(X, y).predict(X)[73] == 1 def test_weights_iris(): @@ -174,21 +174,21 @@ def test_predict_on_toy_problem(): y = np.array([1, 1, 1, 2, 2, 2]) - assert_equal(all(clf1.fit(X, y).predict(X)), all([1, 1, 1, 2, 2, 2])) - assert_equal(all(clf2.fit(X, y).predict(X)), all([1, 1, 1, 2, 2, 2])) - assert_equal(all(clf3.fit(X, y).predict(X)), all([1, 1, 1, 2, 2, 2])) + assert all(clf1.fit(X, y).predict(X)) == all([1, 1, 1, 2, 2, 2]) + assert all(clf2.fit(X, y).predict(X)) == all([1, 1, 1, 2, 2, 2]) + assert all(clf3.fit(X, y).predict(X)) == all([1, 1, 1, 2, 2, 2]) eclf = VotingClassifier(estimators=[ ('lr', clf1), ('rf', clf2), ('gnb', clf3)], voting='hard', weights=[1, 1, 1]) - assert_equal(all(eclf.fit(X, y).predict(X)), all([1, 1, 1, 2, 2, 2])) + assert all(eclf.fit(X, y).predict(X)) == 
all([1, 1, 1, 2, 2, 2]) eclf = VotingClassifier(estimators=[ ('lr', clf1), ('rf', clf2), ('gnb', clf3)], voting='soft', weights=[1, 1, 1]) - assert_equal(all(eclf.fit(X, y).predict(X)), all([1, 1, 1, 2, 2, 2])) + assert all(eclf.fit(X, y).predict(X)) == all([1, 1, 1, 2, 2, 2]) def test_predict_proba_on_toy_problem(): @@ -370,15 +370,15 @@ def test_set_params(): assert_array_equal(eclf1.predict(X), eclf2.predict(X)) assert_array_almost_equal(eclf1.predict_proba(X), eclf2.predict_proba(X)) - assert_equal(eclf2.estimators[0][1].get_params(), clf1.get_params()) - assert_equal(eclf2.estimators[1][1].get_params(), clf2.get_params()) + assert eclf2.estimators[0][1].get_params() == clf1.get_params() + assert eclf2.estimators[1][1].get_params() == clf2.get_params() eclf1.set_params(lr__C=10.0) eclf2.set_params(nb__max_depth=5) assert eclf1.estimators[0][1].get_params()['C'] == 10.0 assert eclf2.estimators[1][1].get_params()['max_depth'] == 5 - assert_equal(eclf1.get_params()["lr__C"], + assert (eclf1.get_params()["lr__C"] == eclf1.get_params()["lr"].get_params()['C']) diff --git a/sklearn/ensemble/tests/test_weight_boosting.py b/sklearn/ensemble/tests/test_weight_boosting.py index 5b1c38e16d00e..6f03754345519 100755 --- a/sklearn/ensemble/tests/test_weight_boosting.py +++ b/sklearn/ensemble/tests/test_weight_boosting.py @@ -91,8 +91,8 @@ def test_classification_toy(): clf.fit(X, y_class) assert_array_equal(clf.predict(T), y_t_class) assert_array_equal(np.unique(np.asarray(y_t_class)), clf.classes_) - assert_equal(clf.predict_proba(T).shape, (len(T), 2)) - assert_equal(clf.decision_function(T).shape, (len(T),)) + assert clf.predict_proba(T).shape == (len(T), 2) + assert clf.decision_function(T).shape == (len(T),) def test_regression_toy(): @@ -116,17 +116,17 @@ def test_iris(): if alg == "SAMME": clf_samme = clf prob_samme = proba - assert_equal(proba.shape[1], len(classes)) - assert_equal(clf.decision_function(iris.data).shape[1], len(classes)) + assert proba.shape[1] == len(classes) + assert clf.decision_function(iris.data).shape[1] == len(classes) score = clf.score(iris.data, iris.target) assert score > 0.9, "Failed with algorithm %s and score = %f" % \ (alg, score) # Check we used multiple estimators - assert_greater(len(clf.estimators_), 1) + assert len(clf.estimators_) > 1 # Check for distinct random states (see issue #7408) - assert_equal(len(set(est.random_state for est in clf.estimators_)), + assert (len(set(est.random_state for est in clf.estimators_)) == len(clf.estimators_)) # Somewhat hacky regression test: prior to @@ -147,7 +147,7 @@ def test_boston(): # Check we used multiple estimators assert len(reg.estimators_) > 1 # Check for distinct random states (see issue #7408) - assert_equal(len(set(est.random_state for est in reg.estimators_)), + assert (len(set(est.random_state for est in reg.estimators_)) == len(reg.estimators_)) @@ -171,11 +171,11 @@ def test_staged_predict(): s for s in clf.staged_score( iris.data, iris.target, sample_weight=iris_weights)] - assert_equal(len(staged_predictions), 10) + assert len(staged_predictions) == 10 assert_array_almost_equal(predictions, staged_predictions[-1]) - assert_equal(len(staged_probas), 10) + assert len(staged_probas) == 10 assert_array_almost_equal(proba, staged_probas[-1]) - assert_equal(len(staged_scores), 10) + assert len(staged_scores) == 10 assert_array_almost_equal(score, staged_scores[-1]) # AdaBoost regression @@ -189,9 +189,9 @@ def test_staged_predict(): s for s in clf.staged_score( boston.data, boston.target, 
sample_weight=boston_weights)] - assert_equal(len(staged_predictions), 10) + assert len(staged_predictions) == 10 assert_array_almost_equal(predictions, staged_predictions[-1]) - assert_equal(len(staged_scores), 10) + assert len(staged_scores) == 10 assert_array_almost_equal(score, staged_scores[-1]) @@ -226,9 +226,9 @@ def test_pickle(): s = pickle.dumps(obj) obj2 = pickle.loads(s) - assert_equal(type(obj2), obj.__class__) + assert type(obj2) == obj.__class__ score2 = obj2.score(iris.data, iris.target) - assert_equal(score, score2) + assert score == score2 # Adaboost regressor obj = AdaBoostRegressor(random_state=0) @@ -237,9 +237,9 @@ def test_pickle(): s = pickle.dumps(obj) obj2 = pickle.loads(s) - assert_equal(type(obj2), obj.__class__) + assert type(obj2) == obj.__class__ score2 = obj2.score(boston.data, boston.target) - assert_equal(score, score2) + assert score == score2 def test_importances(): @@ -258,8 +258,8 @@ def test_importances(): clf.fit(X, y) importances = clf.feature_importances_ - assert_equal(importances.shape[0], 10) - assert_equal((importances[:3, np.newaxis] >= importances[3:]).all(), + assert importances.shape[0] == 10 + assert ((importances[:3, np.newaxis] >= importances[3:]).all() == True) @@ -480,7 +480,7 @@ def predict(self, X): boost = AdaBoostRegressor(DummyEstimator(), n_estimators=3) boost.fit(X, y_regr) - assert_equal(len(boost.estimator_weights_), len(boost.estimator_errors_)) + assert len(boost.estimator_weights_) == len(boost.estimator_errors_) def test_multidimensional_X(): From 626b672b51c5e4496250100f090c084dcf7cbf1c Mon Sep 17 00:00:00 2001 From: adrinjalali Date: Sun, 30 Jun 2019 15:37:14 +0200 Subject: [PATCH 06/22] fix feature_extraction --- .../tests/test_dict_vectorizer.py | 20 +- .../tests/test_feature_hasher.py | 26 +-- .../feature_extraction/tests/test_image.py | 24 +- sklearn/feature_extraction/tests/test_text.py | 208 +++++++++--------- 4 files changed, 139 insertions(+), 139 deletions(-) diff --git a/sklearn/feature_extraction/tests/test_dict_vectorizer.py b/sklearn/feature_extraction/tests/test_dict_vectorizer.py index 876f3083e3484..8acd0bcadb160 100644 --- a/sklearn/feature_extraction/tests/test_dict_vectorizer.py +++ b/sklearn/feature_extraction/tests/test_dict_vectorizer.py @@ -27,10 +27,10 @@ def test_dictvectorizer(sparse, dtype, sort, iterable): v = DictVectorizer(sparse=sparse, dtype=dtype, sort=sort) X = v.fit_transform(iter(D) if iterable else D) - assert_equal(sp.issparse(X), sparse) - assert_equal(X.shape, (3, 5)) - assert_equal(X.sum(), 14) - assert_equal(v.inverse_transform(X), D) + assert sp.issparse(X) == sparse + assert X.shape == (3, 5) + assert X.sum() == 14 + assert v.inverse_transform(X) == D if sparse: # CSR matrices can't be compared for equality @@ -41,7 +41,7 @@ def test_dictvectorizer(sparse, dtype, sort, iterable): else D)) if sort: - assert_equal(v.feature_names_, + assert (v.feature_names_ == sorted(v.feature_names_)) @@ -59,7 +59,7 @@ def test_feature_selection(): sel = SelectKBest(chi2, k=2).fit(X, [0, 1]) v.restrict(sel.get_support(indices=indices), indices=indices) - assert_equal(v.get_feature_names(), ["useful1", "useful2"]) + assert v.get_feature_names() == ["useful1", "useful2"] def test_one_of_k(): @@ -68,10 +68,10 @@ def test_one_of_k(): {"version=3": True, "spam": -1}] v = DictVectorizer() X = v.fit_transform(D_in) - assert_equal(X.shape, (3, 5)) + assert X.shape == (3, 5) D_out = v.inverse_transform(X) - assert_equal(D_out[0], {"version=1": 1, "ham": 2}) + assert D_out[0] == {"version=1": 1, "ham": 
2} names = v.get_feature_names() assert "version=2" in names @@ -96,7 +96,7 @@ def test_unseen_or_no_features(): try: v.transform([]) except ValueError as e: - assert_in("empty", str(e)) + assert "empty" in str(e) def test_deterministic_vocabulary(): @@ -111,4 +111,4 @@ def test_deterministic_vocabulary(): v_1 = DictVectorizer().fit([d_sorted]) v_2 = DictVectorizer().fit([d_shuffled]) - assert_equal(v_1.vocabulary_, v_2.vocabulary_) + assert v_1.vocabulary_ == v_2.vocabulary_ diff --git a/sklearn/feature_extraction/tests/test_feature_hasher.py b/sklearn/feature_extraction/tests/test_feature_hasher.py index 75cc907dd8f28..ae683df2ed933 100644 --- a/sklearn/feature_extraction/tests/test_feature_hasher.py +++ b/sklearn/feature_extraction/tests/test_feature_hasher.py @@ -11,7 +11,7 @@ def test_feature_hasher_dicts(): h = FeatureHasher(n_features=16) - assert_equal("dict", h.input_type) + assert "dict" == h.input_type raw_X = [{"foo": "bar", "dada": 42, "tzara": 37}, {"foo": "baz", "gaga": "string1"}] @@ -35,13 +35,13 @@ def test_feature_hasher_strings(): alternate_sign=False) X = h.transform(it) - assert_equal(X.shape[0], len(raw_X)) - assert_equal(X.shape[1], n_features) + assert X.shape[0] == len(raw_X) + assert X.shape[1] == n_features - assert_equal(X[0].sum(), 4) - assert_equal(X[1].sum(), 3) + assert X[0].sum() == 4 + assert X[1].sum() == 3 - assert_equal(X.nnz, 6) + assert X.nnz == 6 def test_feature_hasher_pairs(): @@ -51,8 +51,8 @@ def test_feature_hasher_pairs(): x1, x2 = h.transform(raw_X).toarray() x1_nz = sorted(np.abs(x1[x1 != 0])) x2_nz = sorted(np.abs(x2[x2 != 0])) - assert_equal([1, 2], x1_nz) - assert_equal([1, 3, 4], x2_nz) + assert [1, 2] == x1_nz + assert [1, 3, 4] == x2_nz def test_feature_hasher_pairs_with_string_values(): @@ -62,16 +62,16 @@ def test_feature_hasher_pairs_with_string_values(): x1, x2 = h.transform(raw_X).toarray() x1_nz = sorted(np.abs(x1[x1 != 0])) x2_nz = sorted(np.abs(x2[x2 != 0])) - assert_equal([1, 1], x1_nz) - assert_equal([1, 1, 4], x2_nz) + assert [1, 1] == x1_nz + assert [1, 1, 4] == x2_nz raw_X = (iter(d.items()) for d in [{"bax": "abc"}, {"bax": "abc"}]) x1, x2 = h.transform(raw_X).toarray() x1_nz = np.abs(x1[x1 != 0]) x2_nz = np.abs(x2[x2 != 0]) - assert_equal([1], x1_nz) - assert_equal([1], x2_nz) + assert [1] == x1_nz + assert [1] == x2_nz assert_array_equal(x1, x2) @@ -107,7 +107,7 @@ def test_hasher_set_params(): def test_hasher_zeros(): # Assert that no zeros are materialized in the output. X = FeatureHasher().transform([{'foo': 0}]) - assert_equal(X.data.shape, (0,)) + assert X.data.shape == (0,) @ignore_warnings(category=DeprecationWarning) diff --git a/sklearn/feature_extraction/tests/test_image.py b/sklearn/feature_extraction/tests/test_image.py index 92b18310469b4..d66e06a44029d 100644 --- a/sklearn/feature_extraction/tests/test_image.py +++ b/sklearn/feature_extraction/tests/test_image.py @@ -18,7 +18,7 @@ def test_img_to_graph(): x, y = np.mgrid[:4, :4] - 10 grad_x = img_to_graph(x) grad_y = img_to_graph(y) - assert_equal(grad_x.nnz, grad_y.nnz) + assert grad_x.nnz == grad_y.nnz # Negative elements are the diagonal: the elements of the original # image. 
Positive elements are the values of the gradient, they # should all be equal on grad_x and grad_y @@ -68,7 +68,7 @@ def test_connect_regions(): for thr in (50, 150): mask = face > thr graph = img_to_graph(face, mask) - assert_equal(ndimage.label(mask)[1], connected_components(graph)[0]) + assert ndimage.label(mask)[1] == connected_components(graph)[0] @ignore_warnings(category=DeprecationWarning) # scipy deprecation inside face @@ -85,11 +85,11 @@ def test_connect_regions_with_grid(): mask = face > 50 graph = grid_to_graph(*face.shape, mask=mask) - assert_equal(ndimage.label(mask)[1], connected_components(graph)[0]) + assert ndimage.label(mask)[1] == connected_components(graph)[0] mask = face > 150 graph = grid_to_graph(*face.shape, mask=mask, dtype=None) - assert_equal(ndimage.label(mask)[1], connected_components(graph)[0]) + assert ndimage.label(mask)[1] == connected_components(graph)[0] def _downsampled_face(): @@ -138,7 +138,7 @@ def test_extract_patches_all(): p_h, p_w = 16, 16 expected_n_patches = (i_h - p_h + 1) * (i_w - p_w + 1) patches = extract_patches_2d(face, (p_h, p_w)) - assert_equal(patches.shape, (expected_n_patches, p_h, p_w)) + assert patches.shape == (expected_n_patches, p_h, p_w) def test_extract_patches_all_color(): @@ -147,7 +147,7 @@ def test_extract_patches_all_color(): p_h, p_w = 16, 16 expected_n_patches = (i_h - p_h + 1) * (i_w - p_w + 1) patches = extract_patches_2d(face, (p_h, p_w)) - assert_equal(patches.shape, (expected_n_patches, p_h, p_w, 3)) + assert patches.shape == (expected_n_patches, p_h, p_w, 3) def test_extract_patches_all_rect(): @@ -158,7 +158,7 @@ def test_extract_patches_all_rect(): expected_n_patches = (i_h - p_h + 1) * (i_w - p_w + 1) patches = extract_patches_2d(face, (p_h, p_w)) - assert_equal(patches.shape, (expected_n_patches, p_h, p_w)) + assert patches.shape == (expected_n_patches, p_h, p_w) def test_extract_patches_max_patches(): @@ -167,11 +167,11 @@ def test_extract_patches_max_patches(): p_h, p_w = 16, 16 patches = extract_patches_2d(face, (p_h, p_w), max_patches=100) - assert_equal(patches.shape, (100, p_h, p_w)) + assert patches.shape == (100, p_h, p_w) expected_n_patches = int(0.5 * (i_h - p_h + 1) * (i_w - p_w + 1)) patches = extract_patches_2d(face, (p_h, p_w), max_patches=0.5) - assert_equal(patches.shape, (expected_n_patches, p_h, p_w)) + assert patches.shape == (expected_n_patches, p_h, p_w) assert_raises(ValueError, extract_patches_2d, face, (p_h, p_w), max_patches=2.0) @@ -184,7 +184,7 @@ def test_extract_patch_same_size_image(): # Request patches of the same size as image # Should return just the single patch a.k.a. 
the image patches = extract_patches_2d(face, face.shape, max_patches=2) - assert_equal(patches.shape[0], 1) + assert patches.shape[0] == 1 def test_extract_patches_less_than_max_patches(): @@ -195,7 +195,7 @@ def test_extract_patches_less_than_max_patches(): expected_n_patches = (i_h - p_h + 1) * (i_w - p_w + 1) patches = extract_patches_2d(face, (p_h, p_w), max_patches=4000) - assert_equal(patches.shape, (expected_n_patches, p_h, p_w)) + assert patches.shape == (expected_n_patches, p_h, p_w) def test_reconstruct_patches_perfect(): @@ -247,7 +247,7 @@ def test_patch_extractor_max_patches_default(): faces = face_collection extr = PatchExtractor(max_patches=100, random_state=0) patches = extr.transform(faces) - assert_equal(patches.shape, (len(faces) * 100, 19, 25)) + assert patches.shape == (len(faces) * 100, 19, 25) def test_patch_extractor_all_patches(): diff --git a/sklearn/feature_extraction/tests/test_text.py b/sklearn/feature_extraction/tests/test_text.py index 73a240547d81d..0bca5b5506253 100644 --- a/sklearn/feature_extraction/tests/test_text.py +++ b/sklearn/feature_extraction/tests/test_text.py @@ -83,42 +83,42 @@ def test_strip_accents(): # check some classical latin accentuated symbols a = 'àáâãäåçèéêë' expected = 'aaaaaaceeee' - assert_equal(strip_accents_unicode(a), expected) + assert strip_accents_unicode(a) == expected a = 'ìíîïñòóôõöùúûüý' expected = 'iiiinooooouuuuy' - assert_equal(strip_accents_unicode(a), expected) + assert strip_accents_unicode(a) == expected # check some arabic a = '\u0625' # alef with a hamza below: إ expected = '\u0627' # simple alef: ا - assert_equal(strip_accents_unicode(a), expected) + assert strip_accents_unicode(a) == expected # mix letters accentuated and not a = "this is à test" expected = 'this is a test' - assert_equal(strip_accents_unicode(a), expected) + assert strip_accents_unicode(a) == expected def test_to_ascii(): # check some classical latin accentuated symbols a = 'àáâãäåçèéêë' expected = 'aaaaaaceeee' - assert_equal(strip_accents_ascii(a), expected) + assert strip_accents_ascii(a) == expected a = "ìíîïñòóôõöùúûüý" expected = 'iiiinooooouuuuy' - assert_equal(strip_accents_ascii(a), expected) + assert strip_accents_ascii(a) == expected # check some arabic a = '\u0625' # halef with a hamza below expected = '' # halef has no direct ascii match - assert_equal(strip_accents_ascii(a), expected) + assert strip_accents_ascii(a) == expected # mix letters accentuated and not a = "this is à test" expected = 'this is a test' - assert_equal(strip_accents_ascii(a), expected) + assert strip_accents_ascii(a) == expected @pytest.mark.parametrize('Vectorizer', (CountVectorizer, HashingVectorizer)) @@ -128,18 +128,18 @@ def test_word_analyzer_unigrams(Vectorizer): "c'était pas très bon.") expected = ['ai', 'mange', 'du', 'kangourou', 'ce', 'midi', 'etait', 'pas', 'tres', 'bon'] - assert_equal(wa(text), expected) + assert wa(text) == expected text = "This is a test, really.\n\n I met Harry yesterday." 
expected = ['this', 'is', 'test', 'really', 'met', 'harry', 'yesterday'] - assert_equal(wa(text), expected) + assert wa(text) == expected wa = Vectorizer(input='file').build_analyzer() text = StringIO("This is a test with a file-like object!") expected = ['this', 'is', 'test', 'with', 'file', 'like', 'object'] - assert_equal(wa(text), expected) + assert wa(text) == expected # with custom preprocessor wa = Vectorizer(preprocessor=uppercase).build_analyzer() @@ -147,7 +147,7 @@ def test_word_analyzer_unigrams(Vectorizer): " c'était pas très bon.") expected = ['AI', 'MANGE', 'DU', 'KANGOUROU', 'CE', 'MIDI', 'ETAIT', 'PAS', 'TRES', 'BON'] - assert_equal(wa(text), expected) + assert wa(text) == expected # with custom tokenizer wa = Vectorizer(tokenizer=split_tokenize, @@ -156,7 +156,7 @@ def test_word_analyzer_unigrams(Vectorizer): "c'était pas très bon.") expected = ["j'ai", 'mange', 'du', 'kangourou', 'ce', 'midi,', "c'etait", 'pas', 'tres', 'bon.'] - assert_equal(wa(text), expected) + assert wa(text) == expected def test_word_analyzer_unigrams_and_bigrams(): @@ -168,7 +168,7 @@ def test_word_analyzer_unigrams_and_bigrams(): 'etait', 'pas', 'tres', 'bon', 'ai mange', 'mange du', 'du kangourou', 'kangourou ce', 'ce midi', 'midi etait', 'etait pas', 'pas tres', 'tres bon'] - assert_equal(wa(text), expected) + assert wa(text) == expected def test_unicode_decode_error(): @@ -193,22 +193,22 @@ def test_char_ngram_analyzer(): text = "J'ai mangé du kangourou ce midi, c'était pas très bon" expected = ["j'a", "'ai", 'ai ', 'i m', ' ma'] - assert_equal(cnga(text)[:5], expected) + assert cnga(text)[:5] == expected expected = ['s tres', ' tres ', 'tres b', 'res bo', 'es bon'] - assert_equal(cnga(text)[-5:], expected) + assert cnga(text)[-5:] == expected text = "This \n\tis a test, really.\n\n I met Harry yesterday" expected = ['thi', 'his', 'is ', 's i', ' is'] - assert_equal(cnga(text)[:5], expected) + assert cnga(text)[:5] == expected expected = [' yeste', 'yester', 'esterd', 'sterda', 'terday'] - assert_equal(cnga(text)[-5:], expected) + assert cnga(text)[-5:] == expected cnga = CountVectorizer(input='file', analyzer='char', ngram_range=(3, 6)).build_analyzer() text = StringIO("This is a test with a file-like object!") expected = ['thi', 'his', 'is ', 's i', ' is'] - assert_equal(cnga(text)[:5], expected) + assert cnga(text)[:5] == expected def test_char_wb_ngram_analyzer(): @@ -217,16 +217,16 @@ def test_char_wb_ngram_analyzer(): text = "This \n\tis a test, really.\n\n I met Harry yesterday" expected = [' th', 'thi', 'his', 'is ', ' thi'] - assert_equal(cnga(text)[:5], expected) + assert cnga(text)[:5] == expected expected = ['yester', 'esterd', 'sterda', 'terday', 'erday '] - assert_equal(cnga(text)[-5:], expected) + assert cnga(text)[-5:] == expected cnga = CountVectorizer(input='file', analyzer='char_wb', ngram_range=(3, 6)).build_analyzer() text = StringIO("A test with a file-like object!") expected = [' a ', ' te', 'tes', 'est', 'st ', ' tes'] - assert_equal(cnga(text)[:6], expected) + assert cnga(text)[:6] == expected def test_word_ngram_analyzer(): @@ -235,17 +235,17 @@ def test_word_ngram_analyzer(): text = "This \n\tis a test, really.\n\n I met Harry yesterday" expected = ['this is test', 'is test really', 'test really met'] - assert_equal(cnga(text)[:3], expected) + assert cnga(text)[:3] == expected expected = ['test really met harry yesterday', 'this is test really met harry', 'is test really met harry yesterday'] - assert_equal(cnga(text)[-3:], expected) + assert cnga(text)[-3:] == 
expected cnga_file = CountVectorizer(input='file', analyzer='word', ngram_range=(3, 6)).build_analyzer() file = StringIO(text) - assert_equal(cnga_file(file), cnga(text)) + assert cnga_file(file) == cnga(text) def test_countvectorizer_custom_vocabulary(): @@ -258,11 +258,11 @@ def test_countvectorizer_custom_vocabulary(): vect = CountVectorizer(vocabulary=v) vect.fit(JUNK_FOOD_DOCS) if isinstance(v, Mapping): - assert_equal(vect.vocabulary_, vocab) + assert vect.vocabulary_ == vocab else: - assert_equal(set(vect.vocabulary_), terms) + assert set(vect.vocabulary_) == terms X = vect.transform(JUNK_FOOD_DOCS) - assert_equal(X.shape[1], len(terms)) + assert X.shape[1] == len(terms) def test_countvectorizer_custom_vocabulary_pipeline(): @@ -271,9 +271,9 @@ def test_countvectorizer_custom_vocabulary_pipeline(): ('count', CountVectorizer(vocabulary=what_we_like)), ('tfidf', TfidfTransformer())]) X = pipe.fit_transform(ALL_FOOD_DOCS) - assert_equal(set(pipe.named_steps['count'].vocabulary_), + assert (set(pipe.named_steps['count'].vocabulary_) == set(what_we_like)) - assert_equal(X.shape[1], len(what_we_like)) + assert X.shape[1] == len(what_we_like) def test_countvectorizer_custom_vocabulary_repeated_indices(): @@ -281,7 +281,7 @@ def test_countvectorizer_custom_vocabulary_repeated_indices(): try: CountVectorizer(vocabulary=vocab) except ValueError as e: - assert_in("vocabulary contains repeated indices", str(e).lower()) + assert "vocabulary contains repeated indices" in str(e).lower() def test_countvectorizer_custom_vocabulary_gap_index(): @@ -289,20 +289,20 @@ def test_countvectorizer_custom_vocabulary_gap_index(): try: CountVectorizer(vocabulary=vocab) except ValueError as e: - assert_in("doesn't contain index", str(e).lower()) + assert "doesn't contain index" in str(e).lower() def test_countvectorizer_stop_words(): cv = CountVectorizer() cv.set_params(stop_words='english') - assert_equal(cv.get_stop_words(), ENGLISH_STOP_WORDS) + assert cv.get_stop_words() == ENGLISH_STOP_WORDS cv.set_params(stop_words='_bad_str_stop_') assert_raises(ValueError, cv.get_stop_words) cv.set_params(stop_words='_bad_unicode_stop_') assert_raises(ValueError, cv.get_stop_words) stoplist = ['some', 'other', 'words'] cv.set_params(stop_words=stoplist) - assert_equal(cv.get_stop_words(), set(stoplist)) + assert cv.get_stop_words() == set(stoplist) def test_countvectorizer_empty_vocabulary(): @@ -311,7 +311,7 @@ def test_countvectorizer_empty_vocabulary(): vect.fit(["foo"]) assert False, "we shouldn't get here" except ValueError as e: - assert_in("empty vocabulary", str(e).lower()) + assert "empty vocabulary" in str(e).lower() try: v = CountVectorizer(max_df=1.0, stop_words="english") @@ -319,14 +319,14 @@ def test_countvectorizer_empty_vocabulary(): v.fit(["to be or not to be", "and me too", "and so do you"]) assert False, "we shouldn't get here" except ValueError as e: - assert_in("empty vocabulary", str(e).lower()) + assert "empty vocabulary" in str(e).lower() def test_fit_countvectorizer_twice(): cv = CountVectorizer() X1 = cv.fit_transform(ALL_FOOD_DOCS[:5]) X2 = cv.fit_transform(ALL_FOOD_DOCS[5:]) - assert_not_equal(X1.shape[1], X2.shape[1]) + assert X1.shape[1] != X2.shape[1] def test_tf_idf_smoothing(): @@ -383,11 +383,11 @@ def test_sublinear_tf(): X = [[1], [2], [3]] tr = TfidfTransformer(sublinear_tf=True, use_idf=False, norm=None) tfidf = tr.fit_transform(X).toarray() - assert_equal(tfidf[0], 1) - assert_greater(tfidf[1], tfidf[0]) - assert_greater(tfidf[2], tfidf[1]) - assert_less(tfidf[1], 2) - 
assert_less(tfidf[2], 3) + assert tfidf[0] == 1 + assert tfidf[1] > tfidf[0] + assert tfidf[2] > tfidf[1] + assert tfidf[1] < 2 + assert tfidf[2] < 3 def test_vectorizer(): @@ -401,7 +401,7 @@ def test_vectorizer(): counts_train = v1.fit_transform(train_data) if hasattr(counts_train, 'tocsr'): counts_train = counts_train.tocsr() - assert_equal(counts_train[0, v1.vocabulary_["pizza"]], 2) + assert counts_train[0, v1.vocabulary_["pizza"]] == 2 # build a vectorizer v1 with the same vocabulary as the one fitted by v1 v2 = CountVectorizer(vocabulary=v1.vocabulary_) @@ -413,9 +413,9 @@ def test_vectorizer(): counts_test = counts_test.tocsr() vocabulary = v.vocabulary_ - assert_equal(counts_test[0, vocabulary["salad"]], 1) - assert_equal(counts_test[0, vocabulary["tomato"]], 1) - assert_equal(counts_test[0, vocabulary["water"]], 1) + assert counts_test[0, vocabulary["salad"]] == 1 + assert counts_test[0, vocabulary["tomato"]] == 1 + assert counts_test[0, vocabulary["water"]] == 1 # stop word from the fixed list assert "the" not in vocabulary @@ -427,20 +427,20 @@ def test_vectorizer(): assert "copyright" not in vocabulary # not present in the sample - assert_equal(counts_test[0, vocabulary["coke"]], 0) - assert_equal(counts_test[0, vocabulary["burger"]], 0) - assert_equal(counts_test[0, vocabulary["beer"]], 0) - assert_equal(counts_test[0, vocabulary["pizza"]], 0) + assert counts_test[0, vocabulary["coke"]] == 0 + assert counts_test[0, vocabulary["burger"]] == 0 + assert counts_test[0, vocabulary["beer"]] == 0 + assert counts_test[0, vocabulary["pizza"]] == 0 # test tf-idf t1 = TfidfTransformer(norm='l1') tfidf = t1.fit(counts_train).transform(counts_train).toarray() - assert_equal(len(t1.idf_), len(v1.vocabulary_)) - assert_equal(tfidf.shape, (n_train, len(v1.vocabulary_))) + assert len(t1.idf_) == len(v1.vocabulary_) + assert tfidf.shape == (n_train, len(v1.vocabulary_)) # test tf-idf with new data tfidf_test = t1.transform(counts_test).toarray() - assert_equal(tfidf_test.shape, (len(test_data), len(v1.vocabulary_))) + assert tfidf_test.shape == (len(test_data), len(v1.vocabulary_)) # test tf alone t2 = TfidfTransformer(norm='l1', use_idf=False) @@ -482,7 +482,7 @@ def test_vectorizer(): # ascii preprocessor? 
v3.set_params(strip_accents='ascii', lowercase=False) - assert_equal(v3.build_preprocessor(), strip_accents_ascii) + assert v3.build_preprocessor() == strip_accents_ascii # error on bad strip_accents param v3.set_params(strip_accents='_gabbledegook_', preprocessor=None) @@ -497,7 +497,7 @@ def test_tfidf_vectorizer_setters(): tv = TfidfVectorizer(norm='l2', use_idf=False, smooth_idf=False, sublinear_tf=False) tv.norm = 'l1' - assert_equal(tv._tfidf.norm, 'l1') + assert tv._tfidf.norm == 'l1' tv.use_idf = True assert tv._tfidf.use_idf tv.smooth_idf = True @@ -511,8 +511,8 @@ def test_hashing_vectorizer(): v = HashingVectorizer() X = v.transform(ALL_FOOD_DOCS) token_nnz = X.nnz - assert_equal(X.shape, (len(ALL_FOOD_DOCS), v.n_features)) - assert_equal(X.dtype, v.dtype) + assert X.shape == (len(ALL_FOOD_DOCS), v.n_features) + assert X.dtype == v.dtype # By default the hashed values receive a random sign and l2 normalization # makes the feature values bounded @@ -528,8 +528,8 @@ def test_hashing_vectorizer(): # Check vectorization with some non-default parameters v = HashingVectorizer(ngram_range=(1, 2), norm='l1') X = v.transform(ALL_FOOD_DOCS) - assert_equal(X.shape, (len(ALL_FOOD_DOCS), v.n_features)) - assert_equal(X.dtype, v.dtype) + assert X.shape == (len(ALL_FOOD_DOCS), v.n_features) + assert X.dtype == v.dtype # ngrams generate more non zeros ngrams_nnz = X.nnz @@ -555,16 +555,16 @@ def test_feature_names(): # test for vocabulary learned from data X = cv.fit_transform(ALL_FOOD_DOCS) n_samples, n_features = X.shape - assert_equal(len(cv.vocabulary_), n_features) + assert len(cv.vocabulary_) == n_features feature_names = cv.get_feature_names() - assert_equal(len(feature_names), n_features) + assert len(feature_names) == n_features assert_array_equal(['beer', 'burger', 'celeri', 'coke', 'pizza', 'salad', 'sparkling', 'tomato', 'water'], feature_names) for idx, name in enumerate(feature_names): - assert_equal(idx, cv.vocabulary_.get(name)) + assert idx == cv.vocabulary_.get(name) # test for custom vocabulary vocab = ['beer', 'burger', 'celeri', 'coke', 'pizza', @@ -577,7 +577,7 @@ def test_feature_names(): assert cv.fixed_vocabulary_ for idx, name in enumerate(feature_names): - assert_equal(idx, cv.vocabulary_.get(name)) + assert idx == cv.vocabulary_.get(name) @pytest.mark.parametrize('Vectorizer', (CountVectorizer, TfidfVectorizer)) @@ -589,8 +589,8 @@ def test_vectorizer_max_features(Vectorizer): # test bounded number of extracted features vectorizer = Vectorizer(max_df=0.6, max_features=4) vectorizer.fit(ALL_FOOD_DOCS) - assert_equal(set(vectorizer.vocabulary_), expected_vocabulary) - assert_equal(vectorizer.stop_words_, expected_stop_words) + assert set(vectorizer.vocabulary_) == expected_vocabulary + assert vectorizer.stop_words_ == expected_stop_words def test_count_vectorizer_max_features(): @@ -609,14 +609,14 @@ def test_count_vectorizer_max_features(): features_None = cv_None.get_feature_names() # The most common feature is "the", with frequency 7. 
- assert_equal(7, counts_1.max()) - assert_equal(7, counts_3.max()) - assert_equal(7, counts_None.max()) + assert 7 == counts_1.max() + assert 7 == counts_3.max() + assert 7 == counts_None.max() # The most common feature should be the same - assert_equal("the", features_1[np.argmax(counts_1)]) - assert_equal("the", features_3[np.argmax(counts_3)]) - assert_equal("the", features_None[np.argmax(counts_None)]) + assert "the" == features_1[np.argmax(counts_1)] + assert "the" == features_3[np.argmax(counts_3)] + assert "the" == features_None[np.argmax(counts_None)] def test_vectorizer_max_df(): @@ -624,22 +624,22 @@ def test_vectorizer_max_df(): vect = CountVectorizer(analyzer='char', max_df=1.0) vect.fit(test_data) assert 'a' in vect.vocabulary_.keys() - assert_equal(len(vect.vocabulary_.keys()), 6) - assert_equal(len(vect.stop_words_), 0) + assert len(vect.vocabulary_.keys()) == 6 + assert len(vect.stop_words_) == 0 vect.max_df = 0.5 # 0.5 * 3 documents -> max_doc_count == 1.5 vect.fit(test_data) assert 'a' not in vect.vocabulary_.keys() # {ae} ignored - assert_equal(len(vect.vocabulary_.keys()), 4) # {bcdt} remain + assert len(vect.vocabulary_.keys()) == 4 # {bcdt} remain assert 'a' in vect.stop_words_ - assert_equal(len(vect.stop_words_), 2) + assert len(vect.stop_words_) == 2 vect.max_df = 1 vect.fit(test_data) assert 'a' not in vect.vocabulary_.keys() # {ae} ignored - assert_equal(len(vect.vocabulary_.keys()), 4) # {bcdt} remain + assert len(vect.vocabulary_.keys()) == 4 # {bcdt} remain assert 'a' in vect.stop_words_ - assert_equal(len(vect.stop_words_), 2) + assert len(vect.stop_words_) == 2 def test_vectorizer_min_df(): @@ -647,22 +647,22 @@ def test_vectorizer_min_df(): vect = CountVectorizer(analyzer='char', min_df=1) vect.fit(test_data) assert 'a' in vect.vocabulary_.keys() - assert_equal(len(vect.vocabulary_.keys()), 6) - assert_equal(len(vect.stop_words_), 0) + assert len(vect.vocabulary_.keys()) == 6 + assert len(vect.stop_words_) == 0 vect.min_df = 2 vect.fit(test_data) assert 'c' not in vect.vocabulary_.keys() # {bcdt} ignored - assert_equal(len(vect.vocabulary_.keys()), 2) # {ae} remain + assert len(vect.vocabulary_.keys()) == 2 # {ae} remain assert 'c' in vect.stop_words_ - assert_equal(len(vect.stop_words_), 4) + assert len(vect.stop_words_) == 4 vect.min_df = 0.8 # 0.8 * 3 documents -> min_doc_count == 2.4 vect.fit(test_data) assert 'c' not in vect.vocabulary_.keys() # {bcdet} ignored - assert_equal(len(vect.vocabulary_.keys()), 1) # {a} remains + assert len(vect.vocabulary_.keys()) == 1 # {a} remains assert 'c' in vect.stop_words_ - assert_equal(len(vect.stop_words_), 5) + assert len(vect.stop_words_) == 5 def test_count_binary_occurrences(): @@ -685,7 +685,7 @@ def test_count_binary_occurrences(): vect = CountVectorizer(analyzer='char', max_df=1.0, binary=True, dtype=np.float32) X_sparse = vect.fit_transform(test_data) - assert_equal(X_sparse.dtype, np.float32) + assert X_sparse.dtype == np.float32 @fails_if_pypy @@ -694,23 +694,23 @@ def test_hashed_binary_occurrences(): test_data = ['aaabc', 'abbde'] vect = HashingVectorizer(alternate_sign=False, analyzer='char', norm=None) X = vect.transform(test_data) - assert_equal(np.max(X[0:1].data), 3) - assert_equal(np.max(X[1:2].data), 2) - assert_equal(X.dtype, np.float64) + assert np.max(X[0:1].data) == 3 + assert np.max(X[1:2].data) == 2 + assert X.dtype == np.float64 # using boolean features, we can fetch the binary occurrence info # instead. 
vect = HashingVectorizer(analyzer='char', alternate_sign=False, binary=True, norm=None) X = vect.transform(test_data) - assert_equal(np.max(X.data), 1) - assert_equal(X.dtype, np.float64) + assert np.max(X.data) == 1 + assert X.dtype == np.float64 # check the ability to change the dtype vect = HashingVectorizer(analyzer='char', alternate_sign=False, binary=True, norm=None, dtype=np.float64) X = vect.transform(test_data) - assert_equal(X.dtype, np.float64) + assert X.dtype == np.float64 @pytest.mark.parametrize('Vectorizer', (CountVectorizer, TfidfVectorizer)) @@ -764,9 +764,9 @@ def test_count_vectorizer_pipeline_grid_selection(): # on this toy dataset bigram representation which is used in the last of # the grid_search is considered the best estimator since they all converge # to 100% accuracy models - assert_equal(grid_search.best_score_, 1.0) + assert grid_search.best_score_ == 1.0 best_vectorizer = grid_search.best_estimator_.named_steps['vect'] - assert_equal(best_vectorizer.ngram_range, (1, 1)) + assert best_vectorizer.ngram_range == (1, 1) def test_vectorizer_pipeline_grid_selection(): @@ -801,10 +801,10 @@ def test_vectorizer_pipeline_grid_selection(): # on this toy dataset bigram representation which is used in the last of # the grid_search is considered the best estimator since they all converge # to 100% accuracy models - assert_equal(grid_search.best_score_, 1.0) + assert grid_search.best_score_ == 1.0 best_vectorizer = grid_search.best_estimator_.named_steps['vect'] - assert_equal(best_vectorizer.ngram_range, (1, 1)) - assert_equal(best_vectorizer.norm, 'l2') + assert best_vectorizer.ngram_range == (1, 1) + assert best_vectorizer.norm == 'l2' assert not best_vectorizer.fixed_vocabulary_ @@ -833,14 +833,14 @@ def test_vectorizer_unicode(): vect = CountVectorizer() X_counted = vect.fit_transform([document]) - assert_equal(X_counted.shape, (1, 12)) + assert X_counted.shape == (1, 12) vect = HashingVectorizer(norm=None, alternate_sign=False) X_hashed = vect.transform([document]) - assert_equal(X_hashed.shape, (1, 2 ** 20)) + assert X_hashed.shape == (1, 2 ** 20) # No collisions on such a small dataset - assert_equal(X_counted.nnz, X_hashed.nnz) + assert X_counted.nnz == X_hashed.nnz # When norm is None and not alternate_sign, the tokens are counted up to # collisions @@ -876,8 +876,8 @@ def test_pickling_vectorizer(): for orig in instances: s = pickle.dumps(orig) copy = pickle.loads(s) - assert_equal(type(copy), orig.__class__) - assert_equal(copy.get_params(), orig.get_params()) + assert type(copy) == orig.__class__ + assert copy.get_params() == orig.get_params() if IS_PYPY and isinstance(orig, HashingVectorizer): continue else: @@ -898,7 +898,7 @@ def test_countvectorizer_vocab_sets_when_pickling(): unpickled_cv = pickle.loads(pickle.dumps(cv)) cv.fit(ALL_FOOD_DOCS) unpickled_cv.fit(ALL_FOOD_DOCS) - assert_equal(cv.get_feature_names(), unpickled_cv.get_feature_names()) + assert cv.get_feature_names() == unpickled_cv.get_feature_names() def test_countvectorizer_vocab_dicts_when_pickling(): @@ -914,7 +914,7 @@ def test_countvectorizer_vocab_dicts_when_pickling(): unpickled_cv = pickle.loads(pickle.dumps(cv)) cv.fit(ALL_FOOD_DOCS) unpickled_cv.fit(ALL_FOOD_DOCS) - assert_equal(cv.get_feature_names(), unpickled_cv.get_feature_names()) + assert cv.get_feature_names() == unpickled_cv.get_feature_names() def test_stop_words_removal(): @@ -944,7 +944,7 @@ def test_pickling_transformer(): orig = TfidfTransformer().fit(X) s = pickle.dumps(orig) copy = pickle.loads(s) - 
assert_equal(type(copy), orig.__class__) + assert type(copy) == orig.__class__ assert_array_equal( copy.fit_transform(X).toarray(), orig.fit_transform(X).toarray()) @@ -1021,7 +1021,7 @@ def test_vectorizer_vocab_clone(): vect_vocab_clone = clone(vect_vocab) vect_vocab.fit(ALL_FOOD_DOCS) vect_vocab_clone.fit(ALL_FOOD_DOCS) - assert_equal(vect_vocab_clone.vocabulary_, vect_vocab.vocabulary_) + assert vect_vocab_clone.vocabulary_ == vect_vocab.vocabulary_ @pytest.mark.parametrize('Vectorizer', From c64d4475846d6978271d9b0d6d96cf4a1bc12669 Mon Sep 17 00:00:00 2001 From: adrinjalali Date: Sun, 30 Jun 2019 15:38:15 +0200 Subject: [PATCH 07/22] fix feature_selection --- sklearn/feature_selection/tests/test_base.py | 16 +++---- .../tests/test_feature_select.py | 36 +++++++-------- .../tests/test_from_model.py | 8 ++-- .../tests/test_mutual_info.py | 8 ++-- sklearn/feature_selection/tests/test_rfe.py | 46 +++++++++---------- .../tests/test_variance_threshold.py | 2 +- 6 files changed, 58 insertions(+), 58 deletions(-) diff --git a/sklearn/feature_selection/tests/test_base.py b/sklearn/feature_selection/tests/test_base.py index e11b7d94fcebe..f2e3b36d456b5 100644 --- a/sklearn/feature_selection/tests/test_base.py +++ b/sklearn/feature_selection/tests/test_base.py @@ -46,8 +46,8 @@ def test_transform_dense(): assert_array_equal(Xt, Xt_actual2) # Check dtype matches - assert_equal(np.int32, sel.transform(X.astype(np.int32)).dtype) - assert_equal(np.float32, sel.transform(X.astype(np.float32)).dtype) + assert np.int32 == sel.transform(X.astype(np.int32)).dtype + assert np.float32 == sel.transform(X.astype(np.float32)).dtype # Check 1d list and other dtype: names_t_actual = sel.transform([feature_names]) @@ -66,8 +66,8 @@ def test_transform_sparse(): assert_array_equal(Xt, Xt_actual2.toarray()) # Check dtype matches - assert_equal(np.int32, sel.transform(sparse(X).astype(np.int32)).dtype) - assert_equal(np.float32, sel.transform(sparse(X).astype(np.float32)).dtype) + assert np.int32 == sel.transform(sparse(X).astype(np.int32)).dtype + assert np.float32 == sel.transform(sparse(X).astype(np.float32)).dtype # Check wrong shape raises error assert_raises(ValueError, sel.transform, np.array([[1], [2]])) @@ -79,9 +79,9 @@ def test_inverse_transform_dense(): assert_array_equal(Xinv, Xinv_actual) # Check dtype matches - assert_equal(np.int32, + assert (np.int32 == sel.inverse_transform(Xt.astype(np.int32)).dtype) - assert_equal(np.float32, + assert (np.float32 == sel.inverse_transform(Xt.astype(np.float32)).dtype) # Check 1d list and other dtype: @@ -99,9 +99,9 @@ def test_inverse_transform_sparse(): assert_array_equal(Xinv, Xinv_actual.toarray()) # Check dtype matches - assert_equal(np.int32, + assert (np.int32 == sel.inverse_transform(sparse(Xt).astype(np.int32)).dtype) - assert_equal(np.float32, + assert (np.float32 == sel.inverse_transform(sparse(Xt).astype(np.float32)).dtype) # Check wrong shape raises error diff --git a/sklearn/feature_selection/tests/test_feature_select.py b/sklearn/feature_selection/tests/test_feature_select.py index ef907d99f9aa0..ac1822e1a6063 100644 --- a/sklearn/feature_selection/tests/test_feature_select.py +++ b/sklearn/feature_selection/tests/test_feature_select.py @@ -193,10 +193,10 @@ def test_select_percentile_classif_sparse(): X_r2inv = univariate_filter.inverse_transform(X_r2) assert sparse.issparse(X_r2inv) support_mask = safe_mask(X_r2inv, support) - assert_equal(X_r2inv.shape, X.shape) + assert X_r2inv.shape == X.shape assert_array_equal(X_r2inv[:, 
support_mask].toarray(), X_r.toarray()) # Check other columns are empty - assert_equal(X_r2inv.getnnz(), X_r.getnnz()) + assert X_r2inv.getnnz() == X_r.getnnz() ############################################################################## @@ -245,7 +245,7 @@ def test_select_kbest_zero(): assert_array_equal(support, gtruth) X_selected = assert_warns_message(UserWarning, 'No features were selected', univariate_filter.transform, X) - assert_equal(X_selected.shape, (20, 0)) + assert X_selected.shape == (20, 0) def test_select_heuristics_classif(): @@ -371,7 +371,7 @@ def test_select_heuristics_regression(): assert_array_equal(X_r, X_r2) support = univariate_filter.get_support() assert_array_equal(support[:5], np.ones((5, ), dtype=np.bool)) - assert_less(np.sum(support[5:] == 1), 3) + assert np.sum(support[5:] == 1) < 3 def test_boundary_case_ch2(): @@ -466,7 +466,7 @@ def test_select_fwe_regression(): gtruth = np.zeros(20) gtruth[:5] = 1 assert_array_equal(support[:5], np.ones((5, ), dtype=np.bool)) - assert_less(np.sum(support[5:] == 1), 2) + assert np.sum(support[5:] == 1) < 2 def test_selectkbest_tiebreaking(): @@ -478,12 +478,12 @@ def test_selectkbest_tiebreaking(): for X in Xs: sel = SelectKBest(dummy_score, k=1) X1 = ignore_warnings(sel.fit_transform)([X], y) - assert_equal(X1.shape[1], 1) + assert X1.shape[1] == 1 assert_best_scores_kept(sel) sel = SelectKBest(dummy_score, k=2) X2 = ignore_warnings(sel.fit_transform)([X], y) - assert_equal(X2.shape[1], 2) + assert X2.shape[1] == 2 assert_best_scores_kept(sel) @@ -495,12 +495,12 @@ def test_selectpercentile_tiebreaking(): for X in Xs: sel = SelectPercentile(dummy_score, percentile=34) X1 = ignore_warnings(sel.fit_transform)([X], y) - assert_equal(X1.shape[1], 1) + assert X1.shape[1] == 1 assert_best_scores_kept(sel) sel = SelectPercentile(dummy_score, percentile=67) X2 = ignore_warnings(sel.fit_transform)([X], y) - assert_equal(X2.shape[1], 2) + assert X2.shape[1] == 2 assert_best_scores_kept(sel) @@ -514,12 +514,12 @@ def test_tied_pvalues(): for perm in itertools.permutations((0, 1, 2)): X = X0[:, perm] Xt = SelectKBest(chi2, k=2).fit_transform(X, y) - assert_equal(Xt.shape, (2, 2)) - assert_not_in(9998, Xt) + assert Xt.shape == (2, 2) + assert 9998 not in Xt Xt = SelectPercentile(chi2, percentile=67).fit_transform(X, y) - assert_equal(Xt.shape, (2, 2)) - assert_not_in(9998, Xt) + assert Xt.shape == (2, 2) + assert 9998 not in Xt def test_scorefunc_multilabel(): @@ -529,12 +529,12 @@ def test_scorefunc_multilabel(): y = [[1, 1], [0, 1], [1, 0]] Xt = SelectKBest(chi2, k=2).fit_transform(X, y) - assert_equal(Xt.shape, (3, 2)) - assert_not_in(0, Xt) + assert Xt.shape == (3, 2) + assert 0 not in Xt Xt = SelectPercentile(chi2, percentile=67).fit_transform(X, y) - assert_equal(Xt.shape, (3, 2)) - assert_not_in(0, Xt) + assert Xt.shape == (3, 2) + assert 0 not in Xt def test_tied_scores(): @@ -608,7 +608,7 @@ def test_no_feature_selected(): assert_array_equal(selector.get_support(), np.zeros(10)) X_selected = assert_warns_message( UserWarning, 'No features were selected', selector.transform, X) - assert_equal(X_selected.shape, (40, 0)) + assert X_selected.shape == (40, 0) def test_mutual_info_classif(): diff --git a/sklearn/feature_selection/tests/test_from_model.py b/sklearn/feature_selection/tests/test_from_model.py index b6b1b9c91ecf8..a5f61bfa0f061 100644 --- a/sklearn/feature_selection/tests/test_from_model.py +++ b/sklearn/feature_selection/tests/test_from_model.py @@ -169,7 +169,7 @@ def test_feature_importances(): assert 
hasattr(transformer.estimator_, 'feature_importances_') X_new = transformer.transform(X) - assert_less(X_new.shape[1], X.shape[1]) + assert X_new.shape[1] < X.shape[1] importances = transformer.estimator_.feature_importances_ feature_mask = np.abs(importances) > func(importances) @@ -228,7 +228,7 @@ def test_2d_coef(): transformer.fit(X, y) assert hasattr(transformer.estimator_, 'coef_') X_new = transformer.transform(X) - assert_less(X_new.shape[1], X.shape[1]) + assert X_new.shape[1] < X.shape[1] # Manually check that the norm is correctly performed est.fit(X, y) @@ -266,7 +266,7 @@ def test_calling_fit_reinitializes(): transformer.fit(data, y) transformer.set_params(estimator__C=100) transformer.fit(data, y) - assert_equal(transformer.estimator_.C, 100) + assert transformer.estimator_.C == 100 # 0.23. warning about tol not having its correct default value. @@ -321,4 +321,4 @@ def test_threshold_without_refitting(): # Set a higher threshold to filter out more features. model.threshold = "1.0 * mean" - assert_greater(X_transform.shape[1], model.transform(data).shape[1]) + assert X_transform.shape[1] > model.transform(data).shape[1] diff --git a/sklearn/feature_selection/tests/test_mutual_info.py b/sklearn/feature_selection/tests/test_mutual_info.py index 0fe437817ed28..4a610bc518f76 100644 --- a/sklearn/feature_selection/tests/test_mutual_info.py +++ b/sklearn/feature_selection/tests/test_mutual_info.py @@ -107,7 +107,7 @@ def test_compute_mi_cd_unique_label(): y = np.hstack((y, 10)) mi_2 = _compute_mi(x, y, True, False) - assert_equal(mi_1, mi_2) + assert mi_1 == mi_2 # We are going test that feature ordering by MI matches our expectations. @@ -166,11 +166,11 @@ def test_mutual_info_classif_mixed(): n_neighbors=n_neighbors, random_state=0) # Check that the continuous values have an higher MI with greater # n_neighbors - assert_greater(mi_nn[0], mi[0]) - assert_greater(mi_nn[1], mi[1]) + assert mi_nn[0] > mi[0] + assert mi_nn[1] > mi[1] # The n_neighbors should not have any effect on the discrete value # The MI should be the same - assert_equal(mi_nn[2], mi[2]) + assert mi_nn[2] == mi[2] def test_mutual_info_options(): diff --git a/sklearn/feature_selection/tests/test_rfe.py b/sklearn/feature_selection/tests/test_rfe.py index 9bd4f69fc9784..1ad5375edb6f3 100644 --- a/sklearn/feature_selection/tests/test_rfe.py +++ b/sklearn/feature_selection/tests/test_rfe.py @@ -66,7 +66,7 @@ def test_rfe_features_importance(): random_state=generator, max_depth=2) rfe = RFE(estimator=clf, n_features_to_select=4, step=0.1) rfe.fit(X, y) - assert_equal(len(rfe.ranking_), X.shape[1]) + assert len(rfe.ranking_) == X.shape[1] clf_svc = SVC(kernel="linear") rfe_svc = RFE(estimator=clf_svc, n_features_to_select=4, step=0.1) @@ -89,7 +89,7 @@ def test_rfe(): rfe.fit(X, y) X_r = rfe.transform(X) clf.fit(X_r, y) - assert_equal(len(rfe.ranking_), X.shape[1]) + assert len(rfe.ranking_) == X.shape[1] # sparse model clf_sparse = SVC(kernel="linear") @@ -97,11 +97,11 @@ def test_rfe(): rfe_sparse.fit(X_sparse, y) X_r_sparse = rfe_sparse.transform(X_sparse) - assert_equal(X_r.shape, iris.data.shape) + assert X_r.shape == iris.data.shape assert_array_almost_equal(X_r[:10], iris.data[:10]) assert_array_almost_equal(rfe.predict(X), clf.predict(iris.data)) - assert_equal(rfe.score(X, y), clf.score(iris.data, iris.target)) + assert rfe.score(X, y) == clf.score(iris.data, iris.target) assert_array_almost_equal(X_r, X_r_sparse.toarray()) @@ -117,8 +117,8 @@ def test_rfe_mockclassifier(): rfe.fit(X, y) X_r = rfe.transform(X) 
clf.fit(X_r, y) - assert_equal(len(rfe.ranking_), X.shape[1]) - assert_equal(X_r.shape, iris.data.shape) + assert len(rfe.ranking_) == X.shape[1] + assert X_r.shape == iris.data.shape def test_rfecv(): @@ -131,8 +131,8 @@ def test_rfecv(): rfecv = RFECV(estimator=SVC(kernel="linear"), step=1) rfecv.fit(X, y) # non-regression test for missing worst feature: - assert_equal(len(rfecv.grid_scores_), X.shape[1]) - assert_equal(len(rfecv.ranking_), X.shape[1]) + assert len(rfecv.grid_scores_) == X.shape[1] + assert len(rfecv.ranking_) == X.shape[1] X_r = rfecv.transform(X) # All the noisy variable were filtered out @@ -169,13 +169,13 @@ def test_scorer(estimator, X, y): # RFECV is to return the FEWEST features that maximize the CV score. # Because test_scorer always returns 1.0 in this example, RFECV should # reduce the dimensionality to a single feature (i.e. n_features_ = 1) - assert_equal(rfecv.n_features_, 1) + assert rfecv.n_features_ == 1 # Same as the first two tests, but with step=2 rfecv = RFECV(estimator=SVC(kernel="linear"), step=2) rfecv.fit(X, y) - assert_equal(len(rfecv.grid_scores_), 6) - assert_equal(len(rfecv.ranking_), X.shape[1]) + assert len(rfecv.grid_scores_) == 6 + assert len(rfecv.ranking_) == X.shape[1] X_r = rfecv.transform(X) assert_array_equal(X_r, iris.data) @@ -203,8 +203,8 @@ def test_rfecv_mockclassifier(): rfecv = RFECV(estimator=MockClassifier(), step=1) rfecv.fit(X, y) # non-regression test for missing worst feature: - assert_equal(len(rfecv.grid_scores_), X.shape[1]) - assert_equal(len(rfecv.ranking_), X.shape[1]) + assert len(rfecv.grid_scores_) == X.shape[1] + assert len(rfecv.ranking_) == X.shape[1] def test_rfecv_verbose_output(): @@ -223,7 +223,7 @@ def test_rfecv_verbose_output(): verbose_output = sys.stdout verbose_output.seek(0) - assert_greater(len(verbose_output.readline()), 0) + assert len(verbose_output.readline()) > 0 def test_rfecv_grid_scores_size(): @@ -248,11 +248,11 @@ def test_rfecv_grid_scores_size(): def test_rfe_estimator_tags(): rfe = RFE(SVC(kernel='linear')) - assert_equal(rfe._estimator_type, "classifier") + assert rfe._estimator_type == "classifier" # make sure that cross-validation is stratified iris = load_iris() score = cross_val_score(rfe, iris.data, iris.target) - assert_greater(score.min(), .7) + assert score.min() > .7 def test_rfe_min_step(): @@ -264,17 +264,17 @@ def test_rfe_min_step(): # Test when floor(step * n_features) <= 0 selector = RFE(estimator, step=0.01) sel = selector.fit(X, y) - assert_equal(sel.support_.sum(), n_features // 2) + assert sel.support_.sum() == n_features // 2 # Test when step is between (0,1) and floor(step * n_features) > 0 selector = RFE(estimator, step=0.20) sel = selector.fit(X, y) - assert_equal(sel.support_.sum(), n_features // 2) + assert sel.support_.sum() == n_features // 2 # Test when step is an integer selector = RFE(estimator, step=5) sel = selector.fit(X, y) - assert_equal(sel.support_.sum(), n_features // 2) + assert sel.support_.sum() == n_features // 2 def test_number_of_subsets_of_features(): @@ -307,9 +307,9 @@ def formula2(n_features, n_features_to_select, step): n_features_to_select=n_features_to_select, step=step) rfe.fit(X, y) # this number also equals to the maximum of ranking_ - assert_equal(np.max(rfe.ranking_), + assert (np.max(rfe.ranking_) == formula1(n_features, n_features_to_select, step)) - assert_equal(np.max(rfe.ranking_), + assert (np.max(rfe.ranking_) == formula2(n_features, n_features_to_select, step)) # In RFECV, 'fit' calls 'RFE._fit' @@ -331,9 +331,9 @@ def 
formula2(n_features, n_features_to_select, step): rfecv = RFECV(estimator=SVC(kernel="linear"), step=step) rfecv.fit(X, y) - assert_equal(rfecv.grid_scores_.shape[0], + assert (rfecv.grid_scores_.shape[0] == formula1(n_features, n_features_to_select, step)) - assert_equal(rfecv.grid_scores_.shape[0], + assert (rfecv.grid_scores_.shape[0] == formula2(n_features, n_features_to_select, step)) diff --git a/sklearn/feature_selection/tests/test_variance_threshold.py b/sklearn/feature_selection/tests/test_variance_threshold.py index 53a90ace37a40..4164caeac2246 100644 --- a/sklearn/feature_selection/tests/test_variance_threshold.py +++ b/sklearn/feature_selection/tests/test_variance_threshold.py @@ -28,7 +28,7 @@ def test_variance_threshold(): # Test VarianceThreshold with custom variance. for X in [data, csr_matrix(data)]: X = VarianceThreshold(threshold=.4).fit_transform(X) - assert_equal((len(data), 1), X.shape) + assert (len(data), 1) == X.shape def test_zero_variance_floating_point_error(): From 344aa108658952aeedf4e7ebbcdf8ca83880ff1c Mon Sep 17 00:00:00 2001 From: adrinjalali Date: Sun, 30 Jun 2019 15:38:54 +0200 Subject: [PATCH 08/22] fix gaussian_processes --- sklearn/gaussian_process/tests/test_gpc.py | 6 ++-- sklearn/gaussian_process/tests/test_gpr.py | 12 +++---- .../gaussian_process/tests/test_kernels.py | 32 +++++++++---------- 3 files changed, 25 insertions(+), 25 deletions(-) diff --git a/sklearn/gaussian_process/tests/test_gpc.py b/sklearn/gaussian_process/tests/test_gpc.py index dc37a317b1e14..46d2498733d4d 100644 --- a/sklearn/gaussian_process/tests/test_gpc.py +++ b/sklearn/gaussian_process/tests/test_gpc.py @@ -49,7 +49,7 @@ def test_predict_consistent(kernel): def test_lml_improving(kernel): # Test that hyperparameter-tuning improves log-marginal likelihood. gpc = GaussianProcessClassifier(kernel=kernel).fit(X, y) - assert_greater(gpc.log_marginal_likelihood(gpc.kernel_.theta), + assert (gpc.log_marginal_likelihood(gpc.kernel_.theta) > gpc.log_marginal_likelihood(kernel.theta)) @@ -106,7 +106,7 @@ def test_random_starts(): kernel=kernel, n_restarts_optimizer=n_restarts_optimizer, random_state=0).fit(X, y) lml = gp.log_marginal_likelihood(gp.kernel_.theta) - assert_greater(lml, last_lml - np.finfo(np.float32).eps) + assert lml > last_lml - np.finfo(np.float32).eps last_lml = lml @@ -129,7 +129,7 @@ def optimizer(obj_func, initial_theta, bounds): gpc = GaussianProcessClassifier(kernel=kernel, optimizer=optimizer) gpc.fit(X, y_mc) # Checks that optimizer improved marginal likelihood - assert_greater(gpc.log_marginal_likelihood(gpc.kernel_.theta), + assert (gpc.log_marginal_likelihood(gpc.kernel_.theta) > gpc.log_marginal_likelihood(kernel.theta)) diff --git a/sklearn/gaussian_process/tests/test_gpr.py b/sklearn/gaussian_process/tests/test_gpr.py index aebe35cfa2a14..6aa8e97168591 100644 --- a/sklearn/gaussian_process/tests/test_gpr.py +++ b/sklearn/gaussian_process/tests/test_gpr.py @@ -57,7 +57,7 @@ def test_gpr_interpolation(kernel): def test_lml_improving(kernel): # Test that hyperparameter-tuning improves log-marginal likelihood. gpr = GaussianProcessRegressor(kernel=kernel).fit(X, y) - assert_greater(gpr.log_marginal_likelihood(gpr.kernel_.theta), + assert (gpr.log_marginal_likelihood(gpr.kernel_.theta) > gpr.log_marginal_likelihood(kernel.theta)) @@ -65,7 +65,7 @@ def test_lml_improving(kernel): def test_lml_precomputed(kernel): # Test that lml of optimized kernel is stored correctly. 
gpr = GaussianProcessRegressor(kernel=kernel).fit(X, y) - assert_equal(gpr.log_marginal_likelihood(gpr.kernel_.theta), + assert (gpr.log_marginal_likelihood(gpr.kernel_.theta) == gpr.log_marginal_likelihood()) @@ -145,7 +145,7 @@ def test_no_optimizer(): # Test that kernel parameters are unmodified when optimizer is None. kernel = RBF(1.0) gpr = GaussianProcessRegressor(kernel=kernel, optimizer=None).fit(X, y) - assert_equal(np.exp(gpr.kernel_.theta), 1.0) + assert np.exp(gpr.kernel_.theta) == 1.0 @pytest.mark.parametrize('kernel', kernels) @@ -168,7 +168,7 @@ def test_anisotropic_kernel(): kernel = RBF([1.0, 1.0]) gpr = GaussianProcessRegressor(kernel=kernel).fit(X, y) - assert_greater(np.exp(gpr.kernel_.theta[1]), + assert (np.exp(gpr.kernel_.theta[1]) > np.exp(gpr.kernel_.theta[0]) * 5) @@ -191,7 +191,7 @@ def test_random_starts(): kernel=kernel, n_restarts_optimizer=n_restarts_optimizer, random_state=0,).fit(X, y) lml = gp.log_marginal_likelihood(gp.kernel_.theta) - assert_greater(lml, last_lml - np.finfo(np.float32).eps) + assert lml > last_lml - np.finfo(np.float32).eps last_lml = lml @@ -286,7 +286,7 @@ def optimizer(obj_func, initial_theta, bounds): gpr = GaussianProcessRegressor(kernel=kernel, optimizer=optimizer) gpr.fit(X, y) # Checks that optimizer improved marginal likelihood - assert_greater(gpr.log_marginal_likelihood(gpr.kernel_.theta), + assert (gpr.log_marginal_likelihood(gpr.kernel_.theta) > gpr.log_marginal_likelihood(gpr.kernel.theta)) diff --git a/sklearn/gaussian_process/tests/test_kernels.py b/sklearn/gaussian_process/tests/test_kernels.py index 33a769b852c59..510707cb760fb 100644 --- a/sklearn/gaussian_process/tests/test_kernels.py +++ b/sklearn/gaussian_process/tests/test_kernels.py @@ -53,9 +53,9 @@ def test_kernel_gradient(kernel): # Compare analytic and numeric gradient of kernels. K, K_gradient = kernel(X, eval_gradient=True) - assert_equal(K_gradient.shape[0], X.shape[0]) - assert_equal(K_gradient.shape[1], X.shape[0]) - assert_equal(K_gradient.shape[2], kernel.theta.shape[0]) + assert K_gradient.shape[0] == X.shape[0] + assert K_gradient.shape[1] == X.shape[0] + assert K_gradient.shape[2] == kernel.theta.shape[0] def eval_kernel_for_theta(theta): kernel_clone = kernel.clone_with_theta(theta) @@ -84,15 +84,15 @@ def test_kernel_theta(kernel): args = [p.name for p in init_sign if p.name != 'self'] theta_vars = map(lambda s: s[0:-len("_bounds")], filter(lambda s: s.endswith("_bounds"), args)) - assert_equal( + assert ( set(hyperparameter.name - for hyperparameter in kernel.hyperparameters), + for hyperparameter in kernel.hyperparameters) == set(theta_vars)) # Check that values returned in theta are consistent with # hyperparameter values (being their logarithms) for i, hyperparameter in enumerate(kernel.hyperparameters): - assert_equal(theta[i], + assert (theta[i] == np.log(getattr(kernel, hyperparameter.name))) # Fixed kernel parameters must be excluded from theta and gradient. 
@@ -105,14 +105,14 @@ def test_kernel_theta(kernel): # Check that theta and K_gradient are identical with the fixed # dimension left out _, K_gradient_new = new_kernel(X, eval_gradient=True) - assert_equal(theta.shape[0], new_kernel.theta.shape[0] + 1) - assert_equal(K_gradient.shape[2], K_gradient_new.shape[2] + 1) + assert theta.shape[0] == new_kernel.theta.shape[0] + 1 + assert K_gradient.shape[2] == K_gradient_new.shape[2] + 1 if i > 0: - assert_equal(theta[:i], new_kernel.theta[:i]) + assert theta[:i] == new_kernel.theta[:i] assert_array_equal(K_gradient[..., :i], K_gradient_new[..., :i]) if i + 1 < len(kernel.hyperparameters): - assert_equal(theta[i + 1:], new_kernel.theta[i:]) + assert theta[i + 1:] == new_kernel.theta[i:] assert_array_equal(K_gradient[..., i + 1:], K_gradient_new[..., i:]) @@ -192,7 +192,7 @@ def check_hyperparameters_equal(kernel1, kernel2): if attr.startswith("hyperparameter_"): attr_value1 = getattr(kernel1, attr) attr_value2 = getattr(kernel2, attr) - assert_equal(attr_value1, attr_value2) + assert attr_value1 == attr_value2 @pytest.mark.parametrize("kernel", kernels) @@ -202,11 +202,11 @@ def test_kernel_clone(kernel): # XXX: Should this be fixed? # This differs from the sklearn's estimators equality check. - assert_equal(kernel, kernel_cloned) - assert_not_equal(id(kernel), id(kernel_cloned)) + assert kernel == kernel_cloned + assert id(kernel) != id(kernel_cloned) # Check that all constructor parameters are equal. - assert_equal(kernel.get_params(), kernel_cloned.get_params()) + assert kernel.get_params() == kernel_cloned.get_params() # Check that all hyperparameters are equal. check_hyperparameters_equal(kernel, kernel_cloned) @@ -236,9 +236,9 @@ def test_kernel_clone_after_set_params(kernel): params['length_scale_bounds'] = bounds * 2 kernel_cloned.set_params(**params) kernel_cloned_clone = clone(kernel_cloned) - assert_equal(kernel_cloned_clone.get_params(), + assert (kernel_cloned_clone.get_params() == kernel_cloned.get_params()) - assert_not_equal(id(kernel_cloned_clone), id(kernel_cloned)) + assert id(kernel_cloned_clone) != id(kernel_cloned) check_hyperparameters_equal(kernel_cloned, kernel_cloned_clone) From 103bc56aaaec6d33d19c1820a60852e64e51476d Mon Sep 17 00:00:00 2001 From: adrinjalali Date: Sun, 30 Jun 2019 15:49:12 +0200 Subject: [PATCH 09/22] fix linear_model --- sklearn/linear_model/tests/test_base.py | 72 ++++++++-------- sklearn/linear_model/tests/test_bayes.py | 2 +- .../tests/test_coordinate_descent.py | 48 +++++------ sklearn/linear_model/tests/test_huber.py | 4 +- .../linear_model/tests/test_least_angle.py | 34 ++++---- sklearn/linear_model/tests/test_logistic.py | 48 +++++------ sklearn/linear_model/tests/test_omp.py | 36 ++++---- .../tests/test_passive_aggressive.py | 8 +- sklearn/linear_model/tests/test_perceptron.py | 2 +- sklearn/linear_model/tests/test_ransac.py | 86 +++++++++---------- sklearn/linear_model/tests/test_ridge.py | 54 ++++++------ sklearn/linear_model/tests/test_sag.py | 8 +- sklearn/linear_model/tests/test_sgd.py | 86 +++++++++---------- .../tests/test_sparse_coordinate_descent.py | 18 ++-- sklearn/linear_model/tests/test_theil_sen.py | 10 +-- 15 files changed, 258 insertions(+), 258 deletions(-) diff --git a/sklearn/linear_model/tests/test_base.py b/sklearn/linear_model/tests/test_base.py index 1679f9a9c2930..84676151d2a74 100644 --- a/sklearn/linear_model/tests/test_base.py +++ b/sklearn/linear_model/tests/test_base.py @@ -73,8 +73,8 @@ def test_linear_regression_sample_weights(): coefs1 = reg.coef_ inter1 = 
reg.intercept_ - assert_equal(reg.coef_.shape, (X.shape[1], )) # sanity checks - assert_greater(reg.score(X, y), 0.5) + assert reg.coef_.shape == (X.shape[1], ) # sanity checks + assert reg.score(X, y) > 0.5 # Closed form of the weighted least square # theta = (X^T W X)^(-1) * X^T W y @@ -130,11 +130,11 @@ def test_fit_intercept(): lr3_without_intercept = LinearRegression(fit_intercept=False).fit(X3, y) lr3_with_intercept = LinearRegression(fit_intercept=True).fit(X3, y) - assert_equal(lr2_with_intercept.coef_.shape, + assert (lr2_with_intercept.coef_.shape == lr2_without_intercept.coef_.shape) - assert_equal(lr3_with_intercept.coef_.shape, + assert (lr3_with_intercept.coef_.shape == lr3_without_intercept.coef_.shape) - assert_equal(lr2_without_intercept.coef_.ndim, + assert (lr2_without_intercept.coef_.ndim == lr3_without_intercept.coef_.ndim) @@ -183,7 +183,7 @@ def test_linear_regression_multiple_outcome(random_state=0): reg = LinearRegression(fit_intercept=True) reg.fit((X), Y) - assert_equal(reg.coef_.shape, (2, n_features)) + assert reg.coef_.shape == (2, n_features) Y_pred = reg.predict(X) reg.fit(X, y) y_pred = reg.predict(X) @@ -200,7 +200,7 @@ def test_linear_regression_sparse_multiple_outcome(random_state=0): ols = LinearRegression() ols.fit(X, Y) - assert_equal(ols.coef_.shape, (2, n_features)) + assert ols.coef_.shape == (2, n_features) Y_pred = ols.predict(X) ols.fit(X, y.ravel()) y_pred = ols.predict(X) @@ -344,7 +344,7 @@ def test_csr_preprocess_data(): X[X < 2.5] = 0.0 csr = sparse.csr_matrix(X) csr_, y, _, _, _ = _preprocess_data(csr, y, True) - assert_equal(csr_.getformat(), 'csr') + assert csr_.getformat() == 'csr' @pytest.mark.parametrize('is_sparse', (True, False)) @@ -399,34 +399,34 @@ def test_dtype_preprocess_data(): _preprocess_data(X_64, y_32, fit_intercept=fit_intercept, normalize=normalize, return_mean=True)) - assert_equal(Xt_32.dtype, np.float32) - assert_equal(yt_32.dtype, np.float32) - assert_equal(X_mean_32.dtype, np.float32) - assert_equal(y_mean_32.dtype, np.float32) - assert_equal(X_norm_32.dtype, np.float32) - - assert_equal(Xt_64.dtype, np.float64) - assert_equal(yt_64.dtype, np.float64) - assert_equal(X_mean_64.dtype, np.float64) - assert_equal(y_mean_64.dtype, np.float64) - assert_equal(X_norm_64.dtype, np.float64) - - assert_equal(Xt_3264.dtype, np.float32) - assert_equal(yt_3264.dtype, np.float32) - assert_equal(X_mean_3264.dtype, np.float32) - assert_equal(y_mean_3264.dtype, np.float32) - assert_equal(X_norm_3264.dtype, np.float32) - - assert_equal(Xt_6432.dtype, np.float64) - assert_equal(yt_6432.dtype, np.float64) - assert_equal(X_mean_6432.dtype, np.float64) - assert_equal(y_mean_6432.dtype, np.float64) - assert_equal(X_norm_6432.dtype, np.float64) - - assert_equal(X_32.dtype, np.float32) - assert_equal(y_32.dtype, np.float32) - assert_equal(X_64.dtype, np.float64) - assert_equal(y_64.dtype, np.float64) + assert Xt_32.dtype == np.float32 + assert yt_32.dtype == np.float32 + assert X_mean_32.dtype == np.float32 + assert y_mean_32.dtype == np.float32 + assert X_norm_32.dtype == np.float32 + + assert Xt_64.dtype == np.float64 + assert yt_64.dtype == np.float64 + assert X_mean_64.dtype == np.float64 + assert y_mean_64.dtype == np.float64 + assert X_norm_64.dtype == np.float64 + + assert Xt_3264.dtype == np.float32 + assert yt_3264.dtype == np.float32 + assert X_mean_3264.dtype == np.float32 + assert y_mean_3264.dtype == np.float32 + assert X_norm_3264.dtype == np.float32 + + assert Xt_6432.dtype == np.float64 + assert yt_6432.dtype == np.float64 + 
assert X_mean_6432.dtype == np.float64 + assert y_mean_6432.dtype == np.float64 + assert X_norm_6432.dtype == np.float64 + + assert X_32.dtype == np.float32 + assert y_32.dtype == np.float32 + assert X_64.dtype == np.float64 + assert y_64.dtype == np.float64 assert_array_almost_equal(Xt_32, Xt_64) assert_array_almost_equal(yt_32, yt_64) diff --git a/sklearn/linear_model/tests/test_bayes.py b/sklearn/linear_model/tests/test_bayes.py index 231260c3847aa..355cd042347af 100644 --- a/sklearn/linear_model/tests/test_bayes.py +++ b/sklearn/linear_model/tests/test_bayes.py @@ -184,7 +184,7 @@ def test_update_of_sigma_in_ard(): clf.fit(X, y) # With the inputs above, ARDRegression prunes one of the two coefficients # in the first iteration. Hence, the expected shape of `sigma_` is (1, 1). - assert_equal(clf.sigma_.shape, (1, 1)) + assert clf.sigma_.shape == (1, 1) # Ensure that no error is thrown at prediction stage clf.predict(X, return_std=True) diff --git a/sklearn/linear_model/tests/test_coordinate_descent.py b/sklearn/linear_model/tests/test_coordinate_descent.py index 5e9088efe1ab9..aa6773fce415b 100644 --- a/sklearn/linear_model/tests/test_coordinate_descent.py +++ b/sklearn/linear_model/tests/test_coordinate_descent.py @@ -169,7 +169,7 @@ def test_lasso_cv(): clf.mse_path_[5].mean(), significant=2) # test set - assert_greater(clf.score(X_test, y_test), 0.99) + assert clf.score(X_test, y_test) > 0.99 def test_lasso_cv_with_some_model_selection(): @@ -250,7 +250,7 @@ def test_enet_path(): assert_almost_equal(clf.alpha_, min(clf.alphas_)) # Non-sparse ground truth: we should have selected an elastic-net # that is closer to ridge than to lasso - assert_equal(clf.l1_ratio_, min(clf.l1_ratio)) + assert clf.l1_ratio_ == min(clf.l1_ratio) clf = ElasticNetCV(alphas=[0.01, 0.05, 0.1], eps=2e-3, l1_ratio=[0.5, 0.7], cv=3, @@ -262,11 +262,11 @@ def test_enet_path(): assert_almost_equal(clf.alpha_, min(clf.alphas_)) # Non-sparse ground truth: we should have selected an elastic-net # that is closer to ridge than to lasso - assert_equal(clf.l1_ratio_, min(clf.l1_ratio)) + assert clf.l1_ratio_ == min(clf.l1_ratio) # We are in well-conditioned settings with low noise: we should # have a good test-set performance - assert_greater(clf.score(X_test, y_test), 0.99) + assert clf.score(X_test, y_test) > 0.99 # Multi-output/target case X, y, X_test, y_test = build_dataset(n_features=10, n_targets=3) @@ -275,8 +275,8 @@ def test_enet_path(): ignore_warnings(clf.fit)(X, y) # We are in well-conditioned settings with low noise: we should # have a good test-set performance - assert_greater(clf.score(X_test, y_test), 0.99) - assert_equal(clf.coef_.shape, (3, 10)) + assert clf.score(X_test, y_test) > 0.99 + assert clf.coef_.shape == (3, 10) # Mono-output should have same cross-validated alpha_ and l1_ratio_ # in both cases. 
@@ -297,8 +297,8 @@ def test_path_parameters(): l1_ratio=0.5, tol=1e-3) clf.fit(X, y) # new params assert_almost_equal(0.5, clf.l1_ratio) - assert_equal(50, clf.n_alphas) - assert_equal(50, len(clf.alphas_)) + assert 50 == clf.n_alphas + assert 50 == len(clf.alphas_) def test_warm_start(): @@ -465,19 +465,19 @@ def test_multitask_enet_and_lasso_cv(): clf = MultiTaskElasticNetCV(n_alphas=10, eps=1e-3, max_iter=100, l1_ratio=[0.3, 0.5], tol=1e-3, cv=3) clf.fit(X, y) - assert_equal(0.5, clf.l1_ratio_) - assert_equal((3, X.shape[1]), clf.coef_.shape) - assert_equal((3, ), clf.intercept_.shape) - assert_equal((2, 10, 3), clf.mse_path_.shape) - assert_equal((2, 10), clf.alphas_.shape) + assert 0.5 == clf.l1_ratio_ + assert (3, X.shape[1]) == clf.coef_.shape + assert (3, ) == clf.intercept_.shape + assert (2, 10, 3) == clf.mse_path_.shape + assert (2, 10) == clf.alphas_.shape X, y, _, _ = build_dataset(n_targets=3) clf = MultiTaskLassoCV(n_alphas=10, eps=1e-3, max_iter=100, tol=1e-3, cv=3) clf.fit(X, y) - assert_equal((3, X.shape[1]), clf.coef_.shape) - assert_equal((3, ), clf.intercept_.shape) - assert_equal((10, 3), clf.mse_path_.shape) - assert_equal(10, len(clf.alphas_)) + assert (3, X.shape[1]) == clf.coef_.shape + assert (3, ) == clf.intercept_.shape + assert (10, 3) == clf.mse_path_.shape + assert 10 == len(clf.alphas_) def test_1d_multioutput_enet_and_multitask_enet_cv(): @@ -540,20 +540,20 @@ def test_warm_start_convergence(): n_iter_reference = model.n_iter_ # This dataset is not trivial enough for the model to converge in one pass. - assert_greater(n_iter_reference, 2) + assert n_iter_reference > 2 # Check that n_iter_ is invariant to multiple calls to fit # when warm_start=False, all else being equal. model.fit(X, y) n_iter_cold_start = model.n_iter_ - assert_equal(n_iter_cold_start, n_iter_reference) + assert n_iter_cold_start == n_iter_reference # Fit the same model again, using a warm start: the optimizer just performs # a single pass before checking that it has already converged model.set_params(warm_start=True) model.fit(X, y) n_iter_warm_start = model.n_iter_ - assert_equal(n_iter_warm_start, 1) + assert n_iter_warm_start == 1 def test_warm_start_convergence_with_regularizer_decrement(): @@ -568,7 +568,7 @@ def test_warm_start_convergence_with_regularizer_decrement(): # Fitting with high regularization is easier it should converge faster # in general. 
high_reg_model = ElasticNet(alpha=final_alpha * 10).fit(X, y) - assert_greater(low_reg_model.n_iter_, high_reg_model.n_iter_) + assert low_reg_model.n_iter_ > high_reg_model.n_iter_ # Fit the solution to the original, less regularized version of the # problem but from the solution of the highly regularized variant of @@ -577,7 +577,7 @@ def test_warm_start_convergence_with_regularizer_decrement(): warm_low_reg_model = deepcopy(high_reg_model) warm_low_reg_model.set_params(warm_start=True, alpha=final_alpha) warm_low_reg_model.fit(X, y) - assert_greater(low_reg_model.n_iter_, warm_low_reg_model.n_iter_) + assert low_reg_model.n_iter_ > warm_low_reg_model.n_iter_ def test_random_descent(): @@ -741,7 +741,7 @@ def test_enet_float_precision(): coef[('simple', dtype)] = clf.coef_ intercept[('simple', dtype)] = clf.intercept_ - assert_equal(clf.coef_.dtype, dtype) + assert clf.coef_.dtype == dtype # test precompute Gram array Gram = X.T.dot(X) @@ -762,7 +762,7 @@ def test_enet_float_precision(): clf_multioutput.fit(X, multi_y) coef[('multi', dtype)] = clf_multioutput.coef_ intercept[('multi', dtype)] = clf_multioutput.intercept_ - assert_equal(clf.coef_.dtype, dtype) + assert clf.coef_.dtype == dtype for v in ['simple', 'multi']: assert_array_almost_equal(coef[(v, np.float32)], diff --git a/sklearn/linear_model/tests/test_huber.py b/sklearn/linear_model/tests/test_huber.py index 156ac72958d01..4145888c3d996 100644 --- a/sklearn/linear_model/tests/test_huber.py +++ b/sklearn/linear_model/tests/test_huber.py @@ -201,10 +201,10 @@ def test_huber_better_r2_score(): ridge.fit(X, y) ridge_score = ridge.score(X[mask], y[mask]) ridge_outlier_score = ridge.score(X[~mask], y[~mask]) - assert_greater(huber_score, ridge_score) + assert huber_score > ridge_score # The huber model should also fit poorly on the outliers. - assert_greater(ridge_outlier_score, huber_outlier_score) + assert ridge_outlier_score > huber_outlier_score def test_huber_bool(): diff --git a/sklearn/linear_model/tests/test_least_angle.py b/sklearn/linear_model/tests/test_least_angle.py index 1533c981fa391..9633989693a83 100644 --- a/sklearn/linear_model/tests/test_least_angle.py +++ b/sklearn/linear_model/tests/test_least_angle.py @@ -78,7 +78,7 @@ def test_simple_precomputed(): def _assert_same_lars_path_result(output1, output2): - assert_equal(len(output1), len(output2)) + assert len(output1) == len(output2) for o1, o2 in zip(output1, output2): assert_allclose(o1, o2) @@ -150,7 +150,7 @@ def test_collinearity(): _, _, coef_path_ = f(linear_model.lars_path)(X, y, alpha_min=0.01) assert not np.isnan(coef_path_).any() residual = np.dot(X, coef_path_[:, -1]) - y - assert_less((residual ** 2).sum(), 1.) # just make sure it's bounded + assert (residual ** 2).sum() < 1. # just make sure it's bounded n_samples = 10 X = rng.rand(n_samples, 5) @@ -246,7 +246,7 @@ def test_rank_deficient_design(): coef_cd_ = coord_descent.fit(X, y).coef_ obj_cd = ((1. / (2. * 3.)) * linalg.norm(y - np.dot(X, coef_cd_)) ** 2 + .1 * linalg.norm(coef_cd_, 1)) - assert_less(obj_lars, obj_cd * (1. + 1e-8)) + assert obj_lars < obj_cd * (1. 
+ 1e-8) def test_lasso_lars_vs_lasso_cd(): @@ -262,7 +262,7 @@ def test_lasso_lars_vs_lasso_cd(): lasso_cd.alpha = a lasso_cd.fit(X, y) error = linalg.norm(c - lasso_cd.coef_) - assert_less(error, 0.01) + assert error < 0.01 # similar test, with the classifiers for alpha in np.linspace(1e-2, 1 - 1e-2, 20): @@ -270,7 +270,7 @@ def test_lasso_lars_vs_lasso_cd(): clf2 = linear_model.Lasso(alpha=alpha, tol=1e-8, normalize=False).fit(X, y) err = linalg.norm(clf1.coef_ - clf2.coef_) - assert_less(err, 1e-3) + assert err < 1e-3 # same test, with normalized data X = diabetes.data @@ -283,7 +283,7 @@ def test_lasso_lars_vs_lasso_cd(): lasso_cd.alpha = a lasso_cd.fit(X, y) error = linalg.norm(c - lasso_cd.coef_) - assert_less(error, 0.01) + assert error < 0.01 def test_lasso_lars_vs_lasso_cd_early_stopping(): @@ -299,7 +299,7 @@ def test_lasso_lars_vs_lasso_cd_early_stopping(): lasso_cd.alpha = alphas[-1] lasso_cd.fit(X, y) error = linalg.norm(lasso_path[:, -1] - lasso_cd.coef_) - assert_less(error, 0.01) + assert error < 0.01 # same test, with normalization for alpha_min in alphas_min: @@ -310,7 +310,7 @@ def test_lasso_lars_vs_lasso_cd_early_stopping(): lasso_cd.alpha = alphas[-1] lasso_cd.fit(X, y) error = linalg.norm(lasso_path[:, -1] - lasso_cd.coef_) - assert_less(error, 0.01) + assert error < 0.01 def test_lasso_lars_path_length(): @@ -382,7 +382,7 @@ def objective_function(coef): cd_coef_ = coord_descent.fit(X, y).coef_ cd_obj = objective_function(cd_coef_) - assert_less(lars_obj, cd_obj * (1. + 1e-8)) + assert lars_obj < cd_obj * (1. + 1e-8) def test_lars_add_features(): @@ -399,10 +399,10 @@ def test_lars_add_features(): def test_lars_n_nonzero_coefs(verbose=False): lars = linear_model.Lars(n_nonzero_coefs=6, verbose=verbose) lars.fit(X, y) - assert_equal(len(lars.coef_.nonzero()[0]), 6) + assert len(lars.coef_.nonzero()[0]) == 6 # The path should be of length 6 + 1 in a Lars going down to 6 # non-zero coefs - assert_equal(len(lars.alphas_), 7) + assert len(lars.alphas_) == 7 @ignore_warnings @@ -482,9 +482,9 @@ def test_lasso_lars_ic(): lars_aic.fit(X, y) nonzero_bic = np.where(lars_bic.coef_)[0] nonzero_aic = np.where(lars_aic.coef_)[0] - assert_greater(lars_bic.alpha_, lars_aic.alpha_) - assert_less(len(nonzero_bic), len(nonzero_aic)) - assert_less(np.max(nonzero_bic), diabetes.data.shape[1]) + assert lars_bic.alpha_ > lars_aic.alpha_ + assert len(nonzero_bic) < len(nonzero_aic) + assert np.max(nonzero_bic) < diabetes.data.shape[1] # test error on unknown IC lars_broken = linear_model.LassoLarsIC('') @@ -577,7 +577,7 @@ def test_lasso_lars_vs_lasso_cd_positive(): lasso_cd.alpha = a lasso_cd.fit(X, y) error = linalg.norm(c - lasso_cd.coef_) - assert_less(error, 0.01) + assert error < 0.01 # The range of alphas chosen for coefficient comparison here is restricted # as compared with the above test without the positive option. 
This is due @@ -594,7 +594,7 @@ def test_lasso_lars_vs_lasso_cd_positive(): clf2 = linear_model.Lasso(fit_intercept=False, alpha=alpha, tol=1e-8, normalize=False, positive=True).fit(X, y) err = linalg.norm(clf1.coef_ - clf2.coef_) - assert_less(err, 1e-3) + assert err < 1e-3 # normalized data X = diabetes.data @@ -606,7 +606,7 @@ def test_lasso_lars_vs_lasso_cd_positive(): lasso_cd.alpha = a lasso_cd.fit(X, y) error = linalg.norm(c - lasso_cd.coef_) - assert_less(error, 0.01) + assert error < 0.01 def test_lasso_lars_vs_R_implementation(): diff --git a/sklearn/linear_model/tests/test_logistic.py b/sklearn/linear_model/tests/test_logistic.py index 8f2c59f30a216..cdbe2f9b3ba37 100644 --- a/sklearn/linear_model/tests/test_logistic.py +++ b/sklearn/linear_model/tests/test_logistic.py @@ -56,11 +56,11 @@ def check_predictions(clf, X, y): predicted = clf.fit(X, y).predict(X) assert_array_equal(clf.classes_, classes) - assert_equal(predicted.shape, (n_samples,)) + assert predicted.shape == (n_samples,) assert_array_equal(predicted, y) probabilities = clf.predict_proba(X) - assert_equal(probabilities.shape, (n_samples, n_classes)) + assert probabilities.shape == (n_samples, n_classes) assert_array_almost_equal(probabilities.sum(axis=1), np.ones(n_samples)) assert_array_equal(probabilities.argmax(axis=1), y) @@ -188,14 +188,14 @@ def test_predict_iris(): assert_array_equal(np.unique(target), clf.classes_) pred = clf.predict(iris.data) - assert_greater(np.mean(pred == target), .95) + assert np.mean(pred == target) > .95 probabilities = clf.predict_proba(iris.data) assert_array_almost_equal(probabilities.sum(axis=1), np.ones(n_samples)) pred = iris.target_names[probabilities.argmax(axis=1)] - assert_greater(np.mean(pred == target), .95) + assert np.mean(pred == target) > .95 @pytest.mark.parametrize('solver', ['lbfgs', 'newton-cg', 'sag', 'saga']) @@ -260,8 +260,8 @@ def test_multinomial_binary(solver): random_state=42, max_iter=2000) clf.fit(iris.data, target) - assert_equal(clf.coef_.shape, (1, iris.data.shape[1])) - assert_equal(clf.intercept_.shape, (1,)) + assert clf.coef_.shape == (1, iris.data.shape[1]) + assert clf.intercept_.shape == (1,) assert_array_equal(clf.predict(iris.data), target) mlr = LogisticRegression(solver=solver, multi_class='multinomial', @@ -269,7 +269,7 @@ def test_multinomial_binary(solver): mlr.fit(iris.data, target) pred = clf.classes_[np.argmax(clf.predict_log_proba(iris.data), axis=1)] - assert_greater(np.mean(pred == target), .9) + assert np.mean(pred == target) > .9 def test_multinomial_binary_probabilities(): @@ -513,7 +513,7 @@ def test_logistic_cv(): assert_array_equal(lr_cv.coef_.shape, (1, n_features)) assert_array_equal(lr_cv.classes_, [-1, 1]) - assert_equal(len(lr_cv.classes_), 2) + assert len(lr_cv.classes_) == 2 coefs_paths = np.asarray(list(lr_cv.coefs_paths_.values())) assert_array_equal(coefs_paths.shape, (1, 3, 1, n_features)) @@ -604,7 +604,7 @@ def test_logistic_cv_sparse(): clfs.fit(csr, y) assert_array_almost_equal(clfs.coef_, clf.coef_) assert_array_almost_equal(clfs.intercept_, clf.intercept_) - assert_equal(clfs.C_, clf.C_) + assert clfs.C_ == clf.C_ def test_intercept_logistic_helper(): @@ -1040,7 +1040,7 @@ def test_logreg_intercept_scaling_zero(): clf = LogisticRegression(fit_intercept=False) clf.fit(X, Y1) - assert_equal(clf.intercept_, 0.) + assert clf.intercept_ == 0. 
def test_logreg_l1(): @@ -1147,13 +1147,13 @@ def test_logreg_predict_proba_multinomial(): clf_ovr = LogisticRegression(multi_class="ovr", solver="lbfgs") clf_ovr.fit(X, y) clf_ovr_loss = log_loss(y, clf_ovr.predict_proba(X)) - assert_greater(clf_ovr_loss, clf_multi_loss) + assert clf_ovr_loss > clf_multi_loss # Predicted probabilities using the soft-max function should give a # smaller loss than those using the logistic function. clf_multi_loss = log_loss(y, clf_multi.predict_proba(X)) clf_wrong_loss = log_loss(y, clf_multi._predict_proba_lr(X)) - assert_greater(clf_wrong_loss, clf_multi_loss) + assert clf_wrong_loss > clf_multi_loss def test_max_iter(): @@ -1172,7 +1172,7 @@ def test_max_iter(): multi_class=multi_class, random_state=0, solver=solver) assert_warns(ConvergenceWarning, lr.fit, X, y_bin) - assert_equal(lr.n_iter_[0], max_iter) + assert lr.n_iter_[0] == max_iter @pytest.mark.parametrize('solver', @@ -1193,16 +1193,16 @@ def test_n_iter(solver): solver=solver, C=1., random_state=42, max_iter=100) clf.fit(X, y) - assert_equal(clf.n_iter_.shape, (n_classes,)) + assert clf.n_iter_.shape == (n_classes,) n_classes = np.unique(y).shape[0] clf = LogisticRegressionCV(tol=1e-2, multi_class='ovr', solver=solver, Cs=n_Cs, cv=n_cv_fold, random_state=42, max_iter=100) clf.fit(X, y) - assert_equal(clf.n_iter_.shape, (n_classes, n_cv_fold, n_Cs)) + assert clf.n_iter_.shape == (n_classes, n_cv_fold, n_Cs) clf.fit(X, y_bin) - assert_equal(clf.n_iter_.shape, (1, n_cv_fold, n_Cs)) + assert clf.n_iter_.shape == (1, n_cv_fold, n_Cs) # multinomial case n_classes = 1 @@ -1213,15 +1213,15 @@ def test_n_iter(solver): solver=solver, C=1., random_state=42, max_iter=100) clf.fit(X, y) - assert_equal(clf.n_iter_.shape, (n_classes,)) + assert clf.n_iter_.shape == (n_classes,) clf = LogisticRegressionCV(tol=1e-2, multi_class='multinomial', solver=solver, Cs=n_Cs, cv=n_cv_fold, random_state=42, max_iter=100) clf.fit(X, y) - assert_equal(clf.n_iter_.shape, (n_classes, n_cv_fold, n_Cs)) + assert clf.n_iter_.shape == (n_classes, n_cv_fold, n_Cs) clf.fit(X, y_bin) - assert_equal(clf.n_iter_.shape, (1, n_cv_fold, n_Cs)) + assert clf.n_iter_.shape == (1, n_cv_fold, n_Cs) @pytest.mark.parametrize('solver', ('newton-cg', 'sag', 'saga', 'lbfgs')) @@ -1251,9 +1251,9 @@ def test_warm_start(solver, warm_start, fit_intercept, multi_class): % (solver, multi_class, str(fit_intercept), str(warm_start))) if warm_start: - assert_greater(2.0, cum_diff, msg) + assert 2.0 > cum_diff, msg else: - assert_greater(cum_diff, 2.0, msg) + assert cum_diff > 2.0, msg def test_saga_vs_liblinear(): @@ -1314,17 +1314,17 @@ def test_dtype_match(solver, multi_class): # Check type consistency lr_32 = clone(lr_templ) lr_32.fit(X_32, y_32) - assert_equal(lr_32.coef_.dtype, X_32.dtype) + assert lr_32.coef_.dtype == X_32.dtype # check consistency with sparsity lr_32_sparse = clone(lr_templ) lr_32_sparse.fit(X_sparse_32, y_32) - assert_equal(lr_32_sparse.coef_.dtype, X_sparse_32.dtype) + assert lr_32_sparse.coef_.dtype == X_sparse_32.dtype # Check accuracy consistency lr_64 = clone(lr_templ) lr_64.fit(X_64, y_64) - assert_equal(lr_64.coef_.dtype, X_64.dtype) + assert lr_64.coef_.dtype == X_64.dtype # solver_tol bounds the norm of the loss gradient # dw ~= inv(H)*grad ==> |dw| ~= |inv(H)| * solver_tol, where H - hessian diff --git a/sklearn/linear_model/tests/test_omp.py b/sklearn/linear_model/tests/test_omp.py index c0736f740835a..074af8045a59d 100644 --- a/sklearn/linear_model/tests/test_omp.py +++ b/sklearn/linear_model/tests/test_omp.py @@ 
-30,16 +30,16 @@ def test_correct_shapes(): - assert_equal(orthogonal_mp(X, y[:, 0], n_nonzero_coefs=5).shape, + assert (orthogonal_mp(X, y[:, 0], n_nonzero_coefs=5).shape == (n_features,)) - assert_equal(orthogonal_mp(X, y, n_nonzero_coefs=5).shape, + assert (orthogonal_mp(X, y, n_nonzero_coefs=5).shape == (n_features, 3)) def test_correct_shapes_gram(): - assert_equal(orthogonal_mp_gram(G, Xy[:, 0], n_nonzero_coefs=5).shape, + assert (orthogonal_mp_gram(G, Xy[:, 0], n_nonzero_coefs=5).shape == (n_features,)) - assert_equal(orthogonal_mp_gram(G, Xy, n_nonzero_coefs=5).shape, + assert (orthogonal_mp_gram(G, Xy, n_nonzero_coefs=5).shape == (n_features, 3)) @@ -120,13 +120,13 @@ def test_orthogonal_mp_gram_readonly(): def test_estimator(): omp = OrthogonalMatchingPursuit(n_nonzero_coefs=n_nonzero_coefs) omp.fit(X, y[:, 0]) - assert_equal(omp.coef_.shape, (n_features,)) - assert_equal(omp.intercept_.shape, ()) + assert omp.coef_.shape == (n_features,) + assert omp.intercept_.shape == () assert np.count_nonzero(omp.coef_) <= n_nonzero_coefs omp.fit(X, y) - assert_equal(omp.coef_.shape, (n_targets, n_features)) - assert_equal(omp.intercept_.shape, (n_targets,)) + assert omp.coef_.shape == (n_targets, n_features) + assert omp.intercept_.shape == (n_targets,) assert np.count_nonzero(omp.coef_) <= n_targets * n_nonzero_coefs coef_normalized = omp.coef_[0].copy() @@ -137,12 +137,12 @@ def test_estimator(): omp.set_params(fit_intercept=False, normalize=False) omp.fit(X, y[:, 0]) assert np.count_nonzero(omp.coef_) <= n_nonzero_coefs - assert_equal(omp.coef_.shape, (n_features,)) - assert_equal(omp.intercept_, 0) + assert omp.coef_.shape == (n_features,) + assert omp.intercept_ == 0 omp.fit(X, y) - assert_equal(omp.coef_.shape, (n_targets, n_features)) - assert_equal(omp.intercept_, 0) + assert omp.coef_.shape == (n_targets, n_features) + assert omp.intercept_ == 0 assert np.count_nonzero(omp.coef_) <= n_targets * n_nonzero_coefs @@ -175,18 +175,18 @@ def test_no_atoms(): Xy_empty = np.dot(X.T, y_empty) gamma_empty = ignore_warnings(orthogonal_mp)(X, y_empty, 1) gamma_empty_gram = ignore_warnings(orthogonal_mp)(G, Xy_empty, 1) - assert_equal(np.all(gamma_empty == 0), True) - assert_equal(np.all(gamma_empty_gram == 0), True) + assert np.all(gamma_empty == 0) + assert np.all(gamma_empty_gram == 0) def test_omp_path(): path = orthogonal_mp(X, y, n_nonzero_coefs=5, return_path=True) last = orthogonal_mp(X, y, n_nonzero_coefs=5, return_path=False) - assert_equal(path.shape, (n_features, n_targets, 5)) + assert path.shape == (n_features, n_targets, 5) assert_array_almost_equal(path[:, :, -1], last) path = orthogonal_mp_gram(G, Xy, n_nonzero_coefs=5, return_path=True) last = orthogonal_mp_gram(G, Xy, n_nonzero_coefs=5, return_path=False) - assert_equal(path.shape, (n_features, n_targets, 5)) + assert path.shape == (n_features, n_targets, 5) assert_array_almost_equal(path[:, :, -1], last) @@ -195,7 +195,7 @@ def test_omp_return_path_prop_with_gram(): precompute=True) last = orthogonal_mp(X, y, n_nonzero_coefs=5, return_path=False, precompute=True) - assert_equal(path.shape, (n_features, n_targets, 5)) + assert path.shape == (n_features, n_targets, 5) assert_array_almost_equal(path[:, :, -1], last) @@ -205,7 +205,7 @@ def test_omp_cv(): ompcv = OrthogonalMatchingPursuitCV(normalize=True, fit_intercept=False, max_iter=10) ompcv.fit(X, y_) - assert_equal(ompcv.n_nonzero_coefs_, n_nonzero_coefs) + assert ompcv.n_nonzero_coefs_ == n_nonzero_coefs assert_array_almost_equal(ompcv.coef_, gamma_) omp =
OrthogonalMatchingPursuit(normalize=True, fit_intercept=False, n_nonzero_coefs=ompcv.n_nonzero_coefs_) diff --git a/sklearn/linear_model/tests/test_passive_aggressive.py b/sklearn/linear_model/tests/test_passive_aggressive.py index f02db4ccdf237..d0253bbce64f6 100644 --- a/sklearn/linear_model/tests/test_passive_aggressive.py +++ b/sklearn/linear_model/tests/test_passive_aggressive.py @@ -79,7 +79,7 @@ def test_classifier_accuracy(): random_state=1, average=average, tol=None) clf.fit(data, y) score = clf.score(data, y) - assert_greater(score, 0.79) + assert score > 0.79 if average: assert hasattr(clf, 'average_coef_') assert hasattr(clf, 'average_intercept_') @@ -99,7 +99,7 @@ def test_classifier_partial_fit(): for t in range(30): clf.partial_fit(data, y, classes) score = clf.score(data, y) - assert_greater(score, 0.79) + assert score > 0.79 if average: assert hasattr(clf, 'average_coef_') assert hasattr(clf, 'average_intercept_') @@ -240,7 +240,7 @@ def test_regressor_mse(): random_state=0, average=average, max_iter=5) reg.fit(data, y_bin) pred = reg.predict(data) - assert_less(np.mean((pred - y_bin) ** 2), 1.7) + assert np.mean((pred - y_bin) ** 2) < 1.7 if average: assert hasattr(reg, 'average_coef_') assert hasattr(reg, 'average_intercept_') @@ -262,7 +262,7 @@ def test_regressor_partial_fit(): for t in range(50): reg.partial_fit(data, y_bin) pred = reg.predict(data) - assert_less(np.mean((pred - y_bin) ** 2), 1.7) + assert np.mean((pred - y_bin) ** 2) < 1.7 if average: assert hasattr(reg, 'average_coef_') assert hasattr(reg, 'average_intercept_') diff --git a/sklearn/linear_model/tests/test_perceptron.py b/sklearn/linear_model/tests/test_perceptron.py index e46949c256e9f..75b91e7b50ba9 100644 --- a/sklearn/linear_model/tests/test_perceptron.py +++ b/sklearn/linear_model/tests/test_perceptron.py @@ -51,7 +51,7 @@ def test_perceptron_accuracy(): clf = Perceptron(max_iter=100, tol=None, shuffle=False) clf.fit(data, y) score = clf.score(data, y) - assert_greater(score, 0.7) + assert score > 0.7 # 0.23. warning about tol not having its correct default value. 
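The conversions in these test files follow one mechanical pattern: nose-style helpers such as assert_equal, assert_greater and assert_less become bare assert statements, which pytest rewrites so that both operands are reported on failure. A minimal, self-contained sketch of that idiom follows; it is illustrative only, not part of the patch, and the test names and values are made up.

# Illustrative sketch, not part of the patch: the bare-assert idioms that the
# deprecated helpers map onto, assuming pytest is used as the test runner.
import numpy as np
import pytest


def test_scalar_assertions():
    shape = np.zeros((3, 2)).shape
    assert shape == (3, 2)     # was: assert_equal(shape, (3, 2))

    score = 0.93
    assert score > 0.79        # was: assert_greater(score, 0.79)

    error = abs(0.93 - 0.931)
    assert error < 0.01        # was: assert_less(error, 0.01)


def test_float_equality_uses_approx():
    # Exact == on floats is fragile; pytest.approx keeps the bare-assert
    # style while tolerating rounding error, much like assert_almost_equal.
    assert 0.1 + 0.2 == pytest.approx(0.3)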
diff --git a/sklearn/linear_model/tests/test_ransac.py b/sklearn/linear_model/tests/test_ransac.py index 91b1bd34dc866..af8153590d2ff 100644 --- a/sklearn/linear_model/tests/test_ransac.py +++ b/sklearn/linear_model/tests/test_ransac.py @@ -45,13 +45,13 @@ def test_ransac_inliers_outliers(): ).astype(np.bool_) ref_inlier_mask[outliers] = False - assert_equal(ransac_estimator.inlier_mask_, ref_inlier_mask) + assert_array_equal(ransac_estimator.inlier_mask_, ref_inlier_mask) def test_ransac_is_data_valid(): def is_data_valid(X, y): - assert_equal(X.shape[0], 2) - assert_equal(y.shape[0], 2) + assert X.shape[0] == 2 + assert y.shape[0] == 2 return False rng = np.random.RandomState(0) @@ -69,8 +69,8 @@ def is_data_valid(X, y): def test_ransac_is_model_valid(): def is_model_valid(estimator, X, y): - assert_equal(X.shape[0], 2) - assert_equal(y.shape[0], 2) + assert X.shape[0] == 2 + assert y.shape[0] == 2 return False base_estimator = LinearRegression() @@ -99,7 +99,7 @@ def test_ransac_max_trials(): for i in range(50): ransac_estimator.set_params(min_samples=2, random_state=i) ransac_estimator.fit(X, y) - assert_less(ransac_estimator.n_trials_, max_trials + 1) + assert ransac_estimator.n_trials_ < max_trials + 1 def test_ransac_stop_n_inliers(): base_estimator = LinearRegression() @@ -108,7 +108,7 @@ def test_ransac_stop_n_inliers(): random_state=0) ransac_estimator.fit(X, y) - assert_equal(ransac_estimator.n_trials_, 1) + assert ransac_estimator.n_trials_ == 1 def test_ransac_stop_score(): @@ -118,7 +118,7 @@ def test_ransac_stop_score(): random_state=0) ransac_estimator.fit(X, y) - assert_equal(ransac_estimator.n_trials_, 1) + assert ransac_estimator.n_trials_ == 1 def test_ransac_score(): @@ -132,8 +132,8 @@ def test_ransac_score(): residual_threshold=0.5, random_state=0) ransac_estimator.fit(X, y) - assert_equal(ransac_estimator.score(X[2:], y[2:]), 1) - assert_less(ransac_estimator.score(X[:2], y[:2]), 1) + assert ransac_estimator.score(X[2:], y[2:]) == 1 + assert ransac_estimator.score(X[:2], y[:2]) < 1 def test_ransac_predict(): @@ -147,7 +147,7 @@ def test_ransac_predict(): residual_threshold=0.5, random_state=0) ransac_estimator.fit(X, y) - assert_equal(ransac_estimator.predict(X), np.zeros(100)) + assert_array_equal(ransac_estimator.predict(X), np.zeros(100)) def test_ransac_resid_thresh_no_inliers(): @@ -160,9 +160,9 @@ def test_ransac_resid_thresh_no_inliers(): msg = ("RANSAC could not find a valid consensus set") assert_raises_regexp(ValueError, msg, ransac_estimator.fit, X, y) - assert_equal(ransac_estimator.n_skips_no_inliers_, 5) - assert_equal(ransac_estimator.n_skips_invalid_data_, 0) - assert_equal(ransac_estimator.n_skips_invalid_model_, 0) + assert ransac_estimator.n_skips_no_inliers_ == 5 + assert ransac_estimator.n_skips_invalid_data_ == 0 + assert ransac_estimator.n_skips_invalid_model_ == 0 def test_ransac_no_valid_data(): @@ -176,9 +176,9 @@ def is_data_valid(X, y): msg = ("RANSAC could not find a valid consensus set") assert_raises_regexp(ValueError, msg, ransac_estimator.fit, X, y) - assert_equal(ransac_estimator.n_skips_no_inliers_, 0) - assert_equal(ransac_estimator.n_skips_invalid_data_, 5) - assert_equal(ransac_estimator.n_skips_invalid_model_, 0) + assert ransac_estimator.n_skips_no_inliers_ == 0 + assert ransac_estimator.n_skips_invalid_data_ == 5 + assert ransac_estimator.n_skips_invalid_model_ == 0 def test_ransac_no_valid_model(): @@ -192,9 +192,9 @@ def is_model_valid(estimator, X, y): msg = ("RANSAC could not find a valid consensus set") 
assert_raises_regexp(ValueError, msg, ransac_estimator.fit, X, y) - assert_equal(ransac_estimator.n_skips_no_inliers_, 0) - assert_equal(ransac_estimator.n_skips_invalid_data_, 0) - assert_equal(ransac_estimator.n_skips_invalid_model_, 5) + assert ransac_estimator.n_skips_no_inliers_ == 0 + assert ransac_estimator.n_skips_invalid_data_ == 0 + assert ransac_estimator.n_skips_invalid_model_ == 5 def test_ransac_exceed_max_skips(): @@ -209,9 +209,9 @@ def is_data_valid(X, y): msg = ("RANSAC skipped more iterations than `max_skips`") assert_raises_regexp(ValueError, msg, ransac_estimator.fit, X, y) - assert_equal(ransac_estimator.n_skips_no_inliers_, 0) - assert_equal(ransac_estimator.n_skips_invalid_data_, 4) - assert_equal(ransac_estimator.n_skips_invalid_model_, 0) + assert ransac_estimator.n_skips_no_inliers_ == 0 + assert ransac_estimator.n_skips_invalid_data_ == 4 + assert ransac_estimator.n_skips_invalid_model_ == 0 def test_ransac_warn_exceed_max_skips(): @@ -233,9 +233,9 @@ def is_data_valid(X, y): max_trials=5) assert_warns(ConvergenceWarning, ransac_estimator.fit, X, y) - assert_equal(ransac_estimator.n_skips_no_inliers_, 0) - assert_equal(ransac_estimator.n_skips_invalid_data_, 4) - assert_equal(ransac_estimator.n_skips_invalid_model_, 0) + assert ransac_estimator.n_skips_no_inliers_ == 0 + assert ransac_estimator.n_skips_invalid_data_ == 4 + assert ransac_estimator.n_skips_invalid_model_ == 0 def test_ransac_sparse_coo(): @@ -250,7 +250,7 @@ def test_ransac_sparse_coo(): ).astype(np.bool_) ref_inlier_mask[outliers] = False - assert_equal(ransac_estimator.inlier_mask_, ref_inlier_mask) + assert_array_equal(ransac_estimator.inlier_mask_, ref_inlier_mask) def test_ransac_sparse_csr(): @@ -265,7 +265,7 @@ def test_ransac_sparse_csr(): ).astype(np.bool_) ref_inlier_mask[outliers] = False - assert_equal(ransac_estimator.inlier_mask_, ref_inlier_mask) + assert_array_equal(ransac_estimator.inlier_mask_, ref_inlier_mask) def test_ransac_sparse_csc(): @@ -280,7 +280,7 @@ def test_ransac_sparse_csc(): ).astype(np.bool_) ref_inlier_mask[outliers] = False - assert_equal(ransac_estimator.inlier_mask_, ref_inlier_mask) + assert_array_equal(ransac_estimator.inlier_mask_, ref_inlier_mask) def test_ransac_none_estimator(): @@ -352,7 +352,7 @@ def test_ransac_multi_dimensional_targets(): ).astype(np.bool_) ref_inlier_mask[outliers] = False - assert_equal(ransac_estimator.inlier_mask_, ref_inlier_mask) + assert_array_equal(ransac_estimator.inlier_mask_, ref_inlier_mask) @pytest.mark.filterwarnings('ignore: The default value of multioutput') # 0.23 @@ -409,7 +409,7 @@ def test_ransac_default_residual_threshold(): ).astype(np.bool_) ref_inlier_mask[outliers] = False - assert_equal(ransac_estimator.inlier_mask_, ref_inlier_mask) + assert_array_equal(ransac_estimator.inlier_mask_, ref_inlier_mask) def test_ransac_dynamic_max_trials(): @@ -419,29 +419,29 @@ def test_ransac_dynamic_max_trials(): # Cambridge University Press, ISBN: 0521540518 # e = 0%, min_samples = X - assert_equal(_dynamic_max_trials(100, 100, 2, 0.99), 1) + assert _dynamic_max_trials(100, 100, 2, 0.99) == 1 # e = 5%, min_samples = 2 - assert_equal(_dynamic_max_trials(95, 100, 2, 0.99), 2) + assert _dynamic_max_trials(95, 100, 2, 0.99) == 2 # e = 10%, min_samples = 2 - assert_equal(_dynamic_max_trials(90, 100, 2, 0.99), 3) + assert _dynamic_max_trials(90, 100, 2, 0.99) == 3 # e = 30%, min_samples = 2 - assert_equal(_dynamic_max_trials(70, 100, 2, 0.99), 7) + assert _dynamic_max_trials(70, 100, 2, 0.99) == 7 # e = 50%, min_samples = 2 - 
assert_equal(_dynamic_max_trials(50, 100, 2, 0.99), 17) + assert _dynamic_max_trials(50, 100, 2, 0.99) == 17 # e = 5%, min_samples = 8 - assert_equal(_dynamic_max_trials(95, 100, 8, 0.99), 5) + assert _dynamic_max_trials(95, 100, 8, 0.99) == 5 # e = 10%, min_samples = 8 - assert_equal(_dynamic_max_trials(90, 100, 8, 0.99), 9) + assert _dynamic_max_trials(90, 100, 8, 0.99) == 9 # e = 30%, min_samples = 8 - assert_equal(_dynamic_max_trials(70, 100, 8, 0.99), 78) + assert _dynamic_max_trials(70, 100, 8, 0.99) == 78 # e = 50%, min_samples = 8 - assert_equal(_dynamic_max_trials(50, 100, 8, 0.99), 1177) + assert _dynamic_max_trials(50, 100, 8, 0.99) == 1177 # e = 0%, min_samples = 10 - assert_equal(_dynamic_max_trials(1, 100, 10, 0), 0) - assert_equal(_dynamic_max_trials(1, 100, 10, 1), float('inf')) + assert _dynamic_max_trials(1, 100, 10, 0) == 0 + assert _dynamic_max_trials(1, 100, 10, 1) == float('inf') base_estimator = LinearRegression() ransac_estimator = RANSACRegressor(base_estimator, min_samples=2, @@ -458,7 +458,7 @@ def test_ransac_fit_sample_weight(): weights = np.ones(n_samples) ransac_estimator.fit(X, y, weights) # sanity check - assert_equal(ransac_estimator.inlier_mask_.shape[0], n_samples) + assert ransac_estimator.inlier_mask_.shape[0] == n_samples ref_inlier_mask = np.ones_like(ransac_estimator.inlier_mask_ ).astype(np.bool_) diff --git a/sklearn/linear_model/tests/test_ridge.py b/sklearn/linear_model/tests/test_ridge.py index cfc487c6ffe66..de517b20e859f 100644 --- a/sklearn/linear_model/tests/test_ridge.py +++ b/sklearn/linear_model/tests/test_ridge.py @@ -77,13 +77,13 @@ def test_ridge(solver): ridge = Ridge(alpha=alpha, solver=solver) ridge.fit(X, y) - assert_equal(ridge.coef_.shape, (X.shape[1], )) - assert_greater(ridge.score(X, y), 0.47) + assert ridge.coef_.shape == (X.shape[1], ) + assert ridge.score(X, y) > 0.47 if solver in ("cholesky", "sag"): # Currently the only solvers to support sample_weight. ridge.fit(X, y, sample_weight=np.ones(n_samples)) - assert_greater(ridge.score(X, y), 0.47) + assert ridge.score(X, y) > 0.47 # With more features than samples n_samples, n_features = 5, 10 @@ -91,12 +91,12 @@ def test_ridge(solver): X = rng.randn(n_samples, n_features) ridge = Ridge(alpha=alpha, solver=solver) ridge.fit(X, y) - assert_greater(ridge.score(X, y), .9) + assert ridge.score(X, y) > .9 if solver in ("cholesky", "sag"): # Currently the only solvers to support sample_weight. 
ridge.fit(X, y, sample_weight=np.ones(n_samples)) - assert_greater(ridge.score(X, y), 0.9) + assert ridge.score(X, y) > 0.9 def test_primal_dual_relationship(): @@ -119,7 +119,7 @@ def test_ridge_singular(): ridge = Ridge(alpha=0) ridge.fit(X, y) - assert_greater(ridge.score(X, y), 0.9) + assert ridge.score(X, y) > 0.9 def test_ridge_regression_sample_weights(): @@ -214,16 +214,16 @@ def test_ridge_shapes(): ridge = Ridge() ridge.fit(X, y) - assert_equal(ridge.coef_.shape, (n_features,)) - assert_equal(ridge.intercept_.shape, ()) + assert ridge.coef_.shape == (n_features,) + assert ridge.intercept_.shape == () ridge.fit(X, Y1) - assert_equal(ridge.coef_.shape, (1, n_features)) - assert_equal(ridge.intercept_.shape, (1, )) + assert ridge.coef_.shape == (1, n_features) + assert ridge.intercept_.shape == (1, ) ridge.fit(X, Y) - assert_equal(ridge.coef_.shape, (2, n_features)) - assert_equal(ridge.intercept_.shape, (2, )) + assert ridge.coef_.shape == (2, n_features) + assert ridge.intercept_.shape == (2, ) def test_ridge_intercept(): @@ -254,16 +254,16 @@ def test_toy_ridge_object(): X_test = [[1], [2], [3], [4]] assert_almost_equal(reg.predict(X_test), [1., 2, 3, 4]) - assert_equal(len(reg.coef_.shape), 1) - assert_equal(type(reg.intercept_), np.float64) + assert len(reg.coef_.shape) == 1 + assert type(reg.intercept_) == np.float64 Y = np.vstack((Y, Y)).T reg.fit(X, Y) X_test = [[1], [2], [3], [4]] - assert_equal(len(reg.coef_.shape), 2) - assert_equal(type(reg.intercept_), np.ndarray) + assert len(reg.coef_.shape) == 2 + assert type(reg.intercept_) == np.ndarray def test_ridge_vs_lstsq(): @@ -616,7 +616,7 @@ def _test_ridge_cv_normalize(filter_): gs = GridSearchCV(Ridge(normalize=True, solver='sparse_cg'), cv=3, param_grid={'alpha': ridge_cv.alphas}) gs.fit(filter_(10. 
* X_diabetes), y_diabetes) - assert_equal(gs.best_estimator_.alpha, ridge_cv.alpha_) + assert gs.best_estimator_.alpha == ridge_cv.alpha_ def _test_ridge_cv(filter_): @@ -624,16 +624,16 @@ def _test_ridge_cv(filter_): ridge_cv.fit(filter_(X_diabetes), y_diabetes) ridge_cv.predict(filter_(X_diabetes)) - assert_equal(len(ridge_cv.coef_.shape), 1) - assert_equal(type(ridge_cv.intercept_), np.float64) + assert len(ridge_cv.coef_.shape) == 1 + assert type(ridge_cv.intercept_) == np.float64 cv = KFold(5) ridge_cv.set_params(cv=cv) ridge_cv.fit(filter_(X_diabetes), y_diabetes) ridge_cv.predict(filter_(X_diabetes)) - assert_equal(len(ridge_cv.coef_.shape), 1) - assert_equal(type(ridge_cv.intercept_), np.float64) + assert len(ridge_cv.coef_.shape) == 1 + assert type(ridge_cv.intercept_) == np.float64 def _test_ridge_diabetes(filter_): @@ -649,7 +649,7 @@ def _test_multi_ridge_diabetes(filter_): ridge = Ridge(fit_intercept=False) ridge.fit(filter_(X_diabetes), Y) - assert_equal(ridge.coef_.shape, (2, n_features)) + assert ridge.coef_.shape == (2, n_features) Y_pred = ridge.predict(filter_(X_diabetes)) ridge.fit(filter_(X_diabetes), y_diabetes) y_pred = ridge.predict(filter_(X_diabetes)) @@ -662,9 +662,9 @@ def _test_ridge_classifiers(filter_): n_features = X_iris.shape[1] for reg in (RidgeClassifier(), RidgeClassifierCV()): reg.fit(filter_(X_iris), y_iris) - assert_equal(reg.coef_.shape, (n_classes, n_features)) + assert reg.coef_.shape == (n_classes, n_features) y_pred = reg.predict(filter_(X_iris)) - assert_greater(np.mean(y_iris == y_pred), .79) + assert np.mean(y_iris == y_pred) > .79 cv = KFold(5) reg = RidgeClassifierCV(cv=cv) @@ -743,7 +743,7 @@ def test_class_weights(): reg.fit(X, y) rega = RidgeClassifier(class_weight='balanced') rega.fit(X, y) - assert_equal(len(rega.classes_), 2) + assert len(rega.classes_) == 2 assert_array_almost_equal(reg.coef_, rega.coef_) assert_array_almost_equal(reg.intercept_, rega.intercept_) @@ -988,7 +988,7 @@ def func(): def test_sparse_cg_max_iter(): reg = Ridge(solver="sparse_cg", max_iter=1) reg.fit(X_diabetes, y_diabetes) - assert_equal(reg.coef_.shape[0], X_diabetes.shape[1]) + assert reg.coef_.shape[0] == X_diabetes.shape[1] @ignore_warnings @@ -1007,7 +1007,7 @@ def test_n_iter(): for solver in ('sparse_cg', 'svd', 'cholesky'): reg = Ridge(solver=solver, max_iter=1, tol=1e-1) reg.fit(X, y_n) - assert_equal(reg.n_iter_, None) + assert reg.n_iter_ == None def test_ridge_fit_intercept_sparse(): diff --git a/sklearn/linear_model/tests/test_sag.py b/sklearn/linear_model/tests/test_sag.py index f1887302f5b46..3407d00fb1cc4 100644 --- a/sklearn/linear_model/tests/test_sag.py +++ b/sklearn/linear_model/tests/test_sag.py @@ -471,8 +471,8 @@ def test_sag_regressor(): clf2.fit(sp.csr_matrix(X), y) score1 = clf1.score(X, y) score2 = clf2.score(X, y) - assert_greater(score1, 0.99) - assert_greater(score2, 0.99) + assert score1 > 0.99 + assert score2 > 0.99 # simple linear function with noise y = 0.5 * X.ravel() + rng.randn(n_samples, 1).ravel() @@ -485,8 +485,8 @@ def test_sag_regressor(): score1 = clf1.score(X, y) score2 = clf2.score(X, y) score2 = clf2.score(X, y) - assert_greater(score1, 0.5) - assert_greater(score2, 0.5) + assert score1 > 0.5 + assert score2 > 0.5 @pytest.mark.filterwarnings('ignore:The max_iter was reached') diff --git a/sklearn/linear_model/tests/test_sgd.py b/sklearn/linear_model/tests/test_sgd.py index 7e5f88ce2e0cf..abdb2ecefd10b 100644 --- a/sklearn/linear_model/tests/test_sgd.py +++ b/sklearn/linear_model/tests/test_sgd.py @@ -211,13 
+211,13 @@ def _test_warm_start(klass, X, Y, lr): warm_start=True, learning_rate=lr) clf3.fit(X, Y) - assert_equal(clf3.t_, clf.t_) + assert clf3.t_ == clf.t_ assert_array_almost_equal(clf3.coef_, clf.coef_) clf3.set_params(alpha=0.001) clf3.fit(X, Y) - assert_equal(clf3.t_, clf2.t_) + assert clf3.t_ == clf2.t_ assert_array_almost_equal(clf3.coef_, clf2.coef_) @@ -583,9 +583,9 @@ def test_partial_fit_weight_class_balanced(klass): def test_sgd_multiclass(klass): # Multi-class test case clf = klass(alpha=0.01, max_iter=20).fit(X2, Y2) - assert_equal(clf.coef_.shape, (3, 2)) - assert_equal(clf.intercept_.shape, (3,)) - assert_equal(clf.decision_function([[0, 0]]).shape, (1, 3)) + assert clf.coef_.shape == (3, 2) + assert clf.intercept_.shape == (3,) + assert clf.decision_function([[0, 0]]).shape == (1, 3) pred = clf.predict(T2) assert_array_equal(pred, true_result2) @@ -621,7 +621,7 @@ def test_sgd_multiclass_with_init_coef(klass): clf = klass(alpha=0.01, max_iter=20) clf.fit(X2, Y2, coef_init=np.zeros((3, 2)), intercept_init=np.zeros(3)) - assert_equal(clf.coef_.shape, (3, 2)) + assert clf.coef_.shape == (3, 2) assert clf.intercept_.shape, (3,) pred = clf.predict(T2) assert_array_equal(pred, true_result2) @@ -631,9 +631,9 @@ def test_sgd_multiclass_with_init_coef(klass): def test_sgd_multiclass_njobs(klass): # Multi-class test case with multi-core support clf = klass(alpha=0.01, max_iter=20, n_jobs=2).fit(X2, Y2) - assert_equal(clf.coef_.shape, (3, 2)) - assert_equal(clf.intercept_.shape, (3,)) - assert_equal(clf.decision_function([[0, 0]]).shape, (1, 3)) + assert clf.coef_.shape == (3, 2) + assert clf.intercept_.shape == (3,) + assert clf.decision_function([[0, 0]]).shape == (1, 3) pred = clf.predict(T2) assert_array_equal(pred, true_result2) @@ -739,9 +739,9 @@ def test_sgd_proba(klass): d = clf.decision_function([[3, 2]]) p = clf.predict_proba([[3, 2]]) if klass != SparseSGDClassifier: - assert_equal(np.argmax(d, axis=1), np.argmax(p, axis=1)) + assert np.argmax(d, axis=1) == np.argmax(p, axis=1) else: # XXX the sparse test gets a different X2 (?) 
- assert_equal(np.argmin(d, axis=1), np.argmin(p, axis=1)) + assert np.argmin(d, axis=1) == np.argmin(p, axis=1) # the following sample produces decision_function values < -1, # which would cause naive normalization to fail (see comment @@ -896,14 +896,14 @@ def test_balanced_weight(klass): clf = klass(max_iter=1000, class_weight=None, shuffle=False) clf.fit(X_imbalanced, y_imbalanced) y_pred = clf.predict(X) - assert_less(metrics.f1_score(y, y_pred, average='weighted'), 0.96) + assert metrics.f1_score(y, y_pred, average='weighted') < 0.96 # fit a model with balanced class_weight enabled clf = klass(max_iter=1000, class_weight="balanced", shuffle=False) clf.fit(X_imbalanced, y_imbalanced) y_pred = clf.predict(X) - assert_greater(metrics.f1_score(y, y_pred, average='weighted'), 0.96) + assert metrics.f1_score(y, y_pred, average='weighted') > 0.96 @pytest.mark.parametrize('klass', [SGDClassifier, SparseSGDClassifier]) @@ -947,9 +947,9 @@ def test_partial_fit_binary(klass): classes = np.unique(Y) clf.partial_fit(X[:third], Y[:third], classes=classes) - assert_equal(clf.coef_.shape, (1, X.shape[1])) - assert_equal(clf.intercept_.shape, (1,)) - assert_equal(clf.decision_function([[0, 0]]).shape, (1, )) + assert clf.coef_.shape == (1, X.shape[1]) + assert clf.intercept_.shape == (1,) + assert clf.decision_function([[0, 0]]).shape == (1, ) id1 = id(clf.coef_.data) clf.partial_fit(X[third:], Y[third:]) @@ -968,9 +968,9 @@ def test_partial_fit_multiclass(klass): classes = np.unique(Y2) clf.partial_fit(X2[:third], Y2[:third], classes=classes) - assert_equal(clf.coef_.shape, (3, X2.shape[1])) - assert_equal(clf.intercept_.shape, (3,)) - assert_equal(clf.decision_function([[0, 0]]).shape, (1, 3)) + assert clf.coef_.shape == (3, X2.shape[1]) + assert clf.intercept_.shape == (3,) + assert clf.decision_function([[0, 0]]).shape == (1, 3) id1 = id(clf.coef_.data) clf.partial_fit(X2[third:], Y2[third:]) @@ -986,12 +986,12 @@ def test_partial_fit_multiclass_average(klass): classes = np.unique(Y2) clf.partial_fit(X2[:third], Y2[:third], classes=classes) - assert_equal(clf.coef_.shape, (3, X2.shape[1])) - assert_equal(clf.intercept_.shape, (3,)) + assert clf.coef_.shape == (3, X2.shape[1]) + assert clf.intercept_.shape == (3,) clf.partial_fit(X2[third:], Y2[third:]) - assert_equal(clf.coef_.shape, (3, X2.shape[1])) - assert_equal(clf.intercept_.shape, (3,)) + assert clf.coef_.shape == (3, X2.shape[1]) + assert clf.intercept_.shape == (3,) @pytest.mark.parametrize('klass', [SGDClassifier, SparseSGDClassifier]) @@ -1022,7 +1022,7 @@ def test_partial_fit_equal_fit_classif(klass, lr): clf.partial_fit(X_, Y_, classes=classes) y_pred2 = clf.decision_function(T_) - assert_equal(clf.t_, t) + assert clf.t_ == t assert_array_almost_equal(y_pred, y_pred2, decimal=2) @@ -1033,22 +1033,22 @@ def test_regression_losses(klass): eta0=0.1, loss="epsilon_insensitive", random_state=random_state) clf.fit(X, Y) - assert_equal(1.0, np.mean(clf.predict(X) == Y)) + assert 1.0 == np.mean(clf.predict(X) == Y) clf = klass(alpha=0.01, learning_rate="constant", eta0=0.1, loss="squared_epsilon_insensitive", random_state=random_state) clf.fit(X, Y) - assert_equal(1.0, np.mean(clf.predict(X) == Y)) + assert 1.0 == np.mean(clf.predict(X) == Y) clf = klass(alpha=0.01, loss="huber", random_state=random_state) clf.fit(X, Y) - assert_equal(1.0, np.mean(clf.predict(X) == Y)) + assert 1.0 == np.mean(clf.predict(X) == Y) clf = klass(alpha=0.01, learning_rate="constant", eta0=0.01, loss="squared_loss", random_state=random_state) clf.fit(X, Y) - 
assert_equal(1.0, np.mean(clf.predict(X) == Y)) + assert 1.0 == np.mean(clf.predict(X) == Y) @pytest.mark.parametrize('klass', [SGDClassifier, SparseSGDClassifier]) @@ -1076,7 +1076,7 @@ def test_sgd_reg(klass): # Check that SGD gives any results. clf = klass(alpha=0.1, max_iter=2, fit_intercept=False) clf.fit([[0, 0], [1, 1], [2, 2]], [0, 1, 2]) - assert_equal(clf.coef_[0], clf.coef_[1]) + assert clf.coef_[0] == clf.coef_[1] @pytest.mark.parametrize('klass', [SGDRegressor, SparseSGDRegressor]) @@ -1177,7 +1177,7 @@ def test_sgd_least_squares_fit(klass): fit_intercept=False) clf.fit(X, y) score = clf.score(X, y) - assert_greater(score, 0.99) + assert score > 0.99 # simple linear function with noise y = 0.5 * X.ravel() + rng.randn(n_samples, 1).ravel() @@ -1186,7 +1186,7 @@ def test_sgd_least_squares_fit(klass): fit_intercept=False) clf.fit(X, y) score = clf.score(X, y) - assert_greater(score, 0.5) + assert score > 0.5 @pytest.mark.parametrize('klass', [SGDRegressor, SparseSGDRegressor]) @@ -1231,7 +1231,7 @@ def test_sgd_huber_fit(klass): fit_intercept=False) clf.fit(X, y) score = clf.score(X, y) - assert_greater(score, 0.99) + assert score > 0.99 # simple linear function with noise y = 0.5 * X.ravel() + rng.randn(n_samples, 1).ravel() @@ -1240,7 +1240,7 @@ def test_sgd_huber_fit(klass): fit_intercept=False) clf.fit(X, y) score = clf.score(X, y) - assert_greater(score, 0.5) + assert score > 0.5 @pytest.mark.parametrize('klass', [SGDRegressor, SparseSGDRegressor]) @@ -1279,9 +1279,9 @@ def test_partial_fit(klass): clf = klass(alpha=0.01) clf.partial_fit(X[:third], Y[:third]) - assert_equal(clf.coef_.shape, (X.shape[1], )) - assert_equal(clf.intercept_.shape, (1,)) - assert_equal(clf.predict([[0, 0]]).shape, (1, )) + assert clf.coef_.shape == (X.shape[1], ) + assert clf.intercept_.shape == (1,) + assert clf.predict([[0, 0]]).shape == (1, ) id1 = id(clf.coef_.data) clf.partial_fit(X[third:], Y[third:]) @@ -1306,7 +1306,7 @@ def test_partial_fit_equal_fit(klass, lr): clf.partial_fit(X, Y) y_pred2 = clf.predict(T) - assert_equal(clf.t_, t) + assert clf.t_ == t assert_array_almost_equal(y_pred, y_pred2, decimal=2) @@ -1407,25 +1407,25 @@ def test_tol_parameter(): max_iter = 42 model_0 = SGDClassifier(tol=None, random_state=0, max_iter=max_iter) model_0.fit(X, y) - assert_equal(max_iter, model_0.n_iter_) + assert max_iter == model_0.n_iter_ # If tol is not None, the number of iteration should be less than max_iter max_iter = 2000 model_1 = SGDClassifier(tol=0, random_state=0, max_iter=max_iter) model_1.fit(X, y) - assert_greater(max_iter, model_1.n_iter_) - assert_greater(model_1.n_iter_, 5) + assert max_iter > model_1.n_iter_ + assert model_1.n_iter_ > 5 # A larger tol should yield a smaller number of iteration model_2 = SGDClassifier(tol=0.1, random_state=0, max_iter=max_iter) model_2.fit(X, y) - assert_greater(model_1.n_iter_, model_2.n_iter_) - assert_greater(model_2.n_iter_, 3) + assert model_1.n_iter_ > model_2.n_iter_ + assert model_2.n_iter_ > 3 # Strict tolerance and small max_iter should trigger a warning model_3 = SGDClassifier(max_iter=3, tol=1e-3, random_state=0) model_3 = assert_warns(ConvergenceWarning, model_3.fit, X, y) - assert_equal(model_3.n_iter_, 3) + assert model_3.n_iter_ == 3 def _test_gradient_common(loss_function, cases): diff --git a/sklearn/linear_model/tests/test_sparse_coordinate_descent.py b/sklearn/linear_model/tests/test_sparse_coordinate_descent.py index d637ae1385052..326bcc94433bc 100644 --- a/sklearn/linear_model/tests/test_sparse_coordinate_descent.py +++ 
b/sklearn/linear_model/tests/test_sparse_coordinate_descent.py @@ -21,7 +21,7 @@ def test_sparse_coef(): clf.coef_ = [1, 2, 3] assert sp.isspmatrix(clf.sparse_coef_) - assert_equal(clf.sparse_coef_.toarray().tolist()[0], clf.coef_) + assert clf.sparse_coef_.toarray().tolist()[0] == clf.coef_ def test_normalize_option(): @@ -162,7 +162,7 @@ def _test_sparse_enet_not_as_toy_dataset(alpha, fit_intercept, positive): s_clf.fit(X_train, y_train) assert_almost_equal(s_clf.dual_gap_, 0, 4) - assert_greater(s_clf.score(X_test, y_test), 0.85) + assert s_clf.score(X_test, y_test) > 0.85 # check the convergence is the same as the dense version d_clf = ElasticNet(alpha=alpha, l1_ratio=0.8, fit_intercept=fit_intercept, @@ -171,13 +171,13 @@ def _test_sparse_enet_not_as_toy_dataset(alpha, fit_intercept, positive): d_clf.fit(X_train.toarray(), y_train) assert_almost_equal(d_clf.dual_gap_, 0, 4) - assert_greater(d_clf.score(X_test, y_test), 0.85) + assert d_clf.score(X_test, y_test) > 0.85 assert_almost_equal(s_clf.coef_, d_clf.coef_, 5) assert_almost_equal(s_clf.intercept_, d_clf.intercept_, 5) # check that the coefs are sparse - assert_less(np.sum(s_clf.coef_ != 0.0), 2 * n_informative) + assert np.sum(s_clf.coef_ != 0.0) < 2 * n_informative def test_sparse_enet_not_as_toy_dataset(): @@ -203,16 +203,16 @@ def test_sparse_lasso_not_as_toy_dataset(): s_clf = Lasso(alpha=0.1, fit_intercept=False, max_iter=max_iter, tol=1e-7) s_clf.fit(X_train, y_train) assert_almost_equal(s_clf.dual_gap_, 0, 4) - assert_greater(s_clf.score(X_test, y_test), 0.85) + assert s_clf.score(X_test, y_test) > 0.85 # check the convergence is the same as the dense version d_clf = Lasso(alpha=0.1, fit_intercept=False, max_iter=max_iter, tol=1e-7) d_clf.fit(X_train.toarray(), y_train) assert_almost_equal(d_clf.dual_gap_, 0, 4) - assert_greater(d_clf.score(X_test, y_test), 0.85) + assert d_clf.score(X_test, y_test) > 0.85 # check that the coefs are sparse - assert_equal(np.sum(s_clf.coef_ != 0.0), n_informative) + assert np.sum(s_clf.coef_ != 0.0) == n_informative def test_enet_multitarget(): @@ -241,8 +241,8 @@ def test_path_parameters(): l1_ratio=0.5, fit_intercept=False) ignore_warnings(clf.fit)(X, y) # new params assert_almost_equal(0.5, clf.l1_ratio) - assert_equal(n_alphas, clf.n_alphas) - assert_equal(n_alphas, len(clf.alphas_)) + assert n_alphas == clf.n_alphas + assert n_alphas == len(clf.alphas_) sparse_mse_path = clf.mse_path_ ignore_warnings(clf.fit)(X.toarray(), y) # compare with dense data assert_almost_equal(clf.mse_path_, sparse_mse_path) diff --git a/sklearn/linear_model/tests/test_theil_sen.py b/sklearn/linear_model/tests/test_theil_sen.py index 57277a68abd88..70468b4bb8464 100644 --- a/sklearn/linear_model/tests/test_theil_sen.py +++ b/sklearn/linear_model/tests/test_theil_sen.py @@ -163,7 +163,7 @@ def test_theil_sen_1d(): X, y, w, c = gen_toy_problem_1d() # Check that Least Squares fails lstq = LinearRegression().fit(X, y) - assert_greater(np.abs(lstq.coef_ - w), 0.9) + assert np.abs(lstq.coef_ - w) > 0.9 # Check that Theil-Sen works theil_sen = TheilSenRegressor(random_state=0).fit(X, y) assert_array_almost_equal(theil_sen.coef_, w, 1) @@ -174,7 +174,7 @@ def test_theil_sen_1d_no_intercept(): X, y, w, c = gen_toy_problem_1d(intercept=False) # Check that Least Squares fails lstq = LinearRegression(fit_intercept=False).fit(X, y) - assert_greater(np.abs(lstq.coef_ - w - c), 0.5) + assert np.abs(lstq.coef_ - w - c) > 0.5 # Check that Theil-Sen works theil_sen = TheilSenRegressor(fit_intercept=False, 
random_state=0).fit(X, y) @@ -186,7 +186,7 @@ def test_theil_sen_2d(): X, y, w, c = gen_toy_problem_2d() # Check that Least Squares fails lstq = LinearRegression().fit(X, y) - assert_greater(norm(lstq.coef_ - w), 1.0) + assert norm(lstq.coef_ - w) > 1.0 # Check that Theil-Sen works theil_sen = TheilSenRegressor(max_subpopulation=1e3, random_state=0).fit(X, y) @@ -196,7 +196,7 @@ def test_theil_sen_2d(): def test_calc_breakdown_point(): bp = _breakdown_point(1e10, 2) - assert_less(np.abs(bp - 1 + 1 / (np.sqrt(2))), 1.e-6) + assert np.abs(bp - 1 + 1 / (np.sqrt(2))) < 1.e-6 def test_checksubparams_negative_subpopulation(): @@ -257,7 +257,7 @@ def test_theil_sen_parallel(): X, y, w, c = gen_toy_problem_2d() # Check that Least Squares fails lstq = LinearRegression().fit(X, y) - assert_greater(norm(lstq.coef_ - w), 1.0) + assert norm(lstq.coef_ - w) > 1.0 # Check that Theil-Sen works theil_sen = TheilSenRegressor(n_jobs=2, random_state=0, From d9ee2911ff8717a81cabd24a3e0e3bc618e17fee Mon Sep 17 00:00:00 2001 From: adrinjalali Date: Sun, 30 Jun 2019 15:50:32 +0200 Subject: [PATCH 10/22] fix manifold --- sklearn/manifold/tests/test_isomap.py | 6 ++-- sklearn/manifold/tests/test_locally_linear.py | 20 ++++++------ .../manifold/tests/test_spectral_embedding.py | 18 +++++------ sklearn/manifold/tests/test_t_sne.py | 32 +++++++++---------- 4 files changed, 38 insertions(+), 38 deletions(-) diff --git a/sklearn/manifold/tests/test_isomap.py b/sklearn/manifold/tests/test_isomap.py index d1a68164ee45c..da8607a31b916 100644 --- a/sklearn/manifold/tests/test_isomap.py +++ b/sklearn/manifold/tests/test_isomap.py @@ -101,7 +101,7 @@ def test_transform(): X_iso2 = iso.transform(X + noise) # Make sure the rms error on re-embedding is comparable to noise_scale - assert_less(np.sqrt(np.mean((X_iso - X_iso2) ** 2)), 2 * noise_scale) + assert np.sqrt(np.mean((X_iso - X_iso2) ** 2)) < 2 * noise_scale def test_pipeline(): @@ -113,7 +113,7 @@ def test_pipeline(): [('isomap', manifold.Isomap()), ('clf', neighbors.KNeighborsClassifier())]) clf.fit(X, y) - assert_less(.9, clf.score(X, y)) + assert .9 < clf.score(X, y) def test_isomap_clone_bug(): @@ -122,7 +122,7 @@ def test_isomap_clone_bug(): for n_neighbors in [10, 15, 20]: model.set_params(n_neighbors=n_neighbors) model.fit(np.random.rand(50, 2)) - assert_equal(model.nbrs_.n_neighbors, + assert (model.nbrs_.n_neighbors == n_neighbors) diff --git a/sklearn/manifold/tests/test_locally_linear.py b/sklearn/manifold/tests/test_locally_linear.py index a1d48fd49e999..2e05710b79b9a 100644 --- a/sklearn/manifold/tests/test_locally_linear.py +++ b/sklearn/manifold/tests/test_locally_linear.py @@ -30,7 +30,7 @@ def test_barycenter_kneighbors_graph(): # check that columns sum to one assert_array_almost_equal(np.sum(A.toarray(), 1), np.ones(3)) pred = np.dot(A.toarray(), X) - assert_less(linalg.norm(pred - X) / X.shape[0], 1) + assert linalg.norm(pred - X) / X.shape[0] < 1 # ---------------------------------------------------------------------- @@ -52,7 +52,7 @@ def test_lle_simple_grid(): N = barycenter_kneighbors_graph(X, clf.n_neighbors).toarray() reconstruction_error = linalg.norm(np.dot(N, X) - X, 'fro') - assert_less(reconstruction_error, tol) + assert reconstruction_error < tol for solver in eigen_solvers: clf.set_params(eigen_solver=solver) @@ -61,14 +61,14 @@ def test_lle_simple_grid(): reconstruction_error = linalg.norm( np.dot(N, clf.embedding_) - clf.embedding_, 'fro') ** 2 - assert_less(reconstruction_error, tol) + assert reconstruction_error < tol 
assert_almost_equal(clf.reconstruction_error_, reconstruction_error, decimal=1) # re-embed a noisy version of X using the transform method noise = rng.randn(*X.shape) / 100 X_reembedded = clf.transform(X + noise) - assert_less(linalg.norm(X_reembedded - clf.embedding_), tol) + assert linalg.norm(X_reembedded - clf.embedding_) < tol def test_lle_manifold(): @@ -86,7 +86,7 @@ def test_lle_manifold(): N = barycenter_kneighbors_graph(X, clf.n_neighbors).toarray() reconstruction_error = linalg.norm(np.dot(N, X) - X) - assert_less(reconstruction_error, tol) + assert reconstruction_error < tol for solver in eigen_solvers: clf.set_params(eigen_solver=solver) @@ -95,10 +95,10 @@ def test_lle_manifold(): reconstruction_error = linalg.norm( np.dot(N, clf.embedding_) - clf.embedding_, 'fro') ** 2 details = ("solver: %s, method: %s" % (solver, method)) - assert_less(reconstruction_error, tol, msg=details) - assert_less(np.abs(clf.reconstruction_error_ - - reconstruction_error), - tol * reconstruction_error, msg=details) + assert reconstruction_error < tol, details + assert (np.abs(clf.reconstruction_error_ - + reconstruction_error) < + tol * reconstruction_error), details # Test the error raised when parameter passed to lle is invalid @@ -124,7 +124,7 @@ def test_pipeline(): [('filter', manifold.LocallyLinearEmbedding(random_state=0)), ('clf', neighbors.KNeighborsClassifier())]) clf.fit(X, y) - assert_less(.9, clf.score(X, y)) + assert .9 < clf.score(X, y) # Test the error raised when the weight matrix is singular diff --git a/sklearn/manifold/tests/test_spectral_embedding.py b/sklearn/manifold/tests/test_spectral_embedding.py index 3d7e643d5697f..b6e3879b8beab 100644 --- a/sklearn/manifold/tests/test_spectral_embedding.py +++ b/sklearn/manifold/tests/test_spectral_embedding.py @@ -77,12 +77,12 @@ def test_sparse_graph_connected_component(): for start, stop in zip(boundaries[:-1], boundaries[1:]): component_1 = _graph_connected_component(affinity, p[start]) component_size = stop - start - assert_equal(component_1.sum(), component_size) + assert component_1.sum() == component_size # We should retrieve the same component mask by starting by both ends # of the group component_2 = _graph_connected_component(affinity, p[stop - 1]) - assert_equal(component_2.sum(), component_size) + assert component_2.sum() == component_size assert_array_equal(component_1, component_2) @@ -123,7 +123,7 @@ def test_spectral_embedding_two_components(seed=36): se_precomp.fit_transform(affinity.astype(np.float32)) # thresholding on the first components using 0. 
label_ = np.array(embedded_coordinate.ravel() < 0, dtype="float") - assert_equal(normalized_mutual_info_score(true_label, label_), 1.0) + assert normalized_mutual_info_score(true_label, label_) == 1.0 def test_spectral_embedding_precomputed_affinity(seed=36): @@ -220,17 +220,17 @@ def test_connectivity(seed=36): [0, 1, 1, 1, 0], [0, 0, 1, 1, 1], [0, 0, 0, 1, 1]]) - assert_equal(_graph_is_connected(graph), False) - assert_equal(_graph_is_connected(sparse.csr_matrix(graph)), False) - assert_equal(_graph_is_connected(sparse.csc_matrix(graph)), False) + assert not _graph_is_connected(graph) + assert not _graph_is_connected(sparse.csr_matrix(graph)) + assert not _graph_is_connected(sparse.csc_matrix(graph)) graph = np.array([[1, 1, 0, 0, 0], [1, 1, 1, 0, 0], [0, 1, 1, 1, 0], [0, 0, 1, 1, 1], [0, 0, 0, 1, 1]]) - assert_equal(_graph_is_connected(graph), True) - assert_equal(_graph_is_connected(sparse.csr_matrix(graph)), True) - assert_equal(_graph_is_connected(sparse.csc_matrix(graph)), True) + assert _graph_is_connected(graph) + assert _graph_is_connected(sparse.csr_matrix(graph)) + assert _graph_is_connected(sparse.csc_matrix(graph)) def test_spectral_embedding_deterministic(): diff --git a/sklearn/manifold/tests/test_t_sne.py b/sklearn/manifold/tests/test_t_sne.py index 59a2a71660c32..ceb569026489d 100644 --- a/sklearn/manifold/tests/test_t_sne.py +++ b/sklearn/manifold/tests/test_t_sne.py @@ -70,8 +70,8 @@ def flat_function(_, compute_error=True): out = sys.stdout.getvalue() sys.stdout.close() sys.stdout = old_stdout - assert_equal(error, 1.0) - assert_equal(it, 0) + assert error == 1.0 + assert it == 0 assert("gradient norm" in out) # Maximum number of iterations without improvement @@ -86,8 +86,8 @@ def flat_function(_, compute_error=True): out = sys.stdout.getvalue() sys.stdout.close() sys.stdout = old_stdout - assert_equal(error, 0.0) - assert_equal(it, 11) + assert error == 0.0 + assert it == 11 assert("did not make any progress" in out) # Maximum number of iterations @@ -102,8 +102,8 @@ def flat_function(_, compute_error=True): out = sys.stdout.getvalue() sys.stdout.close() sys.stdout = old_stdout - assert_equal(error, 0.0) - assert_equal(it, 10) + assert error == 0.0 + assert it == 10 assert("Iteration 10" in out) @@ -226,13 +226,13 @@ def test_trustworthiness(): # Affine transformation X = random_state.randn(100, 2) - assert_equal(trustworthiness(X, 5.0 + X / 10.0), 1.0) + assert trustworthiness(X, 5.0 + X / 10.0) == 1.0 # Randomly shuffled X = np.arange(100).reshape(-1, 1) X_embedded = X.copy() random_state.shuffle(X_embedded) - assert_less(trustworthiness(X, X_embedded), 0.6) + assert trustworthiness(X, X_embedded) < 0.6 # Completely different X = np.arange(5).reshape(-1, 1) @@ -264,8 +264,8 @@ def test_optimization_minimizes_kl_divergence(): n_iter=n_iter, random_state=0) tsne.fit_transform(X) kl_divergences.append(tsne.kl_divergence_) - assert_less_equal(kl_divergences[1], kl_divergences[0]) - assert_less_equal(kl_divergences[2], kl_divergences[1]) + assert kl_divergences[1] <= kl_divergences[0] + assert kl_divergences[2] <= kl_divergences[1] def test_fit_csr_matrix(): @@ -300,7 +300,7 @@ def test_trustworthiness_not_euclidean_metric(): # 'precomputed' random_state = check_random_state(0) X = random_state.randn(100, 2) - assert_equal(trustworthiness(X, X, metric='cosine'), + assert (trustworthiness(X, X, metric='cosine') == trustworthiness(pairwise_distances(X, metric='cosine'), X, metric='precomputed')) @@ -671,8 +671,8 @@ def
test_n_iter_without_progress(): sys.stdout = old_stdout # The output needs to contain the value of n_iter_without_progress - assert_in("did not make any progress during the " - "last -1 episodes. Finished.", out) + assert ("did not make any progress during the " + "last -1 episodes. Finished." in out) def test_min_grad_norm(): @@ -715,7 +715,7 @@ def test_min_grad_norm(): # The gradient norm can be smaller than min_grad_norm at most once, # because in the moment it becomes smaller the optimization stops - assert_less_equal(n_smaller_gradient_norms, 1) + assert n_smaller_gradient_norms <= 1 def test_accessible_kl_divergence(): @@ -790,8 +790,8 @@ def assert_uniform_grid(Y, try_name=None): smallest_to_mean = dist_to_nn.min() / np.mean(dist_to_nn) largest_to_mean = dist_to_nn.max() / np.mean(dist_to_nn) - assert_greater(smallest_to_mean, .5, msg=try_name) - assert_less(largest_to_mean, 2, msg=try_name) + assert smallest_to_mean > .5, try_name + assert largest_to_mean < 2, try_name def test_bh_match_exact(): From c86851c71f5bd759bef5a8c6fb6fcb60d4d36adb Mon Sep 17 00:00:00 2001 From: adrinjalali Date: Sun, 30 Jun 2019 15:54:31 +0200 Subject: [PATCH 11/22] fix metrics --- .../metrics/cluster/tests/test_bicluster.py | 24 +-- .../metrics/cluster/tests/test_supervised.py | 24 +-- .../cluster/tests/test_unsupervised.py | 22 +- sklearn/metrics/tests/test_classification.py | 198 +++++++++--------- sklearn/metrics/tests/test_common.py | 8 +- sklearn/metrics/tests/test_pairwise.py | 52 ++--- sklearn/metrics/tests/test_ranking.py | 78 +++---- sklearn/metrics/tests/test_regression.py | 12 +- sklearn/metrics/tests/test_score_objects.py | 14 +- 9 files changed, 216 insertions(+), 216 deletions(-) diff --git a/sklearn/metrics/cluster/tests/test_bicluster.py b/sklearn/metrics/cluster/tests/test_bicluster.py index 1ca98b744a7b1..d98ee1fc86b20 100644 --- a/sklearn/metrics/cluster/tests/test_bicluster.py +++ b/sklearn/metrics/cluster/tests/test_bicluster.py @@ -14,10 +14,10 @@ def test_jaccard(): a3 = np.array([False, True, True, False]) a4 = np.array([False, False, True, True]) - assert_equal(_jaccard(a1, a1, a1, a1), 1) - assert_equal(_jaccard(a1, a1, a2, a2), 0.25) - assert_equal(_jaccard(a1, a1, a3, a3), 1.0 / 7) - assert_equal(_jaccard(a1, a1, a4, a4), 0) + assert _jaccard(a1, a1, a1, a1) == 1 + assert _jaccard(a1, a1, a2, a2) == 0.25 + assert _jaccard(a1, a1, a3, a3) == 1.0 / 7 + assert _jaccard(a1, a1, a4, a4) == 0 def test_consensus_score(): @@ -25,15 +25,15 @@ def test_consensus_score(): [False, False, True, True]] b = a[::-1] - assert_equal(consensus_score((a, a), (a, a)), 1) - assert_equal(consensus_score((a, a), (b, b)), 1) - assert_equal(consensus_score((a, b), (a, b)), 1) - assert_equal(consensus_score((a, b), (b, a)), 1) + assert consensus_score((a, a), (a, a)) == 1 + assert consensus_score((a, a), (b, b)) == 1 + assert consensus_score((a, b), (a, b)) == 1 + assert consensus_score((a, b), (b, a)) == 1 - assert_equal(consensus_score((a, a), (b, a)), 0) - assert_equal(consensus_score((a, a), (a, b)), 0) - assert_equal(consensus_score((b, b), (a, b)), 0) - assert_equal(consensus_score((b, b), (b, a)), 0) + assert consensus_score((a, a), (b, a)) == 0 + assert consensus_score((a, a), (a, b)) == 0 + assert consensus_score((b, b), (a, b)) == 0 + assert consensus_score((b, b), (b, a)) == 0 def test_consensus_score_issue2445(): diff --git a/sklearn/metrics/cluster/tests/test_supervised.py b/sklearn/metrics/cluster/tests/test_supervised.py index eef3a7f46482d..a91ef7ef4ec4c 100644 --- 
a/sklearn/metrics/cluster/tests/test_supervised.py +++ b/sklearn/metrics/cluster/tests/test_supervised.py @@ -62,13 +62,13 @@ def test_generalized_average(): @ignore_warnings(category=FutureWarning) def test_perfect_matches(): for score_func in score_funcs: - assert_equal(score_func([], []), 1.0) - assert_equal(score_func([0], [1]), 1.0) - assert_equal(score_func([0, 0, 0], [0, 0, 0]), 1.0) - assert_equal(score_func([0, 1, 0], [42, 7, 42]), 1.0) - assert_equal(score_func([0., 1., 0.], [42., 7., 42.]), 1.0) - assert_equal(score_func([0., 1., 2.], [42., 7., 2.]), 1.0) - assert_equal(score_func([0, 1, 2], [42, 7, 2]), 1.0) + assert score_func([], []) == 1.0 + assert score_func([0], [1]) == 1.0 + assert score_func([0, 0, 0], [0, 0, 0]) == 1.0 + assert score_func([0, 1, 0], [42, 7, 42]) == 1.0 + assert score_func([0., 1., 0.], [42., 7., 42.]) == 1.0 + assert score_func([0., 1., 2.], [42., 7., 2.]) == 1.0 + assert score_func([0, 1, 2], [42, 7, 2]) == 1.0 score_funcs_with_changing_means = [ normalized_mutual_info_score, adjusted_mutual_info_score, @@ -213,7 +213,7 @@ def test_adjusted_mutual_info_score(): ami = adjusted_mutual_info_score(labels_a, labels_b) assert_almost_equal(ami, 0.27821, 5) ami = adjusted_mutual_info_score([1, 1, 2, 2], [2, 2, 3, 3]) - assert_equal(ami, 1.0) + assert ami == 1.0 # Test with a very large array a110 = np.array([list(labels_a) * 110]).flatten() b110 = np.array([list(labels_b) * 110]).flatten() @@ -275,10 +275,10 @@ def test_exactly_zero_info_score(): for i in np.logspace(1, 4, 4).astype(np.int): labels_a, labels_b = (np.ones(i, dtype=np.int), np.arange(i, dtype=np.int)) - assert_equal(normalized_mutual_info_score(labels_a, labels_b), 0.0) - assert_equal(v_measure_score(labels_a, labels_b), 0.0) - assert_equal(adjusted_mutual_info_score(labels_a, labels_b), 0.0) - assert_equal(normalized_mutual_info_score(labels_a, labels_b), 0.0) + assert normalized_mutual_info_score(labels_a, labels_b) == 0.0 + assert v_measure_score(labels_a, labels_b) == 0.0 + assert adjusted_mutual_info_score(labels_a, labels_b) == 0.0 + assert normalized_mutual_info_score(labels_a, labels_b) == 0.0 for method in ["min", "geometric", "arithmetic", "max"]: assert adjusted_mutual_info_score(labels_a, labels_b, method) == 0.0 diff --git a/sklearn/metrics/cluster/tests/test_unsupervised.py b/sklearn/metrics/cluster/tests/test_unsupervised.py index 4b061313794e0..29dfd930b0702 100644 --- a/sklearn/metrics/cluster/tests/test_unsupervised.py +++ b/sklearn/metrics/cluster/tests/test_unsupervised.py @@ -32,7 +32,7 @@ def test_silhouette(): # Given that the actual labels are used, we can assume that S would be # positive. 
score_precomputed = silhouette_score(D, y, metric='precomputed') - assert_greater(score_precomputed, 0) + assert score_precomputed > 0 # Test without calculating D score_euclidean = silhouette_score(X, y, metric='euclidean') pytest.approx(score_precomputed, score_euclidean) @@ -50,8 +50,8 @@ def test_silhouette(): score_euclidean = silhouette_score(X, y, metric='euclidean', sample_size=int(X.shape[0] / 2), random_state=0) - assert_greater(score_precomputed, 0) - assert_greater(score_euclidean, 0) + assert score_precomputed > 0 + assert score_euclidean > 0 pytest.approx(score_euclidean, score_precomputed) if X is X_dense: @@ -156,8 +156,8 @@ def test_non_encoded_labels(): dataset = datasets.load_iris() X = dataset.data labels = dataset.target - assert_equal( - silhouette_score(X, labels * 2 + 10), silhouette_score(X, labels)) + assert ( + silhouette_score(X, labels * 2 + 10) == silhouette_score(X, labels)) assert_array_equal( silhouette_samples(X, labels * 2 + 10), silhouette_samples(X, labels)) @@ -166,8 +166,8 @@ def test_non_numpy_labels(): dataset = datasets.load_iris() X = dataset.data y = dataset.target - assert_equal( - silhouette_score(list(X), list(y)), silhouette_score(X, y)) + assert ( + silhouette_score(list(X), list(y)) == silhouette_score(X, y)) def assert_raises_on_only_one_label(func): @@ -192,12 +192,12 @@ def test_calinski_harabasz_score(): assert_raises_on_all_points_same_cluster(calinski_harabasz_score) # Assert the value is 1. when all samples are equals - assert_equal(1., calinski_harabasz_score(np.ones((10, 2)), - [0] * 5 + [1] * 5)) + assert 1. == calinski_harabasz_score(np.ones((10, 2)), + [0] * 5 + [1] * 5) # Assert the value is 0. when all the mean cluster are equal - assert_equal(0., calinski_harabasz_score([[-1, -1], [1, 1]] * 10, - [0] * 10 + [1] * 10)) + assert 0. == calinski_harabasz_score([[-1, -1], [1, 1]] * 10, + [0] * 10 + [1] * 10) # General case (with non numpy arrays) X = ([[0, 0], [1, 1]] * 5 + [[3, 3], [4, 4]] * 5 + diff --git a/sklearn/metrics/tests/test_classification.py b/sklearn/metrics/tests/test_classification.py index ed194b3c7e995..d51631a5ff6f9 100644 --- a/sklearn/metrics/tests/test_classification.py +++ b/sklearn/metrics/tests/test_classification.py @@ -160,13 +160,13 @@ def test_multilabel_accuracy_score_subset_accuracy(): y1 = np.array([[0, 1, 1], [1, 0, 1]]) y2 = np.array([[0, 0, 1], [1, 0, 1]]) - assert_equal(accuracy_score(y1, y2), 0.5) - assert_equal(accuracy_score(y1, y1), 1) - assert_equal(accuracy_score(y2, y2), 1) - assert_equal(accuracy_score(y2, np.logical_not(y2)), 0) - assert_equal(accuracy_score(y1, np.logical_not(y1)), 0) - assert_equal(accuracy_score(y1, np.zeros(y1.shape)), 0) - assert_equal(accuracy_score(y2, np.zeros(y1.shape)), 0) + assert accuracy_score(y1, y2) == 0.5 + assert accuracy_score(y1, y1) == 1 + assert accuracy_score(y2, y2) == 1 + assert accuracy_score(y2, np.logical_not(y2)) == 0 + assert accuracy_score(y1, np.logical_not(y1)) == 0 + assert accuracy_score(y1, np.zeros(y1.shape)) == 0 + assert accuracy_score(y2, np.zeros(y1.shape)) == 0 def test_precision_recall_f1_score_binary(): @@ -204,13 +204,13 @@ def test_precision_recall_f_binary_single_class(): # Test precision, recall and F1 score behave with a single positive or # negative class # Such a case may occur with non-stratified cross-validation - assert_equal(1., precision_score([1, 1], [1, 1])) - assert_equal(1., recall_score([1, 1], [1, 1])) - assert_equal(1., f1_score([1, 1], [1, 1])) + assert 1. == precision_score([1, 1], [1, 1]) + assert 1. 
== recall_score([1, 1], [1, 1]) + assert 1. == f1_score([1, 1], [1, 1]) - assert_equal(0., precision_score([-1, -1], [-1, -1])) - assert_equal(0., recall_score([-1, -1], [-1, -1])) - assert_equal(0., f1_score([-1, -1], [-1, -1])) + assert 0. == precision_score([-1, -1], [-1, -1]) + assert 0. == recall_score([-1, -1], [-1, -1]) + assert 0. == f1_score([-1, -1], [-1, -1]) @ignore_warnings @@ -283,7 +283,7 @@ def test_precision_recall_f_ignored_labels(): # ensure the above were meaningful tests: for average in ['macro', 'weighted', 'micro']: - assert_not_equal(recall_13(average=average), + assert (recall_13(average=average) != recall_all(average=average)) @@ -307,7 +307,7 @@ def test_average_precision_score_duplicate_values(): # test statistic, the average_precision_score should be 1 y_true = [0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1] y_score = [0, .1, .1, .4, .5, .6, .6, .9, .9, 1, 1] - assert_equal(average_precision_score(y_true, y_score), 1) + assert average_precision_score(y_true, y_score) == 1 def test_average_precision_score_tied_values(): @@ -320,7 +320,7 @@ def test_average_precision_score_tied_values(): # than one. y_true = [0, 1, 1] y_score = [.5, .5, .6] - assert_not_equal(average_precision_score(y_true, y_score), 1.) + assert average_precision_score(y_true, y_score) != 1. @ignore_warnings @@ -512,12 +512,12 @@ def test_cohen_kappa(): y2 = np.array([0] * 20 + [1] * 20 + [0] * 10 + [1] * 50) kappa = cohen_kappa_score(y1, y2) assert_almost_equal(kappa, .348, decimal=3) - assert_equal(kappa, cohen_kappa_score(y2, y1)) + assert kappa == cohen_kappa_score(y2, y1) # Add spurious labels and ignore them. y1 = np.append(y1, [2] * 4) y2 = np.append(y2, [2] * 4) - assert_equal(cohen_kappa_score(y1, y2, labels=[0, 1]), kappa) + assert cohen_kappa_score(y1, y2, labels=[0, 1]) == kappa assert_almost_equal(cohen_kappa_score(y1, y1), 1.) 
@@ -538,8 +538,8 @@ def test_cohen_kappa(): @ignore_warnings def test_matthews_corrcoef_nan(): - assert_equal(matthews_corrcoef([0], [1]), 0.0) - assert_equal(matthews_corrcoef([0, 0], [0, 1]), 0.0) + assert matthews_corrcoef([0], [1]) == 0.0 + assert matthews_corrcoef([0, 0], [0, 1]) == 0.0 def test_matthews_corrcoef_against_numpy_corrcoef(): @@ -792,15 +792,15 @@ def test_precision_recall_f1_score_binary_averaged(): average=None) p, r, f, _ = precision_recall_fscore_support(y_true, y_pred, average='macro') - assert_equal(p, np.mean(ps)) - assert_equal(r, np.mean(rs)) - assert_equal(f, np.mean(fs)) + assert p == np.mean(ps) + assert r == np.mean(rs) + assert f == np.mean(fs) p, r, f, _ = precision_recall_fscore_support(y_true, y_pred, average='weighted') support = np.bincount(y_true) - assert_equal(p, np.average(ps, weights=support)) - assert_equal(r, np.average(rs, weights=support)) - assert_equal(f, np.average(fs, weights=support)) + assert p == np.average(ps, weights=support) + assert r == np.average(rs, weights=support) + assert f == np.average(fs, weights=support) def test_zero_precision_recall(): @@ -854,28 +854,28 @@ def test_confusion_matrix_dtype(): weight = np.ones(len(y)) # confusion_matrix returns int64 by default cm = confusion_matrix(y, y) - assert_equal(cm.dtype, np.int64) + assert cm.dtype == np.int64 # The dtype of confusion_matrix is always 64 bit for dtype in [np.bool_, np.int32, np.uint64]: cm = confusion_matrix(y, y, sample_weight=weight.astype(dtype, copy=False)) - assert_equal(cm.dtype, np.int64) + assert cm.dtype == np.int64 for dtype in [np.float32, np.float64, None, object]: cm = confusion_matrix(y, y, sample_weight=weight.astype(dtype, copy=False)) - assert_equal(cm.dtype, np.float64) + assert cm.dtype == np.float64 # np.iinfo(np.uint32).max should be accumulated correctly weight = np.full(len(y), 4294967295, dtype=np.uint32) cm = confusion_matrix(y, y, sample_weight=weight) - assert_equal(cm[0, 0], 4294967295) - assert_equal(cm[1, 1], 8589934590) + assert cm[0, 0] == 4294967295 + assert cm[1, 1] == 8589934590 # np.iinfo(np.int64).max should cause an overflow weight = np.full(len(y), 9223372036854775807, dtype=np.int64) cm = confusion_matrix(y, y, sample_weight=weight) - assert_equal(cm[0, 0], 9223372036854775807) - assert_equal(cm[1, 1], -2) + assert cm[0, 0] == 9223372036854775807 + assert cm[1, 1] == -2 def test_classification_report_multiclass(): @@ -898,7 +898,7 @@ def test_classification_report_multiclass(): report = classification_report( y_true, y_pred, labels=np.arange(len(iris.target_names)), target_names=iris.target_names) - assert_equal(report, expected_report) + assert report == expected_report def test_classification_report_multiclass_balanced(): @@ -916,7 +916,7 @@ def test_classification_report_multiclass_balanced(): weighted avg 0.33 0.33 0.33 9 """ report = classification_report(y_true, y_pred) - assert_equal(report, expected_report) + assert report == expected_report def test_classification_report_multiclass_with_label_detection(): @@ -936,7 +936,7 @@ def test_classification_report_multiclass_with_label_detection(): weighted avg 0.51 0.53 0.47 75 """ report = classification_report(y_true, y_pred) - assert_equal(report, expected_report) + assert report == expected_report def test_classification_report_multiclass_with_digits(): @@ -959,7 +959,7 @@ def test_classification_report_multiclass_with_digits(): report = classification_report( y_true, y_pred, labels=np.arange(len(iris.target_names)), target_names=iris.target_names, digits=5) - 
assert_equal(report, expected_report) + assert report == expected_report def test_classification_report_multiclass_with_string_label(): @@ -980,7 +980,7 @@ def test_classification_report_multiclass_with_string_label(): weighted avg 0.51 0.53 0.47 75 """ report = classification_report(y_true, y_pred) - assert_equal(report, expected_report) + assert report == expected_report expected_report = """\ precision recall f1-score support @@ -995,7 +995,7 @@ def test_classification_report_multiclass_with_string_label(): """ report = classification_report(y_true, y_pred, target_names=["a", "b", "c"]) - assert_equal(report, expected_report) + assert report == expected_report def test_classification_report_multiclass_with_unicode_label(): @@ -1017,7 +1017,7 @@ def test_classification_report_multiclass_with_unicode_label(): weighted avg 0.51 0.53 0.47 75 """ report = classification_report(y_true, y_pred) - assert_equal(report, expected_report) + assert report == expected_report def test_classification_report_multiclass_with_long_string_label(): @@ -1040,7 +1040,7 @@ def test_classification_report_multiclass_with_long_string_label(): """ report = classification_report(y_true, y_pred) - assert_equal(report, expected_report) + assert report == expected_report def test_classification_report_labels_target_names_unequal_length(): @@ -1099,7 +1099,7 @@ def test_multilabel_classification_report(): """ report = classification_report(y_true, y_pred) - assert_equal(report, expected_report) + assert report == expected_report def test_multilabel_zero_one_loss_subset(): @@ -1107,13 +1107,13 @@ def test_multilabel_zero_one_loss_subset(): y1 = np.array([[0, 1, 1], [1, 0, 1]]) y2 = np.array([[0, 0, 1], [1, 0, 1]]) - assert_equal(zero_one_loss(y1, y2), 0.5) - assert_equal(zero_one_loss(y1, y1), 0) - assert_equal(zero_one_loss(y2, y2), 0) - assert_equal(zero_one_loss(y2, np.logical_not(y2)), 1) - assert_equal(zero_one_loss(y1, np.logical_not(y1)), 1) - assert_equal(zero_one_loss(y1, np.zeros(y1.shape)), 1) - assert_equal(zero_one_loss(y2, np.zeros(y1.shape)), 1) + assert zero_one_loss(y1, y2) == 0.5 + assert zero_one_loss(y1, y1) == 0 + assert zero_one_loss(y2, y2) == 0 + assert zero_one_loss(y2, np.logical_not(y2)) == 1 + assert zero_one_loss(y1, np.logical_not(y1)) == 1 + assert zero_one_loss(y1, np.zeros(y1.shape)) == 1 + assert zero_one_loss(y2, np.zeros(y1.shape)) == 1 def test_multilabel_hamming_loss(): @@ -1122,18 +1122,18 @@ def test_multilabel_hamming_loss(): y2 = np.array([[0, 0, 1], [1, 0, 1]]) w = np.array([1, 3]) - assert_equal(hamming_loss(y1, y2), 1 / 6) - assert_equal(hamming_loss(y1, y1), 0) - assert_equal(hamming_loss(y2, y2), 0) - assert_equal(hamming_loss(y2, 1 - y2), 1) - assert_equal(hamming_loss(y1, 1 - y1), 1) - assert_equal(hamming_loss(y1, np.zeros(y1.shape)), 4 / 6) - assert_equal(hamming_loss(y2, np.zeros(y1.shape)), 0.5) - assert_equal(hamming_loss(y1, y2, sample_weight=w), 1. / 12) - assert_equal(hamming_loss(y1, 1-y2, sample_weight=w), 11. / 12) - assert_equal(hamming_loss(y1, np.zeros_like(y1), sample_weight=w), 2. / 3) + assert hamming_loss(y1, y2) == 1 / 6 + assert hamming_loss(y1, y1) == 0 + assert hamming_loss(y2, y2) == 0 + assert hamming_loss(y2, 1 - y2) == 1 + assert hamming_loss(y1, 1 - y1) == 1 + assert hamming_loss(y1, np.zeros(y1.shape)) == 4 / 6 + assert hamming_loss(y2, np.zeros(y1.shape)) == 0.5 + assert hamming_loss(y1, y2, sample_weight=w) == 1. / 12 + assert hamming_loss(y1, 1-y2, sample_weight=w) == 11. 
/ 12 + assert hamming_loss(y1, np.zeros_like(y1), sample_weight=w) == 2. / 3 # sp_hamming only works with 1-D arrays - assert_equal(hamming_loss(y1[0], y2[0]), sp_hamming(y1[0], y2[0])) + assert hamming_loss(y1[0], y2[0]) == sp_hamming(y1[0], y2[0]) assert_warns_message(DeprecationWarning, "The labels parameter is unused. It was" " deprecated in version 0.21 and" @@ -1335,7 +1335,7 @@ def test_precision_recall_f1_score_multilabel_1(): assert_almost_equal(p, 1.5 / 4) assert_almost_equal(r, 0.5) assert_almost_equal(f, 2.5 / 1.5 * 0.25) - assert_equal(s, None) + assert s == None assert_almost_equal(fbeta_score(y_true, y_pred, beta=2, average="macro"), np.mean(f2)) @@ -1345,7 +1345,7 @@ def test_precision_recall_f1_score_multilabel_1(): assert_almost_equal(p, 0.5) assert_almost_equal(r, 0.5) assert_almost_equal(f, 0.5) - assert_equal(s, None) + assert s == None assert_almost_equal(fbeta_score(y_true, y_pred, beta=2, average="micro"), (1 + 4) * p * r / (4 * p + r)) @@ -1356,7 +1356,7 @@ def test_precision_recall_f1_score_multilabel_1(): assert_almost_equal(p, 1.5 / 4) assert_almost_equal(r, 0.5) assert_almost_equal(f, 2.5 / 1.5 * 0.25) - assert_equal(s, None) + assert s == None assert_almost_equal(fbeta_score(y_true, y_pred, beta=2, average="weighted"), np.average(f2, weights=support)) @@ -1369,7 +1369,7 @@ def test_precision_recall_f1_score_multilabel_1(): assert_almost_equal(p, 0.5) assert_almost_equal(r, 0.5) assert_almost_equal(f, 0.5) - assert_equal(s, None) + assert s == None assert_almost_equal(fbeta_score(y_true, y_pred, beta=2, average="samples"), 0.5) @@ -1401,7 +1401,7 @@ def test_precision_recall_f1_score_multilabel_2(): assert_almost_equal(p, 0.25) assert_almost_equal(r, 0.25) assert_almost_equal(f, 2 * 0.25 * 0.25 / 0.5) - assert_equal(s, None) + assert s == None assert_almost_equal(fbeta_score(y_true, y_pred, beta=2, average="micro"), (1 + 4) * p * r / (4 * p + r)) @@ -1411,7 +1411,7 @@ def test_precision_recall_f1_score_multilabel_2(): assert_almost_equal(p, 0.25) assert_almost_equal(r, 0.125) assert_almost_equal(f, 2 / 12) - assert_equal(s, None) + assert s == None assert_almost_equal(fbeta_score(y_true, y_pred, beta=2, average="macro"), np.mean(f2)) @@ -1421,7 +1421,7 @@ def test_precision_recall_f1_score_multilabel_2(): assert_almost_equal(p, 2 / 4) assert_almost_equal(r, 1 / 4) assert_almost_equal(f, 2 / 3 * 2 / 4) - assert_equal(s, None) + assert s == None assert_almost_equal(fbeta_score(y_true, y_pred, beta=2, average="weighted"), np.average(f2, weights=support)) @@ -1436,7 +1436,7 @@ def test_precision_recall_f1_score_multilabel_2(): assert_almost_equal(p, 1 / 6) assert_almost_equal(r, 1 / 6) assert_almost_equal(f, 2 / 4 * 1 / 3) - assert_equal(s, None) + assert s == None assert_almost_equal(fbeta_score(y_true, y_pred, beta=2, average="samples"), 0.1666, 2) @@ -1466,7 +1466,7 @@ def test_precision_recall_f1_score_with_an_empty_prediction(): assert_almost_equal(p, 0.5) assert_almost_equal(r, 1.5 / 4) assert_almost_equal(f, 2.5 / (4 * 1.5)) - assert_equal(s, None) + assert s == None assert_almost_equal(fbeta_score(y_true, y_pred, beta=2, average="macro"), np.mean(f2)) @@ -1476,7 +1476,7 @@ def test_precision_recall_f1_score_with_an_empty_prediction(): assert_almost_equal(p, 2 / 3) assert_almost_equal(r, 0.5) assert_almost_equal(f, 2 / 3 / (2 / 3 + 0.5)) - assert_equal(s, None) + assert s == None assert_almost_equal(fbeta_score(y_true, y_pred, beta=2, average="micro"), (1 + 4) * p * r / (4 * p + r)) @@ -1486,7 +1486,7 @@ def 
test_precision_recall_f1_score_with_an_empty_prediction(): assert_almost_equal(p, 3 / 4) assert_almost_equal(r, 0.5) assert_almost_equal(f, (2 / 1.5 + 1) / 4) - assert_equal(s, None) + assert s == None assert_almost_equal(fbeta_score(y_true, y_pred, beta=2, average="weighted"), np.average(f2, weights=support)) @@ -1499,7 +1499,7 @@ def test_precision_recall_f1_score_with_an_empty_prediction(): assert_almost_equal(p, 1 / 3) assert_almost_equal(r, 1 / 3) assert_almost_equal(f, 1 / 3) - assert_equal(s, None) + assert s == None assert_almost_equal(fbeta_score(y_true, y_pred, beta=2, average="samples"), 0.333, 2) @@ -1518,7 +1518,7 @@ def test_precision_recall_f1_no_labels(beta, average): assert_almost_equal(p, 0) assert_almost_equal(r, 0) assert_almost_equal(f, 0) - assert_equal(s, None) + assert s == None fbeta = assert_warns(UndefinedMetricWarning, fbeta_score, y_true, y_pred, @@ -1603,10 +1603,10 @@ def test_prf_warnings(): precision_recall_fscore_support([0, 0], [0, 0], average="binary") msg = ('Recall and F-score are ill-defined and ' 'being set to 0.0 due to no true samples.') - assert_equal(str(record.pop().message), msg) + assert str(record.pop().message) == msg msg = ('Precision and F-score are ill-defined and ' 'being set to 0.0 due to no predicted samples.') - assert_equal(str(record.pop().message), msg) + assert str(record.pop().message) == msg def test_recall_warnings(): @@ -1619,11 +1619,11 @@ def test_recall_warnings(): recall_score(np.array([[0, 0], [0, 0]]), np.array([[1, 1], [1, 1]]), average='micro') - assert_equal(str(record.pop().message), + assert (str(record.pop().message) == 'Recall is ill-defined and ' 'being set to 0.0 due to no true samples.') recall_score([0, 0], [0, 0]) - assert_equal(str(record.pop().message), + assert (str(record.pop().message) == 'Recall is ill-defined and ' 'being set to 0.0 due to no true samples.') @@ -1634,11 +1634,11 @@ def test_precision_warnings(): precision_score(np.array([[1, 1], [1, 1]]), np.array([[0, 0], [0, 0]]), average='micro') - assert_equal(str(record.pop().message), + assert (str(record.pop().message) == 'Precision is ill-defined and ' 'being set to 0.0 due to no predicted samples.') precision_score([0, 0], [0, 0]) - assert_equal(str(record.pop().message), + assert (str(record.pop().message) == 'Precision is ill-defined and ' 'being set to 0.0 due to no predicted samples.') @@ -1656,20 +1656,20 @@ def test_fscore_warnings(): score(np.array([[1, 1], [1, 1]]), np.array([[0, 0], [0, 0]]), average='micro') - assert_equal(str(record.pop().message), + assert (str(record.pop().message) == 'F-score is ill-defined and ' 'being set to 0.0 due to no predicted samples.') score(np.array([[0, 0], [0, 0]]), np.array([[1, 1], [1, 1]]), average='micro') - assert_equal(str(record.pop().message), + assert (str(record.pop().message) == 'F-score is ill-defined and ' 'being set to 0.0 due to no true samples.') score([0, 0], [0, 0]) - assert_equal(str(record.pop().message), + assert (str(record.pop().message) == 'F-score is ill-defined and ' 'being set to 0.0 due to no true samples.') - assert_equal(str(record.pop().message), + assert (str(record.pop().message) == 'F-score is ill-defined and ' 'being set to 0.0 due to no predicted samples.') @@ -1772,10 +1772,10 @@ def test__check_targets(): else: merged_type, y1out, y2out = _check_targets(y1, y2) - assert_equal(merged_type, expected) + assert merged_type == expected if merged_type.startswith('multilabel'): - assert_equal(y1out.format, 'csr') - assert_equal(y2out.format, 'csr') + assert y1out.format 
== 'csr' + assert y2out.format == 'csr' else: assert_array_equal(y1out, np.squeeze(y1)) assert_array_equal(y2out, np.squeeze(y2)) @@ -1795,17 +1795,17 @@ def test__check_targets_multiclass_with_both_y_true_and_y_pred_binary(): # https://github.com/scikit-learn/scikit-learn/issues/8098 y_true = [0, 1] y_pred = [0, -1] - assert_equal(_check_targets(y_true, y_pred)[0], 'multiclass') + assert _check_targets(y_true, y_pred)[0] == 'multiclass' def test_hinge_loss_binary(): y_true = np.array([-1, 1, 1, -1]) pred_decision = np.array([-8.5, 0.5, 1.5, -0.3]) - assert_equal(hinge_loss(y_true, pred_decision), 1.2 / 4) + assert hinge_loss(y_true, pred_decision) == 1.2 / 4 y_true = np.array([0, 2, 2, 0]) pred_decision = np.array([-8.5, 0.5, 1.5, -0.3]) - assert_equal(hinge_loss(y_true, pred_decision), 1.2 / 4) + assert hinge_loss(y_true, pred_decision) == 1.2 / 4 def test_hinge_loss_multiclass(): @@ -1828,7 +1828,7 @@ def test_hinge_loss_multiclass(): ]) np.clip(dummy_losses, 0, None, out=dummy_losses) dummy_hinge_loss = np.mean(dummy_losses) - assert_equal(hinge_loss(y_true, pred_decision), + assert (hinge_loss(y_true, pred_decision) == dummy_hinge_loss) @@ -1866,7 +1866,7 @@ def test_hinge_loss_multiclass_with_missing_labels(): ]) np.clip(dummy_losses, 0, None, out=dummy_losses) dummy_hinge_loss = np.mean(dummy_losses) - assert_equal(hinge_loss(y_true, pred_decision, labels=labels), + assert (hinge_loss(y_true, pred_decision, labels=labels) == dummy_hinge_loss) @@ -1893,7 +1893,7 @@ def test_hinge_loss_multiclass_invariance_lists(): ]) np.clip(dummy_losses, 0, None, out=dummy_losses) dummy_hinge_loss = np.mean(dummy_losses) - assert_equal(hinge_loss(y_true, pred_decision), + assert (hinge_loss(y_true, pred_decision) == dummy_hinge_loss) @@ -2044,10 +2044,10 @@ def test_multilabel_jaccard_similarity_score_deprecation(): # size(y1 \union y2) = [2, 2] jss = partial(assert_warns, DeprecationWarning, jaccard_similarity_score) - assert_equal(jss(y1, y2), 0.75) - assert_equal(jss(y1, y1), 1) - assert_equal(jss(y2, y2), 1) - assert_equal(jss(y2, np.logical_not(y2)), 0) - assert_equal(jss(y1, np.logical_not(y1)), 0) - assert_equal(jss(y1, np.zeros(y1.shape)), 0) - assert_equal(jss(y2, np.zeros(y1.shape)), 0) + assert jss(y1, y2) == 0.75 + assert jss(y1, y1) == 1 + assert jss(y2, y2) == 1 + assert jss(y2, np.logical_not(y2)) == 0 + assert jss(y1, np.logical_not(y1)) == 0 + assert jss(y1, np.zeros(y1.shape)) == 0 + assert jss(y2, np.zeros(y1.shape)) == 0 diff --git a/sklearn/metrics/tests/test_common.py b/sklearn/metrics/tests/test_common.py index 86ab2f6281678..13ab6ecd3d804 100644 --- a/sklearn/metrics/tests/test_common.py +++ b/sklearn/metrics/tests/test_common.py @@ -473,13 +473,13 @@ def test_symmetry(): y_pred_bin = random_state.randint(0, 2, size=(20, 25)) # We shouldn't forget any metrics - assert_equal(SYMMETRIC_METRICS.union( + assert (SYMMETRIC_METRICS.union( NOT_SYMMETRIC_METRICS, set(THRESHOLDED_METRICS), - METRIC_UNDEFINED_BINARY_MULTICLASS), + METRIC_UNDEFINED_BINARY_MULTICLASS) == set(ALL_METRICS)) - assert_equal( - SYMMETRIC_METRICS.intersection(NOT_SYMMETRIC_METRICS), + assert ( + SYMMETRIC_METRICS.intersection(NOT_SYMMETRIC_METRICS) == set()) # Symmetric metric diff --git a/sklearn/metrics/tests/test_pairwise.py b/sklearn/metrics/tests/test_pairwise.py index 6fd39333fb7a7..ecf943a4c0bcc 100644 --- a/sklearn/metrics/tests/test_pairwise.py +++ b/sklearn/metrics/tests/test_pairwise.py @@ -95,15 +95,15 @@ def test_pairwise_distances(): # scipy.spatial. 
S = pairwise_distances(X, metric="cityblock") S2 = pairwise_distances(X, metric=cityblock) - assert_equal(S.shape[0], S.shape[1]) - assert_equal(S.shape[0], X.shape[0]) + assert S.shape[0] == S.shape[1] + assert S.shape[0] == X.shape[0] assert_array_almost_equal(S, S2) # The manhattan metric should be equivalent to cityblock. S = pairwise_distances(X, Y, metric="manhattan") S2 = pairwise_distances(X, Y, metric=cityblock) - assert_equal(S.shape[0], X.shape[0]) - assert_equal(S.shape[1], Y.shape[0]) + assert S.shape[0] == X.shape[0] + assert S.shape[1] == Y.shape[0] assert_array_almost_equal(S, S2) # Test cosine as a string metric versus cosine callable @@ -111,8 +111,8 @@ def test_pairwise_distances(): # while the function cosine is scipy.spatial S = pairwise_distances(X, Y, metric="cosine") S2 = pairwise_distances(X, Y, metric=cosine) - assert_equal(S.shape[0], X.shape[0]) - assert_equal(S.shape[1], Y.shape[0]) + assert S.shape[0] == X.shape[0] + assert S.shape[1] == Y.shape[0] assert_array_almost_equal(S, S2) # Test with sparse X and Y, @@ -217,7 +217,7 @@ def test_pairwise_precomputed(func): # Test always returns float dtype S = func(np.array([[1]], dtype='int'), metric='precomputed') - assert_equal('f', S.dtype.kind) + assert 'f' == S.dtype.kind # Test converts list to array-like S = func([[1.]], metric='precomputed') @@ -277,7 +277,7 @@ def test_pairwise_callable_nonstrict_metric(): # paired_distances should allow callable metric where metric(x, x) != 0 # Knowing that the callable is a strict metric would allow the diagonal to # be left uncalculated and set to 0. - assert_equal(pairwise_distances([[1.]], metric=lambda x, y: 5)[0, 0], 5) + assert pairwise_distances([[1.]], metric=lambda x, y: 5)[0, 0] == 5 # Test with all metrics that should be in PAIRWISE_KERNEL_FUNCTIONS. @@ -413,8 +413,8 @@ def test_pairwise_distances_argmin_min(): assert_array_almost_equal(idxsp, expected_idx) assert_array_almost_equal(valssp, expected_vals) # We don't want np.matrix here - assert_equal(type(idxsp), np.ndarray) - assert_equal(type(valssp), np.ndarray) + assert type(idxsp) == np.ndarray + assert type(valssp) == np.ndarray # euclidean metric squared idx, vals = pairwise_distances_argmin_min(X, Y, metric="euclidean", @@ -830,7 +830,7 @@ def test_chi_square_kernel(): K_add = additive_chi2_kernel(X, Y) gamma = 0.1 K = chi2_kernel(X, Y, gamma=gamma) - assert_equal(K.dtype, np.float) + assert K.dtype == np.float for i, x in enumerate(X): for j, y in enumerate(Y): chi2 = -np.sum((x - y) ** 2 / (x + y)) @@ -848,21 +848,21 @@ def test_chi_square_kernel(): X = rng.random_sample((5, 4)).astype(np.float32) Y = rng.random_sample((10, 4)).astype(np.float32) K = chi2_kernel(X, Y) - assert_equal(K.dtype, np.float32) + assert K.dtype == np.float32 # check integer type gets converted, # check that zeros are handled X = rng.random_sample((10, 4)).astype(np.int32) K = chi2_kernel(X, X) assert np.isfinite(K).all() - assert_equal(K.dtype, np.float) + assert K.dtype == np.float # check that kernel of similar things is greater than dissimilar ones X = [[.3, .7], [1., 0]] Y = [[0, 1], [.9, .1]] K = chi2_kernel(X, Y) - assert_greater(K[0, 0], K[0, 1]) - assert_greater(K[1, 1], K[1, 0]) + assert K[0, 0] > K[0, 1] + assert K[1, 1] > K[1, 0] # test negative input assert_raises(ValueError, chi2_kernel, [[0, -1]]) @@ -1034,15 +1034,15 @@ def test_check_sparse_arrays(): # compare their difference because testing csr matrices for # equality with '==' does not work as expected. 
assert issparse(XA_checked) - assert_equal(abs(XA_sparse - XA_checked).sum(), 0) + assert abs(XA_sparse - XA_checked).sum() == 0 assert issparse(XB_checked) - assert_equal(abs(XB_sparse - XB_checked).sum(), 0) + assert abs(XB_sparse - XB_checked).sum() == 0 XA_checked, XA_2_checked = check_pairwise_arrays(XA_sparse, XA_sparse) assert issparse(XA_checked) - assert_equal(abs(XA_sparse - XA_checked).sum(), 0) + assert abs(XA_sparse - XA_checked).sum() == 0 assert issparse(XA_2_checked) - assert_equal(abs(XA_2_checked - XA_checked).sum(), 0) + assert abs(XA_2_checked - XA_checked).sum() == 0 def tuplify(X): @@ -1074,24 +1074,24 @@ def test_check_preserve_type(): XB = np.resize(np.arange(40), (5, 8)).astype(np.float32) XA_checked, XB_checked = check_pairwise_arrays(XA, None) - assert_equal(XA_checked.dtype, np.float32) + assert XA_checked.dtype == np.float32 # both float32 XA_checked, XB_checked = check_pairwise_arrays(XA, XB) - assert_equal(XA_checked.dtype, np.float32) - assert_equal(XB_checked.dtype, np.float32) + assert XA_checked.dtype == np.float32 + assert XB_checked.dtype == np.float32 # mismatched A XA_checked, XB_checked = check_pairwise_arrays(XA.astype(np.float), XB) - assert_equal(XA_checked.dtype, np.float) - assert_equal(XB_checked.dtype, np.float) + assert XA_checked.dtype == np.float + assert XB_checked.dtype == np.float # mismatched B XA_checked, XB_checked = check_pairwise_arrays(XA, XB.astype(np.float)) - assert_equal(XA_checked.dtype, np.float) - assert_equal(XB_checked.dtype, np.float) + assert XA_checked.dtype == np.float + assert XB_checked.dtype == np.float @pytest.mark.parametrize("n_jobs", [1, 2]) diff --git a/sklearn/metrics/tests/test_ranking.py b/sklearn/metrics/tests/test_ranking.py index d634bd59e0fe0..78e2d2a69a48c 100644 --- a/sklearn/metrics/tests/test_ranking.py +++ b/sklearn/metrics/tests/test_ranking.py @@ -185,8 +185,8 @@ def test_roc_curve(drop): roc_auc = auc(fpr, tpr) assert_array_almost_equal(roc_auc, expected_auc, decimal=2) assert_almost_equal(roc_auc, roc_auc_score(y_true, probas_pred)) - assert_equal(fpr.shape, tpr.shape) - assert_equal(fpr.shape, thresholds.shape) + assert fpr.shape == tpr.shape + assert fpr.shape == thresholds.shape def test_roc_curve_end_points(): @@ -196,10 +196,10 @@ def test_roc_curve_end_points(): y_true = np.array([0] * 50 + [1] * 50) y_pred = rng.randint(3, size=100) fpr, tpr, thr = roc_curve(y_true, y_pred, drop_intermediate=True) - assert_equal(fpr[0], 0) - assert_equal(fpr[-1], 1) - assert_equal(fpr.shape, tpr.shape) - assert_equal(fpr.shape, thr.shape) + assert fpr[0] == 0 + assert fpr[-1] == 1 + assert fpr.shape == tpr.shape + assert fpr.shape == thr.shape def test_roc_returns_consistency(): @@ -217,8 +217,8 @@ def test_roc_returns_consistency(): # compare tpr and tpr_correct to see if the thresholds' order was correct assert_array_almost_equal(tpr, tpr_correct, decimal=2) - assert_equal(fpr.shape, tpr.shape) - assert_equal(fpr.shape, thresholds.shape) + assert fpr.shape == tpr.shape + assert fpr.shape == thresholds.shape def test_roc_curve_multi(): @@ -235,8 +235,8 @@ def test_roc_curve_confidence(): fpr, tpr, thresholds = roc_curve(y_true, probas_pred - 0.5) roc_auc = auc(fpr, tpr) assert_array_almost_equal(roc_auc, 0.90, decimal=2) - assert_equal(fpr.shape, tpr.shape) - assert_equal(fpr.shape, thresholds.shape) + assert fpr.shape == tpr.shape + assert fpr.shape == thresholds.shape def test_roc_curve_hard(): @@ -248,23 +248,23 @@ def test_roc_curve_hard(): fpr, tpr, thresholds = roc_curve(y_true, trivial_pred) 
roc_auc = auc(fpr, tpr) assert_array_almost_equal(roc_auc, 0.50, decimal=2) - assert_equal(fpr.shape, tpr.shape) - assert_equal(fpr.shape, thresholds.shape) + assert fpr.shape == tpr.shape + assert fpr.shape == thresholds.shape # always predict zero trivial_pred = np.zeros(y_true.shape) fpr, tpr, thresholds = roc_curve(y_true, trivial_pred) roc_auc = auc(fpr, tpr) assert_array_almost_equal(roc_auc, 0.50, decimal=2) - assert_equal(fpr.shape, tpr.shape) - assert_equal(fpr.shape, thresholds.shape) + assert fpr.shape == tpr.shape + assert fpr.shape == thresholds.shape # hard decisions fpr, tpr, thresholds = roc_curve(y_true, pred) roc_auc = auc(fpr, tpr) assert_array_almost_equal(roc_auc, 0.78, decimal=2) - assert_equal(fpr.shape, tpr.shape) - assert_equal(fpr.shape, thresholds.shape) + assert fpr.shape == tpr.shape + assert fpr.shape == thresholds.shape def test_roc_curve_one_label(): @@ -275,8 +275,8 @@ def test_roc_curve_one_label(): fpr, tpr, thresholds = assert_warns(w, roc_curve, y_true, y_pred) # all true labels, all fpr should be nan assert_array_equal(fpr, np.full(len(thresholds), np.nan)) - assert_equal(fpr.shape, tpr.shape) - assert_equal(fpr.shape, thresholds.shape) + assert fpr.shape == tpr.shape + assert fpr.shape == thresholds.shape # assert there are warnings fpr, tpr, thresholds = assert_warns(w, roc_curve, @@ -284,8 +284,8 @@ def test_roc_curve_one_label(): y_pred) # all negative labels, all tpr should be nan assert_array_equal(tpr, np.full(len(thresholds), np.nan)) - assert_equal(fpr.shape, tpr.shape) - assert_equal(fpr.shape, thresholds.shape) + assert fpr.shape == tpr.shape + assert fpr.shape == thresholds.shape def test_roc_curve_toydata(): @@ -403,8 +403,8 @@ def test_roc_curve_fpr_tpr_increasing(): y_score = [0.1, 0.7, 0.3, 0.4, 0.5] sample_weight = np.repeat(0.2, 5) fpr, tpr, _ = roc_curve(y_true, y_score, sample_weight=sample_weight) - assert_equal((np.diff(fpr) < 0).sum(), 0) - assert_equal((np.diff(tpr) < 0).sum(), 0) + assert (np.diff(fpr) < 0).sum() == 0 + assert (np.diff(tpr) < 0).sum() == 0 def test_auc(): @@ -505,8 +505,8 @@ def test_precision_recall_curve(): assert_array_almost_equal(p, np.array([0.5, 0.33333333, 0.5, 1., 1.])) assert_array_almost_equal(r, np.array([1., 0.5, 0.5, 0.5, 0.])) assert_array_almost_equal(t, np.array([1, 2, 3, 4])) - assert_equal(p.size, r.size) - assert_equal(p.size, t.size + 1) + assert p.size == r.size + assert p.size == t.size + 1 def _test_precision_recall_curve(y_true, probas_pred): @@ -518,13 +518,13 @@ def _test_precision_recall_curve(y_true, probas_pred): average_precision_score(y_true, probas_pred)) assert_almost_equal(_average_precision(y_true, probas_pred), precision_recall_auc, decimal=3) - assert_equal(p.size, r.size) - assert_equal(p.size, thresholds.size + 1) + assert p.size == r.size + assert p.size == thresholds.size + 1 # Smoke test in the case of proba having only one value p, r, thresholds = precision_recall_curve(y_true, np.zeros_like(probas_pred)) - assert_equal(p.size, r.size) - assert_equal(p.size, thresholds.size + 1) + assert p.size == r.size + assert p.size == thresholds.size + 1 def test_precision_recall_curve_errors(): @@ -648,7 +648,7 @@ def test_average_precision_constant_values(): y_score = np.ones(100) # The precision is then the fraction of positive whatever the recall # is, as there is only one threshold: - assert_equal(average_precision_score(y_true, y_score), .25) + assert average_precision_score(y_true, y_score) == .25 def test_average_precision_score_pos_label_errors(): @@ -681,17 +681,17 @@ 
def test_score_scale_invariance(): roc_auc_scaled_up = roc_auc_score(y_true, 100 * probas_pred) roc_auc_scaled_down = roc_auc_score(y_true, 1e-6 * probas_pred) roc_auc_shifted = roc_auc_score(y_true, probas_pred - 10) - assert_equal(roc_auc, roc_auc_scaled_up) - assert_equal(roc_auc, roc_auc_scaled_down) - assert_equal(roc_auc, roc_auc_shifted) + assert roc_auc == roc_auc_scaled_up + assert roc_auc == roc_auc_scaled_down + assert roc_auc == roc_auc_shifted pr_auc = average_precision_score(y_true, probas_pred) pr_auc_scaled_up = average_precision_score(y_true, 100 * probas_pred) pr_auc_scaled_down = average_precision_score(y_true, 1e-6 * probas_pred) pr_auc_shifted = average_precision_score(y_true, probas_pred - 10) - assert_equal(pr_auc, pr_auc_scaled_up) - assert_equal(pr_auc, pr_auc_scaled_down) - assert_equal(pr_auc, pr_auc_shifted) + assert pr_auc == pr_auc_scaled_up + assert pr_auc == pr_auc_scaled_down + assert pr_auc == pr_auc_shifted def check_lrap_toy(lrap_score): @@ -759,13 +759,13 @@ def check_zero_or_all_relevant_labels(lrap_score): # No relevant labels y_true = np.zeros((1, n_labels)) - assert_equal(lrap_score(y_true, y_score), 1.) - assert_equal(lrap_score(y_true, y_score_ties), 1.) + assert lrap_score(y_true, y_score) == 1. + assert lrap_score(y_true, y_score_ties) == 1. # Only relevant labels y_true = np.ones((1, n_labels)) - assert_equal(lrap_score(y_true, y_score), 1.) - assert_equal(lrap_score(y_true, y_score_ties), 1.) + assert lrap_score(y_true, y_score) == 1. + assert lrap_score(y_true, y_score_ties) == 1. # Degenerate case: only one label assert_almost_equal(lrap_score([[1], [0], [1], [0]], diff --git a/sklearn/metrics/tests/test_regression.py b/sklearn/metrics/tests/test_regression.py index b9b7ade63f68e..a40ec3856f201 100644 --- a/sklearn/metrics/tests/test_regression.py +++ b/sklearn/metrics/tests/test_regression.py @@ -93,7 +93,7 @@ def test__check_reg_targets(): if type1 == type2 and n_out1 == n_out2: y_type, y_check1, y_check2, multioutput = _check_reg_targets( y1, y2, None) - assert_equal(type1, y_type) + assert type1 == y_type if type1 == 'continuous': assert_array_equal(y_check1, np.reshape(y1, (-1, 1))) assert_array_equal(y_check2, np.reshape(y2, (-1, 1))) @@ -143,8 +143,8 @@ def test_regression_multioutput_array(): r = r2_score([[0, -1], [0, 1]], [[2, 2], [1, 1]], multioutput='raw_values') assert_array_almost_equal(r, [0, -3.5], decimal=2) - assert_equal(np.mean(r), r2_score([[0, -1], [0, 1]], [[2, 2], [1, 1]], - multioutput='uniform_average')) + assert np.mean(r) == r2_score([[0, -1], [0, 1]], [[2, 2], [1, 1]], + multioutput='uniform_average') evs = explained_variance_score([[0, -1], [0, 1]], [[2, 2], [1, 1]], multioutput='raw_values') assert_array_almost_equal(evs, [0, -1.25], decimal=2) @@ -155,11 +155,11 @@ def test_regression_multioutput_array(): y_pred = [[1, 4], [-1, 1]] r2 = r2_score(y_true, y_pred, multioutput='raw_values') assert_array_almost_equal(r2, [1., -3.], decimal=2) - assert_equal(np.mean(r2), r2_score(y_true, y_pred, - multioutput='uniform_average')) + assert np.mean(r2) == r2_score(y_true, y_pred, + multioutput='uniform_average') evs = explained_variance_score(y_true, y_pred, multioutput='raw_values') assert_array_almost_equal(evs, [1., -3.], decimal=2) - assert_equal(np.mean(evs), explained_variance_score(y_true, y_pred)) + assert np.mean(evs) == explained_variance_score(y_true, y_pred) # Handling msle separately as it does not accept negative inputs. 
y_true = np.array([[0.5, 1], [1, 2], [7, 6]]) diff --git a/sklearn/metrics/tests/test_score_objects.py b/sklearn/metrics/tests/test_score_objects.py index 61ef471af3cc1..8ce7fd6389271 100644 --- a/sklearn/metrics/tests/test_score_objects.py +++ b/sklearn/metrics/tests/test_score_objects.py @@ -198,8 +198,8 @@ def check_multimetric_scoring_single_metric_wrapper(*args, **kwargs): if args[0] is not None: assert scorers is not None names, scorers = zip(*scorers.items()) - assert_equal(len(scorers), 1) - assert_equal(names[0], 'score') + assert len(scorers) == 1 + assert names[0] == 'score' scorers = scorers[0] return scorers @@ -224,7 +224,7 @@ def test_check_scoring_and_check_multimetric_scoring(): scorers, is_multi = _check_multimetric_scoring(estimator, scoring) assert is_multi assert isinstance(scorers, dict) - assert_equal(sorted(scorers.keys()), sorted(list(scoring))) + assert sorted(scorers.keys()) == sorted(list(scoring)) assert all([isinstance(scorer, _PredictScorer) for scorer in list(scorers.values())]) @@ -484,10 +484,10 @@ def test_scorer_sample_weight(): sample_weight=sample_weight) ignored = scorer(estimator[name], X_test[10:], target[10:]) unweighted = scorer(estimator[name], X_test, target) - assert_not_equal(weighted, unweighted, - msg="scorer {0} behaves identically when " - "called with sample weights: {1} vs " - "{2}".format(name, weighted, unweighted)) + assert weighted != unweighted, ( + "scorer {0} behaves identically when " + "called with sample weights: {1} vs " + "{2}".format(name, weighted, unweighted)) assert_almost_equal(weighted, ignored, err_msg="scorer {0} behaves differently when " "ignoring samples and setting sample_weight to" From bcbfe65e847ba4ed797c16105807181c92823a8e Mon Sep 17 00:00:00 2001 From: adrinjalali Date: Sun, 30 Jun 2019 15:57:46 +0200 Subject: [PATCH 12/22] cleanup assert_message --- sklearn/datasets/tests/test_samples_generator.py | 9 ++++----- sklearn/ensemble/tests/test_forest.py | 8 +++----- sklearn/tests/test_base.py | 6 +++--- 3 files changed, 10 insertions(+), 13 deletions(-) diff --git a/sklearn/datasets/tests/test_samples_generator.py b/sklearn/datasets/tests/test_samples_generator.py index f3e0e20b7dea8..092fcc1290dea 100644 --- a/sklearn/datasets/tests/test_samples_generator.py +++ b/sklearn/datasets/tests/test_samples_generator.py @@ -103,16 +103,15 @@ def test_make_classification_informative_features(): unique_signs, cluster_index = np.unique(signs, return_inverse=True) - assert_message = ("Wrong number of clusters, or not in distinct " - "quadrants") - assert len(unique_signs) == n_clusters, assert_message + assert len(unique_signs) == n_clusters, ( + "Wrong number of clusters, or not in distinct quadrants") clusters_by_class = defaultdict(set) for cluster, cls in zip(cluster_index, y): clusters_by_class[cls].add(cluster) for clusters in clusters_by_class.values(): - assert_message = "Wrong number of clusters per class" - assert len(clusters) == n_clusters_per_class, assert_message + assert len(clusters) == n_clusters_per_class, ( + "Wrong number of clusters per class") assert (len(clusters_by_class) == n_classes), "Wrong number of classes" diff --git a/sklearn/ensemble/tests/test_forest.py b/sklearn/ensemble/tests/test_forest.py index 93b3309ba5a1a..3ce35a4a2ccea 100644 --- a/sklearn/ensemble/tests/test_forest.py +++ b/sklearn/ensemble/tests/test_forest.py @@ -838,13 +838,11 @@ def check_min_weight_fraction_leaf(name): node_weights = np.bincount(out, weights=weights) # drop inner nodes leaf_weights = node_weights[node_weights 
!= 0] - assert_message = ("Failed with {0} " - "min_weight_fraction_leaf={1}".format( - name, est.min_weight_fraction_leaf)) assert ( np.min(leaf_weights) >= - total_weight * est.min_weight_fraction_leaf), assert_message - + total_weight * est.min_weight_fraction_leaf), ( + "Failed with {0} min_weight_fraction_leaf={1}".format( + name, est.min_weight_fraction_leaf)) @pytest.mark.parametrize('name', FOREST_ESTIMATORS) def test_min_weight_fraction_leaf(name): diff --git a/sklearn/tests/test_base.py b/sklearn/tests/test_base.py index 1ed90ecca42ce..190af5a8f6800 100644 --- a/sklearn/tests/test_base.py +++ b/sklearn/tests/test_base.py @@ -287,11 +287,11 @@ def test_score_sample_weight(): # generate random sample weights sample_weight = rng.randint(1, 10, size=len(ds.target)) # check that the score with and without sample weights are different - assert_message = ("Unweighted and weighted scores " - "are unexpectedly equal") assert (est.score(ds.data, ds.target) != est.score(ds.data, ds.target, - sample_weight=sample_weight)), assert_message + sample_weight=sample_weight)), ( + "Unweighted and weighted scores " + "are unexpectedly equal") def test_clone_pandas_dataframe(): From 1862c828c6328adeba47f5eddc8faffd9fdaed9d Mon Sep 17 00:00:00 2001 From: adrinjalali Date: Sun, 30 Jun 2019 15:59:09 +0200 Subject: [PATCH 13/22] fix mixture, model_selection --- .../mixture/tests/test_bayesian_mixture.py | 4 +- .../mixture/tests/test_gaussian_mixture.py | 32 ++-- sklearn/model_selection/tests/test_search.py | 80 ++++----- sklearn/model_selection/tests/test_split.py | 152 +++++++++--------- .../model_selection/tests/test_validation.py | 62 +++---- 5 files changed, 165 insertions(+), 165 deletions(-) diff --git a/sklearn/mixture/tests/test_bayesian_mixture.py b/sklearn/mixture/tests/test_bayesian_mixture.py index f4503dfc7d70e..d62920af454e5 100644 --- a/sklearn/mixture/tests/test_bayesian_mixture.py +++ b/sklearn/mixture/tests/test_bayesian_mixture.py @@ -303,7 +303,7 @@ def test_monotonic_likelihood(): for _ in range(600): prev_lower_bound = current_lower_bound current_lower_bound = bgmm.fit(X).lower_bound_ - assert_greater_equal(current_lower_bound, prev_lower_bound) + assert current_lower_bound >= prev_lower_bound if bgmm.converged_: break @@ -485,4 +485,4 @@ def test_bayesian_mixture_predict_predict_proba(): Y_pred = bgmm.predict(X) Y_pred_proba = bgmm.predict_proba(X).argmax(axis=1) assert_array_equal(Y_pred, Y_pred_proba) - assert_greater_equal(adjusted_rand_score(Y, Y_pred), .95) + assert adjusted_rand_score(Y, Y_pred) >= .95 diff --git a/sklearn/mixture/tests/test_gaussian_mixture.py b/sklearn/mixture/tests/test_gaussian_mixture.py index b40f9a46baf5b..c1f451c7d8495 100644 --- a/sklearn/mixture/tests/test_gaussian_mixture.py +++ b/sklearn/mixture/tests/test_gaussian_mixture.py @@ -165,13 +165,13 @@ def test_gaussian_mixture_attributes(): covariance_type=covariance_type, init_params=init_params).fit(X) - assert_equal(gmm.n_components, n_components) - assert_equal(gmm.covariance_type, covariance_type) - assert_equal(gmm.tol, tol) - assert_equal(gmm.reg_covar, reg_covar) - assert_equal(gmm.max_iter, max_iter) - assert_equal(gmm.n_init, n_init) - assert_equal(gmm.init_params, init_params) + assert gmm.n_components == n_components + assert gmm.covariance_type == covariance_type + assert gmm.tol == tol + assert gmm.reg_covar == reg_covar + assert gmm.max_iter == max_iter + assert gmm.n_init == n_init + assert gmm.init_params == init_params def test_check_X(): @@ -567,7 +567,7 @@ def 
test_gaussian_mixture_predict_predict_proba(): Y_pred = g.predict(X) Y_pred_proba = g.predict_proba(X).argmax(axis=1) assert_array_equal(Y_pred, Y_pred_proba) - assert_greater(adjusted_rand_score(Y, Y_pred), .95) + assert adjusted_rand_score(Y, Y_pred) > .95 @pytest.mark.filterwarnings("ignore:.*did not converge.*") @@ -595,7 +595,7 @@ def test_gaussian_mixture_fit_predict(seed, max_iter, tol): Y_pred1 = f.fit(X).predict(X) Y_pred2 = g.fit_predict(X) assert_array_equal(Y_pred1, Y_pred2) - assert_greater(adjusted_rand_score(Y, Y_pred2), .95) + assert adjusted_rand_score(Y, Y_pred2) > .95 def test_gaussian_mixture_fit_predict_n_init(): @@ -706,7 +706,7 @@ def test_multiple_init(): train2 = GaussianMixture(n_components=n_components, covariance_type=cv_type, random_state=0, n_init=5).fit(X).score(X) - assert_greater_equal(train2, train1) + assert train2 >= train1 def test_gaussian_mixture_n_parameters(): @@ -719,7 +719,7 @@ def test_gaussian_mixture_n_parameters(): g = GaussianMixture( n_components=n_components, covariance_type=cv_type, random_state=rng).fit(X) - assert_equal(g._n_parameters(), n_params[cv_type]) + assert g._n_parameters() == n_params[cv_type] def test_bic_1d_1component(): @@ -874,7 +874,7 @@ def test_score(): gmm2 = GaussianMixture(n_components=n_components, n_init=1, reg_covar=0, random_state=rng, covariance_type=covar_type).fit(X) - assert_greater(gmm2.score(X), gmm1.score(X)) + assert gmm2.score(X) > gmm1.score(X) def test_score_samples(): @@ -893,7 +893,7 @@ def test_score_samples(): "before using this method.", gmm.score_samples, X) gmm_score_samples = gmm.fit(X).score_samples(X) - assert_equal(gmm_score_samples.shape[0], rand_data.n_samples) + assert gmm_score_samples.shape[0] == rand_data.n_samples def test_monotonic_likelihood(): @@ -920,7 +920,7 @@ def test_monotonic_likelihood(): current_log_likelihood = gmm.fit(X).score(X) except ConvergenceWarning: pass - assert_greater_equal(current_log_likelihood, + assert (current_log_likelihood >= prev_log_likelihood) if gmm.converged_: @@ -1021,11 +1021,11 @@ def test_sample(): # Check shapes of sampled data, see # https://github.com/scikit-learn/scikit-learn/issues/7701 - assert_equal(X_s.shape, (n_samples, n_features)) + assert X_s.shape == (n_samples, n_features) for sample_size in range(1, 100): X_s, _ = gmm.sample(sample_size) - assert_equal(X_s.shape, (sample_size, n_features)) + assert X_s.shape == (sample_size, n_features) @ignore_warnings(category=ConvergenceWarning) diff --git a/sklearn/model_selection/tests/test_search.py b/sklearn/model_selection/tests/test_search.py index d05d284d7aceb..695e68aae7dfe 100644 --- a/sklearn/model_selection/tests/test_search.py +++ b/sklearn/model_selection/tests/test_search.py @@ -122,7 +122,7 @@ def score(self): def assert_grid_iter_equals_getitem(grid): - assert_equal(list(grid), [grid[i] for i in range(len(grid))]) + assert list(grid) == [grid[i] for i in range(len(grid))] @pytest.mark.parametrize( @@ -144,33 +144,33 @@ def test_parameter_grid(): grid1 = ParameterGrid(params1) assert isinstance(grid1, Iterable) assert isinstance(grid1, Sized) - assert_equal(len(grid1), 3) + assert len(grid1) == 3 assert_grid_iter_equals_getitem(grid1) params2 = {"foo": [4, 2], "bar": ["ham", "spam", "eggs"]} grid2 = ParameterGrid(params2) - assert_equal(len(grid2), 6) + assert len(grid2) == 6 # loop to assert we can iterate over the grid multiple times for i in range(2): # tuple + chain transforms {"a": 1, "b": 2} to ("a", 1, "b", 2) points = set(tuple(chain(*(sorted(p.items())))) for p in grid2) 
- assert_equal(points, + assert (points == set(("bar", x, "foo", y) for x, y in product(params2["bar"], params2["foo"]))) assert_grid_iter_equals_getitem(grid2) # Special case: empty grid (useful to get default estimator settings) empty = ParameterGrid({}) - assert_equal(len(empty), 1) - assert_equal(list(empty), [{}]) + assert len(empty) == 1 + assert list(empty) == [{}] assert_grid_iter_equals_getitem(empty) assert_raises(IndexError, lambda: empty[1]) has_empty = ParameterGrid([{'C': [1, 10]}, {}, {'C': [.5]}]) - assert_equal(len(has_empty), 4) - assert_equal(list(has_empty), [{'C': 1}, {'C': 10}, {}, {'C': .5}]) + assert len(has_empty) == 4 + assert list(has_empty) == [{'C': 1}, {'C': 10}, {}, {'C': .5}] assert_grid_iter_equals_getitem(has_empty) @@ -183,7 +183,7 @@ def test_grid_search(): sys.stdout = StringIO() grid_search.fit(X, y) sys.stdout = old_stdout - assert_equal(grid_search.best_estimator_.foo_param, 2) + assert grid_search.best_estimator_.foo_param == 2 assert_array_equal(grid_search.cv_results_["param_foo_param"].data, [1, 2, 3]) @@ -243,9 +243,9 @@ def test_grid_search_no_score(): grid_search_no_score.fit(X, y) # check that best params are equal - assert_equal(grid_search_no_score.best_params_, grid_search.best_params_) + assert grid_search_no_score.best_params_ == grid_search.best_params_ # check that we can call score and that it gives the correct result - assert_equal(grid_search.score(X, y), grid_search_no_score.score(X, y)) + assert grid_search.score(X, y) == grid_search_no_score.score(X, y) # giving no scoring function raises an error grid_search_no_score = GridSearchCV(clf_no_score, {'C': Cs}) @@ -276,7 +276,7 @@ def test_grid_search_score_method(): # ensure the test is sane assert score_auc < 1.0 assert score_accuracy < 1.0 - assert_not_equal(score_auc, score_accuracy) + assert score_auc != score_accuracy assert_almost_equal(score_accuracy, score_no_scoring) assert_almost_equal(score_auc, score_no_score_auc) @@ -409,7 +409,7 @@ def test_grid_search_when_param_grid_includes_range(): grid_search = None grid_search = GridSearchCV(clf, {'foo_param': range(1, 4)}, cv=3) grid_search.fit(X, y) - assert_equal(grid_search.best_estimator_.foo_param, 2) + assert grid_search.best_estimator_.foo_param == 2 def test_grid_search_bad_param_grid(): @@ -459,7 +459,7 @@ def test_grid_search_sparse(): C2 = cv.best_estimator_.C assert np.mean(y_pred == y_pred2) >= .9 - assert_equal(C, C2) + assert C == C2 def test_grid_search_sparse_scoring(): @@ -479,7 +479,7 @@ def test_grid_search_sparse_scoring(): C2 = cv.best_estimator_.C assert_array_equal(y_pred, y_pred2) - assert_equal(C, C2) + assert C == C2 # Smoke test the score # np.testing.assert_allclose(f1_score(cv.predict(X_[:180]), y[:180]), # cv.score(X_[:180], y[:180])) @@ -493,7 +493,7 @@ def f1_loss(y_true_, y_pred_): y_pred3 = cv.predict(X_[180:]) C3 = cv.best_estimator_.C - assert_equal(C, C3) + assert C == C3 assert_array_equal(y_pred, y_pred3) @@ -747,18 +747,18 @@ def test_unsupervised_grid_search(): scoring=scoring, refit=refit) grid_search.fit(X, y) # Both ARI and FMS can find the right number :) - assert_equal(grid_search.best_params_["n_clusters"], 3) + assert grid_search.best_params_["n_clusters"] == 3 # Single metric evaluation unsupervised grid_search = GridSearchCV(km, param_grid=dict(n_clusters=[2, 3, 4]), scoring='fowlkes_mallows_score') grid_search.fit(X, y) - assert_equal(grid_search.best_params_["n_clusters"], 3) + assert grid_search.best_params_["n_clusters"] == 3 # Now without a score, and without y grid_search 
= GridSearchCV(km, param_grid=dict(n_clusters=[2, 3, 4])) grid_search.fit(X) - assert_equal(grid_search.best_params_["n_clusters"], 4) + assert grid_search.best_params_["n_clusters"] == 4 def test_gridsearch_no_predict(): @@ -772,8 +772,8 @@ def custom_scoring(estimator, X): param_grid=dict(bandwidth=[.01, .1, 1]), scoring=custom_scoring) search.fit(X) - assert_equal(search.best_params_['bandwidth'], .1) - assert_equal(search.best_score_, 42) + assert search.best_params_['bandwidth'] == .1 + assert search.best_score_ == 42 def test_param_sampler(): @@ -783,7 +783,7 @@ def test_param_sampler(): sampler = ParameterSampler(param_distributions=param_distributions, n_iter=10, random_state=0) samples = [x for x in sampler] - assert_equal(len(samples), 10) + assert len(samples) == 10 for sample in samples: assert sample["kernel"] in ["rbf", "linear"] assert 0 <= sample["C"] <= 1 @@ -792,13 +792,13 @@ def test_param_sampler(): param_distributions = {"C": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]} sampler = ParameterSampler(param_distributions=param_distributions, n_iter=3, random_state=0) - assert_equal([x for x in sampler], [x for x in sampler]) + assert [x for x in sampler] == [x for x in sampler] if sp_version >= (0, 16): param_distributions = {"C": uniform(0, 1)} sampler = ParameterSampler(param_distributions=param_distributions, n_iter=10, random_state=0) - assert_equal([x for x in sampler], [x for x in sampler]) + assert [x for x in sampler] == [x for x in sampler] def check_cv_results_array_types(search, param_keys, score_keys): @@ -852,7 +852,7 @@ def test_grid_search_cv_results(): search = GridSearchCV(SVC(), cv=n_splits, iid=iid, param_grid=params, return_train_score=True) search.fit(X, y) - assert_equal(iid, search.iid) + assert iid == search.iid cv_results = search.cv_results_ # Check if score and timing are reasonable assert all(cv_results['rank_test_score'] >= 1) @@ -905,7 +905,7 @@ def test_random_search_cv_results(): param_distributions=params, return_train_score=True) search.fit(X, y) - assert_equal(iid, search.iid) + assert iid == search.iid cv_results = search.cv_results_ # Check results structure check_cv_results_array_types(search, param_keys, score_keys) @@ -1008,7 +1008,7 @@ def test_search_iid_param(): train_std = search.cv_results_['std_train_score'][0] # Test the first candidate - assert_equal(search.cv_results_['param_C'][0], 1) + assert search.cv_results_['param_C'][0] == 1 assert_array_almost_equal(test_cv_scores, [1, 1. / 3.]) assert_array_almost_equal(train_cv_scores, [1, 1]) @@ -1054,7 +1054,7 @@ def test_search_iid_param(): train_mean = search.cv_results_['mean_train_score'][0] train_std = search.cv_results_['std_train_score'][0] - assert_equal(search.cv_results_['param_C'][0], 1) + assert search.cv_results_['param_C'][0] == 1 # scores are the same as above assert_array_almost_equal(test_cv_scores, [1, 1. 
/ 3.]) # Unweighted mean/std is used @@ -1084,7 +1084,7 @@ def test_grid_search_cv_results_multimetric(): iid=iid, param_grid=params, scoring=scoring, refit=False) grid_search.fit(X, y) - assert_equal(grid_search.iid, iid) + assert grid_search.iid == iid grid_searches.append(grid_search) compare_cv_results_multimetric_with_single(*grid_searches, iid=iid) @@ -1133,7 +1133,7 @@ def compare_cv_results_multimetric_with_single( """Compare multi-metric cv_results with the ensemble of multiple single metric cv_results from single metric grid/random search""" - assert_equal(search_multi.iid, iid) + assert search_multi.iid == iid assert search_multi.multimetric_ assert_array_equal(sorted(search_multi.scorer_), ('accuracy', 'recall')) @@ -1162,10 +1162,10 @@ def compare_cv_results_multimetric_with_single( def compare_refit_methods_when_refit_with_acc(search_multi, search_acc, refit): """Compare refit multi-metric search methods with single metric methods""" if refit: - assert_equal(search_multi.refit, 'accuracy') + assert search_multi.refit == 'accuracy' else: assert not search_multi.refit - assert_equal(search_acc.refit, refit) + assert search_acc.refit == refit X, y = make_blobs(n_samples=100, n_features=4, random_state=42) for method in ('predict', 'predict_proba', 'predict_log_proba'): @@ -1173,7 +1173,7 @@ def compare_refit_methods_when_refit_with_acc(search_multi, search_acc, refit): getattr(search_acc, method)(X)) assert_almost_equal(search_multi.score(X, y), search_acc.score(X, y)) for key in ('best_index_', 'best_score_', 'best_params_'): - assert_equal(getattr(search_multi, key), getattr(search_acc, key)) + assert getattr(search_multi, key) == getattr(search_acc, key) def test_search_cv_results_rank_tie_breaking(): @@ -1245,7 +1245,7 @@ def test_search_cv_timing(): assert hasattr(search, "refit_time_") assert isinstance(search.refit_time_, float) - assert_greater_equal(search.refit_time_, 0) + assert search.refit_time_ >= 0 def test_grid_search_correct_score_results(): @@ -1301,8 +1301,8 @@ def test_fit_grid_point(): # Test the return values of fit_grid_point assert_almost_equal(this_scores, expected_score) - assert_equal(params, this_params) - assert_equal(n_test_samples, test.size) + assert params == this_params + assert n_test_samples == test.size # Should raise an error upon multimetric scorer assert_raise_message(ValueError, "For evaluating multiple scores, use " @@ -1492,7 +1492,7 @@ def test_parameters_sampler_replacement(): # degenerates to GridSearchCV if n_iter the same as grid_size sampler = ParameterSampler(params, n_iter=6) samples = list(sampler) - assert_equal(len(samples), 6) + assert len(samples) == 6 for values in ParameterGrid(params): assert values in samples @@ -1500,16 +1500,16 @@ def test_parameters_sampler_replacement(): params = {'a': range(10), 'b': range(10), 'c': range(10)} sampler = ParameterSampler(params, n_iter=99, random_state=42) samples = list(sampler) - assert_equal(len(samples), 99) + assert len(samples) == 99 hashable_samples = ["a%db%dc%d" % (p['a'], p['b'], p['c']) for p in samples] - assert_equal(len(set(hashable_samples)), 99) + assert len(set(hashable_samples)) == 99 # doesn't go into infinite loops params_distribution = {'first': bernoulli(.5), 'second': ['a', 'b', 'c']} sampler = ParameterSampler(params_distribution, n_iter=7) samples = list(sampler) - assert_equal(len(samples), 7) + assert len(samples) == 7 def test_stochastic_gradient_loss_param(): diff --git a/sklearn/model_selection/tests/test_split.py 
b/sklearn/model_selection/tests/test_split.py index 1f47e85ab3c43..583000e2000bc 100644 --- a/sklearn/model_selection/tests/test_split.py +++ b/sklearn/model_selection/tests/test_split.py @@ -169,7 +169,7 @@ def test_cross_validator_with_default_params(): [loo_repr, lpo_repr, kf_repr, skf_repr, lolo_repr, lopo_repr, ss_repr, ps_repr])): # Test if get_n_splits works correctly - assert_equal(n_splits_expected[i], cv.get_n_splits(X, y, groups)) + assert n_splits_expected[i] == cv.get_n_splits(X, y, groups) # Test if the cross-validator works as expected even if # the data is 1d @@ -177,11 +177,11 @@ def test_cross_validator_with_default_params(): list(cv.split(X_1d, y, groups))) # Test that train, test indices returned are integers for train, test in cv.split(X, y, groups): - assert_equal(np.asarray(train).dtype.kind, 'i') - assert_equal(np.asarray(train).dtype.kind, 'i') + assert np.asarray(train).dtype.kind == 'i' + assert np.asarray(train).dtype.kind == 'i' # Test if the repr works without any errors - assert_equal(cv_repr, repr(cv)) + assert cv_repr == repr(cv) # ValueError for get_n_splits methods msg = "The 'X' parameter should not be None." @@ -223,18 +223,18 @@ def check_valid_split(train, test, n_samples=None): train, test = set(train), set(test) # Train and test split should not overlap - assert_equal(train.intersection(test), set()) + assert train.intersection(test) == set() if n_samples is not None: # Check that the union of train an test split cover all the indices - assert_equal(train.union(test), set(range(n_samples))) + assert train.union(test) == set(range(n_samples)) def check_cv_coverage(cv, X, y, groups, expected_n_splits=None): n_samples = _num_samples(X) # Check that a all the samples appear at least once in a test fold if expected_n_splits is not None: - assert_equal(cv.get_n_splits(X, y, groups), expected_n_splits) + assert cv.get_n_splits(X, y, groups) == expected_n_splits else: expected_n_splits = cv.get_n_splits(X, y, groups) @@ -246,9 +246,9 @@ def check_cv_coverage(cv, X, y, groups, expected_n_splits=None): collected_test_samples.update(test) # Check that the accumulated test samples cover the whole dataset - assert_equal(iterations, expected_n_splits) + assert iterations == expected_n_splits if n_samples is not None: - assert_equal(collected_test_samples, set(range(n_samples))) + assert collected_test_samples == set(range(n_samples)) def test_kfold_valueerrors(): @@ -311,7 +311,7 @@ def test_kfold_indices(): check_cv_coverage(kf, X2, y=None, groups=None, expected_n_splits=3) # Check if get_n_splits returns the number of folds - assert_equal(5, KFold(5).get_n_splits(X2)) + assert 5 == KFold(5).get_n_splits(X2) def test_kfold_no_shuffle(): @@ -362,7 +362,7 @@ def test_stratified_kfold_no_shuffle(): assert_array_equal(train, [0, 1, 3, 4]) # Check if get_n_splits returns the number of folds - assert_equal(5, StratifiedKFold(5).get_n_splits(X, y)) + assert 5 == StratifiedKFold(5).get_n_splits(X, y) # Make sure string labels are also supported X = np.ones(7) @@ -399,7 +399,7 @@ def test_kfold_balance(): sizes = [len(test) for _, test in kf] assert (np.max(sizes) - np.min(sizes)) <= 1 - assert_equal(np.sum(sizes), i) + assert np.sum(sizes) == i def test_stratifiedkfold_balance(): @@ -416,7 +416,7 @@ def test_stratifiedkfold_balance(): sizes = [len(test) for _, test in skf] assert (np.max(sizes) - np.min(sizes)) <= 1 - assert_equal(np.sum(sizes), i) + assert np.sum(sizes) == i def test_shuffle_kfold(): @@ -432,13 +432,13 @@ def test_shuffle_kfold(): kf.split(X), 
kf2.split(X), kf3.split(X)): for tr_a, tr_b in combinations((tr1, tr2, tr3), 2): # Assert that there is no complete overlap - assert_not_equal(len(np.intersect1d(tr_a, tr_b)), len(tr1)) + assert len(np.intersect1d(tr_a, tr_b)) != len(tr1) # Set all test indices in successive iterations of kf2 to 1 all_folds[te2] = 1 # Check that all indices are returned in the different test folds - assert_equal(sum(all_folds), 300) + assert sum(all_folds) == 300 def test_shuffle_kfold_stratifiedkfold_reproducibility(): @@ -485,7 +485,7 @@ def test_shuffle_stratifiedkfold(): kf1 = StratifiedKFold(5, shuffle=True, random_state=1) for (_, test0), (_, test1) in zip(kf0.split(X_40, y), kf1.split(X_40, y)): - assert_not_equal(set(test0), set(test1)) + assert set(test0) != set(test1) check_cv_coverage(kf0, X_40, y, groups=None, expected_n_splits=5) # Ensure that we shuffle each class's samples with different @@ -516,8 +516,8 @@ def test_kfold_can_detect_dependent_samples_on_digits(): # see #2372 cv = KFold(n_splits=n_splits, shuffle=False) mean_score = cross_val_score(model, X, y, cv=cv).mean() - assert_greater(0.92, mean_score) - assert_greater(mean_score, 0.80) + assert 0.92 > mean_score + assert mean_score > 0.80 # Shuffling the data artificially breaks the dependency and hides the # overfitting of the model with regards to the writing style of the authors @@ -525,11 +525,11 @@ def test_kfold_can_detect_dependent_samples_on_digits(): # see #2372 cv = KFold(n_splits, shuffle=True, random_state=0) mean_score = cross_val_score(model, X, y, cv=cv).mean() - assert_greater(mean_score, 0.92) + assert mean_score > 0.92 cv = KFold(n_splits, shuffle=True, random_state=1) mean_score = cross_val_score(model, X, y, cv=cv).mean() - assert_greater(mean_score, 0.92) + assert mean_score > 0.92 # Similarly, StratifiedKFold should try to shuffle the data as little # as possible (while respecting the balanced class constraints) @@ -540,8 +540,8 @@ def test_kfold_can_detect_dependent_samples_on_digits(): # see #2372 cv = StratifiedKFold(n_splits) mean_score = cross_val_score(model, X, y, cv=cv).mean() - assert_greater(0.93, mean_score) - assert_greater(mean_score, 0.80) + assert 0.93 > mean_score + assert mean_score > 0.80 def test_shuffle_split(): @@ -628,8 +628,8 @@ def test_stratified_shuffle_split_respects_test_size(): sss = StratifiedShuffleSplit(6, test_size=test_size, train_size=train_size, random_state=0).split(np.ones(len(y)), y) for train, test in sss: - assert_equal(len(train), train_size) - assert_equal(len(test), test_size) + assert len(train) == train_size + assert len(test) == test_size def test_stratified_shuffle_split_iter(): @@ -661,9 +661,9 @@ def test_stratified_shuffle_split_iter(): return_inverse=True)[1]) / float(len(y[test]))) assert_array_almost_equal(p_train, p_test, 1) - assert_equal(len(train) + len(test), y.size) - assert_equal(len(train), train_size) - assert_equal(len(test), test_size) + assert len(train) + len(test) == y.size + assert len(train) == train_size + assert len(test) == test_size assert_array_equal(np.lib.arraysetops.intersect1d(train, test), []) @@ -697,19 +697,19 @@ def assert_counts_are_ok(idx_counts, p): for counter, ids in [(train_counts, train), (test_counts, test)]: for id in ids: counter[id] += 1 - assert_equal(n_splits_actual, n_splits) + assert n_splits_actual == n_splits n_train, n_test = _validate_shuffle_split( n_samples, test_size=1. / n_folds, train_size=1. - (1. 
/ n_folds)) - assert_equal(len(train), n_train) - assert_equal(len(test), n_test) - assert_equal(len(set(train).intersection(test)), 0) + assert len(train) == n_train + assert len(test) == n_test + assert len(set(train).intersection(test)) == 0 group_counts = np.unique(groups) - assert_equal(splits.test_size, 1.0 / n_folds) - assert_equal(n_train + n_test, len(groups)) - assert_equal(len(group_counts), 2) + assert splits.test_size == 1.0 / n_folds + assert n_train + n_test == len(groups) + assert len(group_counts) == 2 ex_test_p = float(n_test) / n_samples ex_train_p = float(n_train) / n_samples @@ -754,8 +754,8 @@ def test_stratified_shuffle_split_multilabel(): # correct stratification of entire rows # (by design, here y[:, 0] uniquely determines the entire row of y) expected_ratio = np.mean(y[:, 0]) - assert_equal(expected_ratio, np.mean(y_train[:, 0])) - assert_equal(expected_ratio, np.mean(y_test[:, 0])) + assert expected_ratio == np.mean(y_train[:, 0]) + assert expected_ratio == np.mean(y_test[:, 0]) def test_stratified_shuffle_split_multilabel_many_labels(): @@ -777,8 +777,8 @@ def test_stratified_shuffle_split_multilabel_many_labels(): # correct stratification of entire rows # (by design, here y[:, 4] uniquely determines the entire row of y) expected_ratio = np.mean(y[:, 4]) - assert_equal(expected_ratio, np.mean(y_train[:, 4])) - assert_equal(expected_ratio, np.mean(y_test[:, 4])) + assert expected_ratio == np.mean(y_train[:, 4]) + assert expected_ratio == np.mean(y_test[:, 4]) def test_predefinedsplit_with_kfold_split(): @@ -792,7 +792,7 @@ def test_predefinedsplit_with_kfold_split(): folds[test_ind] = i ps = PredefinedSplit(folds) # n_splits is simply the no of unique folds - assert_equal(len(np.unique(folds)), ps.get_n_splits()) + assert len(np.unique(folds)) == ps.get_n_splits() ps_train, ps_test = zip(*ps.split()) assert_array_equal(ps_train, kf_train) assert_array_equal(ps_test, kf_test) @@ -809,7 +809,7 @@ def test_group_shuffle_split(): repr(slo) # Test that the length is correct - assert_equal(slo.get_n_splits(X, y, groups=groups_i), n_splits) + assert slo.get_n_splits(X, y, groups=groups_i) == n_splits l_unique = np.unique(groups_i) l = np.asarray(groups_i) @@ -822,7 +822,7 @@ def test_group_shuffle_split(): assert not np.any(np.in1d(l[test], l_train_unique)) # Second test: train and test add up to all the data - assert_equal(l[train].size + l[test].size, l.size) + assert l[train].size + l[test].size == l.size # Third test: train and test are disjoint assert_array_equal(np.intersect1d(train, test), []) @@ -841,10 +841,10 @@ def test_leave_one_p_group_out(): lpgo_2 = LeavePGroupsOut(n_groups=2) # Make sure the repr works - assert_equal(repr(logo), 'LeaveOneGroupOut()') - assert_equal(repr(lpgo_1), 'LeavePGroupsOut(n_groups=1)') - assert_equal(repr(lpgo_2), 'LeavePGroupsOut(n_groups=2)') - assert_equal(repr(LeavePGroupsOut(n_groups=3)), + assert repr(logo) == 'LeaveOneGroupOut()' + assert repr(lpgo_1) == 'LeavePGroupsOut(n_groups=1)' + assert repr(lpgo_2) == 'LeavePGroupsOut(n_groups=2)' + assert (repr(LeavePGroupsOut(n_groups=3)) == 'LeavePGroupsOut(n_groups=3)') for j, (cv, p_groups_out) in enumerate(((logo, 1), (lpgo_1, 1), @@ -856,7 +856,7 @@ def test_leave_one_p_group_out(): X = y = np.ones(len(groups_i)) # Test that the length is correct - assert_equal(cv.get_n_splits(X, y, groups=groups_i), n_splits) + assert cv.get_n_splits(X, y, groups=groups_i) == n_splits groups_arr = np.asarray(groups_i) @@ -868,17 +868,17 @@ def test_leave_one_p_group_out(): []) # Second test: 
train and test add up to all the data - assert_equal(len(train) + len(test), len(groups_i)) + assert len(train) + len(test) == len(groups_i) # Third test: # The number of groups in test must be equal to p_groups_out assert np.unique(groups_arr[test]).shape[0], p_groups_out # check get_n_splits() with dummy parameters - assert_equal(logo.get_n_splits(None, None, ['a', 'b', 'c', 'b', 'c']), 3) - assert_equal(logo.get_n_splits(groups=[1.0, 1.1, 1.0, 1.2]), 3) - assert_equal(lpgo_2.get_n_splits(None, None, np.arange(4)), 6) - assert_equal(lpgo_1.get_n_splits(groups=np.arange(4)), 4) + assert logo.get_n_splits(None, None, ['a', 'b', 'c', 'b', 'c']) == 3 + assert logo.get_n_splits(groups=[1.0, 1.1, 1.0, 1.2]) == 3 + assert lpgo_2.get_n_splits(None, None, np.arange(4)) == 6 + assert lpgo_1.get_n_splits(groups=np.arange(4)) == 4 # raise ValueError if a `groups` parameter is illegal with assert_raises(ValueError): @@ -910,12 +910,12 @@ def test_leave_group_out_changing_groups(): assert_array_equal(test, test_chan) # n_splits = no of 2 (p) group combinations of the unique groups = 3C2 = 3 - assert_equal( - 3, LeavePGroupsOut(n_groups=2).get_n_splits(X, y=X, + assert ( + 3 == LeavePGroupsOut(n_groups=2).get_n_splits(X, y=X, groups=groups)) # n_splits = no of unique groups (C(uniq_lbls, 1) = n_unique_groups) - assert_equal(3, LeaveOneGroupOut().get_n_splits(X, y=X, - groups=groups)) + assert 3 == LeaveOneGroupOut().get_n_splits(X, y=X, + groups=groups) def test_leave_one_p_group_out_error_on_fewer_number_of_groups(): @@ -987,7 +987,7 @@ def test_get_n_splits_for_repeated_kfold(): n_repeats = 4 rkf = RepeatedKFold(n_splits, n_repeats) expected_n_splits = n_splits * n_repeats - assert_equal(expected_n_splits, rkf.get_n_splits()) + assert expected_n_splits == rkf.get_n_splits() def test_get_n_splits_for_repeated_stratified_kfold(): @@ -995,7 +995,7 @@ def test_get_n_splits_for_repeated_stratified_kfold(): n_repeats = 4 rskf = RepeatedStratifiedKFold(n_splits, n_repeats) expected_n_splits = n_splits * n_repeats - assert_equal(expected_n_splits, rskf.get_n_splits()) + assert expected_n_splits == rskf.get_n_splits() def test_repeated_stratified_kfold_determinstic_split(): @@ -1105,7 +1105,7 @@ def test_train_test_split(): # simple test split = train_test_split(X, y, test_size=None, train_size=.5) X_train, X_test, y_train, y_test = split - assert_equal(len(y_test), len(y_train)) + assert len(y_test) == len(y_train) # test correspondence of X and y assert_array_equal(X_train[:, 0], y_train * 10) assert_array_equal(X_test[:, 0], y_test * 10) @@ -1120,10 +1120,10 @@ def test_train_test_split(): X_4d = np.arange(10 * 5 * 3 * 2).reshape(10, 5, 3, 2) y_3d = np.arange(10 * 7 * 11).reshape(10, 7, 11) split = train_test_split(X_4d, y_3d) - assert_equal(split[0].shape, (7, 5, 3, 2)) - assert_equal(split[1].shape, (3, 5, 3, 2)) - assert_equal(split[2].shape, (7, 7, 11)) - assert_equal(split[3].shape, (3, 7, 11)) + assert split[0].shape == (7, 5, 3, 2) + assert split[1].shape == (3, 5, 3, 2) + assert split[2].shape == (7, 7, 11) + assert split[3].shape == (3, 7, 11) # test stratification option y = np.array([1, 1, 1, 1, 2, 2, 2, 2]) @@ -1132,10 +1132,10 @@ def test_train_test_split(): train, test = train_test_split(y, test_size=test_size, stratify=y, random_state=0) - assert_equal(len(test), exp_test_size) - assert_equal(len(test) + len(train), len(y)) + assert len(test) == exp_test_size + assert len(test) + len(train) == len(y) # check the 1:1 ratio of ones and twos in the data is preserved - assert_equal(np.sum(train 
== 1), np.sum(train == 2)) + assert np.sum(train == 1) == np.sum(train == 2) # test unshuffled split y = np.arange(10) @@ -1337,19 +1337,19 @@ def test_group_kfold(): folds[test] = i # Check that folds have approximately the same size - assert_equal(len(folds), len(groups)) + assert len(folds) == len(groups) for i in np.unique(folds): - assert_greater_equal(tolerance, + assert (tolerance >= abs(sum(folds == i) - ideal_n_groups_per_fold)) # Check that each group appears only in 1 fold for group in np.unique(groups): - assert_equal(len(np.unique(folds[groups == group])), 1) + assert len(np.unique(folds[groups == group])) == 1 # Check that no group is on both sides of the split groups = np.asarray(groups, dtype=object) for train, test in lkf.split(X, y, groups): - assert_equal(len(np.intersect1d(groups[train], groups[test])), 0) + assert len(np.intersect1d(groups[train], groups[test])) == 0 # Construct the test data groups = np.array(['Albert', 'Jean', 'Bertrand', 'Michel', 'Jean', @@ -1374,21 +1374,21 @@ def test_group_kfold(): folds[test] = i # Check that folds have approximately the same size - assert_equal(len(folds), len(groups)) + assert len(folds) == len(groups) for i in np.unique(folds): - assert_greater_equal(tolerance, + assert (tolerance >= abs(sum(folds == i) - ideal_n_groups_per_fold)) # Check that each group appears only in 1 fold with warnings.catch_warnings(): warnings.simplefilter("ignore", DeprecationWarning) for group in np.unique(groups): - assert_equal(len(np.unique(folds[groups == group])), 1) + assert len(np.unique(folds[groups == group])) == 1 # Check that no group is on both sides of the split groups = np.asarray(groups, dtype=object) for train, test in lkf.split(X, y, groups): - assert_equal(len(np.intersect1d(groups[train], groups[test])), 0) + assert len(np.intersect1d(groups[train], groups[test])) == 0 # groups can also be a list cv_iter = list(lkf.split(X, y, groups.tolist())) @@ -1438,8 +1438,8 @@ def test_time_series_cv(): # Check get_n_splits returns the correct number of splits splits = TimeSeriesSplit(2).split(X) n_splits_actual = len(list(splits)) - assert_equal(n_splits_actual, tscv.get_n_splits()) - assert_equal(n_splits_actual, 2) + assert n_splits_actual == tscv.get_n_splits() + assert n_splits_actual == 2 def _check_time_series_max_train_size(splits, check_splits, max_train_size): @@ -1493,7 +1493,7 @@ def __init__(self, a, b=0, c=None): def __repr__(self): return _build_repr(self) - assert_equal(repr(MockSplitter(5, 6)), "MockSplitter(a=5, b=6, c=None)") + assert repr(MockSplitter(5, 6)) == "MockSplitter(a=5, b=6, c=None)" @pytest.mark.parametrize('CVSplitter', (ShuffleSplit, GroupShuffleSplit, diff --git a/sklearn/model_selection/tests/test_validation.py b/sklearn/model_selection/tests/test_validation.py index ca43e244fa7df..1d0f1cb1be8d0 100644 --- a/sklearn/model_selection/tests/test_validation.py +++ b/sklearn/model_selection/tests/test_validation.py @@ -415,7 +415,7 @@ def check_cross_validate_single_metric(clf, X, y, scores): scoring='neg_mean_squared_error', return_train_score=False) assert isinstance(mse_scores_dict, dict) - assert_equal(len(mse_scores_dict), dict_len) + assert len(mse_scores_dict) == dict_len assert_array_almost_equal(mse_scores_dict['test_score'], test_mse_scores) @@ -430,7 +430,7 @@ def check_cross_validate_single_metric(clf, X, y, scores): r2_scores_dict = cross_validate(clf, X, y, scoring=['r2'], return_train_score=False) assert isinstance(r2_scores_dict, dict) - assert_equal(len(r2_scores_dict), dict_len) + assert 
len(r2_scores_dict) == dict_len assert_array_almost_equal(r2_scores_dict['test_r2'], test_r2_scores) # Test return_estimator option @@ -470,9 +470,9 @@ def check_cross_validate_multi_metric(clf, X, y, scores): cv_results = cross_validate(clf, X, y, scoring=scoring, return_train_score=False) assert isinstance(cv_results, dict) - assert_equal(set(cv_results.keys()), - keys_with_train if return_train_score - else keys_sans_train) + assert (set(cv_results.keys()) == + (keys_with_train if return_train_score + else keys_sans_train)) assert_array_almost_equal(cv_results['test_r2'], test_r2_scores) assert_array_almost_equal( cv_results['test_neg_mean_squared_error'], test_mse_scores) @@ -591,9 +591,9 @@ def assert_fit_params(clf): # Function to test that the values are passed correctly to the # classifier arguments for non-array type - assert_equal(clf.dummy_int, DUMMY_INT) - assert_equal(clf.dummy_str, DUMMY_STR) - assert_equal(clf.dummy_obj, DUMMY_OBJ) + assert clf.dummy_int == DUMMY_INT + assert clf.dummy_str == DUMMY_STR + assert clf.dummy_obj == DUMMY_OBJ fit_params = {'sample_weight': np.ones(n_samples), 'class_prior': np.full(n_classes, 1. / n_classes), @@ -686,7 +686,7 @@ def test_permutation_score(): score, scores, pvalue = permutation_test_score( svm, X, y, n_permutations=30, cv=cv, scoring="accuracy") - assert_greater(score, 0.9) + assert score > 0.9 assert_almost_equal(pvalue, 0.0, 1) score_group, _, pvalue_group = permutation_test_score( @@ -722,8 +722,8 @@ def custom_score(y_true, y_pred): score, scores, pvalue = permutation_test_score( svm, X, y, n_permutations=30, cv=cv, scoring="accuracy") - assert_less(score, 0.5) - assert_greater(pvalue, 0.2) + assert score < 0.5 + assert pvalue > 0.2 def test_permutation_test_score_allow_nans(): @@ -784,11 +784,11 @@ def test_cross_val_predict(): assert_array_almost_equal(preds, preds2) preds = cross_val_predict(est, X, y) - assert_equal(len(preds), len(y)) + assert len(preds) == len(y) cv = LeaveOneOut() preds = cross_val_predict(est, X, y, cv=cv) - assert_equal(len(preds), len(y)) + assert len(preds) == len(y) Xsp = X.copy() Xsp *= (Xsp > np.median(Xsp)) @@ -797,7 +797,7 @@ def test_cross_val_predict(): assert_array_almost_equal(len(preds), len(y)) preds = cross_val_predict(KMeans(), X) - assert_equal(len(preds), len(y)) + assert len(preds) == len(y) class BadCV(): def split(self, X, y=None, groups=None): @@ -822,13 +822,13 @@ def test_cross_val_predict_decision_function_shape(): preds = cross_val_predict(LogisticRegression(solver="liblinear"), X, y, method='decision_function') - assert_equal(preds.shape, (50,)) + assert preds.shape == (50,) X, y = load_iris(return_X_y=True) preds = cross_val_predict(LogisticRegression(solver="liblinear"), X, y, method='decision_function') - assert_equal(preds.shape, (150, 3)) + assert preds.shape == (150, 3) # This specifically tests imbalanced splits for binary # classification with decision_function. 
This is only @@ -852,7 +852,7 @@ def test_cross_val_predict_decision_function_shape(): preds = cross_val_predict(est, X, y, method='decision_function') - assert_equal(preds.shape, (1797, 45)) + assert preds.shape == (1797, 45) ind = np.argsort(y) X, y = X[ind], y[ind] @@ -869,13 +869,13 @@ def test_cross_val_predict_predict_proba_shape(): preds = cross_val_predict(LogisticRegression(solver="liblinear"), X, y, method='predict_proba') - assert_equal(preds.shape, (50, 2)) + assert preds.shape == (50, 2) X, y = load_iris(return_X_y=True) preds = cross_val_predict(LogisticRegression(solver="liblinear"), X, y, method='predict_proba') - assert_equal(preds.shape, (150, 3)) + assert preds.shape == (150, 3) def test_cross_val_predict_predict_log_proba_shape(): @@ -883,13 +883,13 @@ def test_cross_val_predict_predict_log_proba_shape(): preds = cross_val_predict(LogisticRegression(solver="liblinear"), X, y, method='predict_log_proba') - assert_equal(preds.shape, (50, 2)) + assert preds.shape == (50, 2) X, y = load_iris(return_X_y=True) preds = cross_val_predict(LogisticRegression(solver="liblinear"), X, y, method='predict_log_proba') - assert_equal(preds.shape, (150, 3)) + assert preds.shape == (150, 3) def test_cross_val_predict_input_types(): @@ -902,11 +902,11 @@ def test_cross_val_predict_input_types(): # 3 fold cv is used --> atleast 3 samples per class # Smoke test predictions = cross_val_predict(clf, X, y) - assert_equal(predictions.shape, (150,)) + assert predictions.shape == (150,) # test with multioutput y predictions = cross_val_predict(clf, X_sparse, multioutput_y) - assert_equal(predictions.shape, (150, 2)) + assert predictions.shape == (150, 2) predictions = cross_val_predict(clf, X_sparse, y) assert_array_equal(predictions.shape, (150,)) @@ -1000,10 +1000,10 @@ def test_learning_curve(): shuffle=shuffle_train, return_times=True) if len(w) > 0: raise RuntimeError("Unexpected warning: %r" % w[0].message) - assert_equal(train_scores.shape, (10, 3)) - assert_equal(test_scores.shape, (10, 3)) - assert_equal(fit_times.shape, (10, 3)) - assert_equal(score_times.shape, (10, 3)) + assert train_scores.shape == (10, 3) + assert test_scores.shape == (10, 3) + assert fit_times.shape == (10, 3) + assert score_times.shape == (10, 3) assert_array_equal(train_sizes, np.linspace(2, 20, 10)) assert_array_almost_equal(train_scores.mean(axis=1), np.linspace(1.9, 1.0, 10)) @@ -1012,8 +1012,8 @@ def test_learning_curve(): # Cannot use assert_array_almost_equal for fit and score times because # the values are hardware-dependant - assert_equal(fit_times.dtype, "float64") - assert_equal(score_times.dtype, "float64") + assert fit_times.dtype == "float64" + assert score_times.dtype == "float64" # Test a custom cv splitter that can iterate only once with warnings.catch_warnings(record=True) as w: @@ -1387,7 +1387,7 @@ def check_cross_val_predict_multilabel(est, X, y, method): # Check actual outputs for several representations of y for tg in [y, y + 1, y - 2, y.astype('str')]: cv_predict_output = cross_val_predict(est, X, tg, method=method, cv=cv) - assert_equal(len(cv_predict_output), len(expected_preds)) + assert len(cv_predict_output) == len(expected_preds) for i in range(len(cv_predict_output)): assert_allclose(cv_predict_output[i], expected_preds[i]) @@ -1670,7 +1670,7 @@ def test_fit_and_score_failing(): [FailingClassifier.FAILING_PARAMETER], cv=3, error_score='unvalid-string') - assert_equal(failing_clf.score(), 0.) # FailingClassifier coverage + assert failing_clf.score() == 0. 
# FailingClassifier coverage def test_fit_and_score_working(): From a51d25f9bff69c4a79a5168de961b6e3e4c9e126 Mon Sep 17 00:00:00 2001 From: adrinjalali Date: Sun, 30 Jun 2019 15:59:44 +0200 Subject: [PATCH 14/22] fix neighbors --- sklearn/neighbors/tests/test_kde.py | 8 ++--- sklearn/neighbors/tests/test_lof.py | 8 ++--- sklearn/neighbors/tests/test_nca.py | 2 +- .../neighbors/tests/test_nearest_centroid.py | 2 +- sklearn/neighbors/tests/test_neighbors.py | 34 +++++++++---------- 5 files changed, 27 insertions(+), 27 deletions(-) diff --git a/sklearn/neighbors/tests/test_kde.py b/sklearn/neighbors/tests/test_kde.py index 61dfa778194cf..3d41add23c2bc 100644 --- a/sklearn/neighbors/tests/test_kde.py +++ b/sklearn/neighbors/tests/test_kde.py @@ -75,7 +75,7 @@ def test_kernel_density_sampling(n_samples=100, n_features=3): # draw a tophat sample kde = KernelDensity(bandwidth, kernel=kernel).fit(X) samp = kde.sample(100) - assert_equal(X.shape, samp.shape) + assert X.shape == samp.shape # check that samples are in the right range nbrs = NearestNeighbors(n_neighbors=1).fit(X) @@ -96,7 +96,7 @@ def test_kernel_density_sampling(n_samples=100, n_features=3): # non-regression test: used to return a scalar X = rng.randn(4, 1) kde = KernelDensity(kernel="gaussian").fit(X) - assert_equal(kde.sample().shape, (1, 1)) + assert kde.sample().shape == (1, 1) @pytest.mark.parametrize('algorithm', ['auto', 'ball_tree', 'kd_tree']) @@ -116,7 +116,7 @@ def test_kde_algorithm_metric_choice(algorithm, metric): kde = KernelDensity(algorithm=algorithm, metric=metric) kde.fit(X) y_dens = kde.score_samples(Y) - assert_equal(y_dens.shape, Y.shape[:1]) + assert y_dens.shape == Y.shape[:1] def test_kde_score(n_samples=100, n_features=3): @@ -154,7 +154,7 @@ def test_kde_pipeline_gridsearch(): params = dict(kerneldensity__bandwidth=[0.001, 0.01, 0.1, 1, 10]) search = GridSearchCV(pipe1, param_grid=params) search.fit(X) - assert_equal(search.best_params_['kerneldensity__bandwidth'], .1) + assert search.best_params_['kerneldensity__bandwidth'] == .1 def test_kde_sample_weights(): diff --git a/sklearn/neighbors/tests/test_lof.py b/sklearn/neighbors/tests/test_lof.py index a28118dad53ce..a00017494e328 100644 --- a/sklearn/neighbors/tests/test_lof.py +++ b/sklearn/neighbors/tests/test_lof.py @@ -44,7 +44,7 @@ def test_lof(): assert_array_equal(clf._fit_X, X) # Assert largest outlier score is smaller than smallest inlier score: - assert_greater(np.min(score[:-2]), np.max(score[-2:])) + assert np.min(score[:-2]) > np.max(score[-2:]) # Assert predict() works: clf = neighbors.LocalOutlierFactor(contamination=0.25, @@ -71,7 +71,7 @@ def test_lof_performance(): y_pred = -clf.decision_function(X_test) # check that roc_auc is good - assert_greater(roc_auc_score(y_test, y_pred), .99) + assert roc_auc_score(y_test, y_pred) > .99 def test_lof_values(): @@ -123,13 +123,13 @@ def test_lof_precomputed(random_state=42): def test_n_neighbors_attribute(): X = iris.data clf = neighbors.LocalOutlierFactor(n_neighbors=500).fit(X) - assert_equal(clf.n_neighbors_, X.shape[0] - 1) + assert clf.n_neighbors_ == X.shape[0] - 1 clf = neighbors.LocalOutlierFactor(n_neighbors=500) assert_warns_message(UserWarning, "n_neighbors will be set to (n_samples - 1)", clf.fit, X) - assert_equal(clf.n_neighbors_, X.shape[0] - 1) + assert clf.n_neighbors_ == X.shape[0] - 1 def test_score_samples(): diff --git a/sklearn/neighbors/tests/test_nca.py b/sklearn/neighbors/tests/test_nca.py index 49f94bc4d56a5..c9b78f13886ed 100644 --- a/sklearn/neighbors/tests/test_nca.py 
+++ b/sklearn/neighbors/tests/test_nca.py @@ -509,7 +509,7 @@ def callback(self, transformation, n_iter): cb = transformation_storer.callback nca = NeighborhoodComponentsAnalysis(max_iter=5, callback=cb) nca.fit(X, y) - assert_equal(transformation_storer.transformation.size, X.shape[1]**2) + assert transformation_storer.transformation.size == X.shape[1]**2 def test_convergence_warning(): diff --git a/sklearn/neighbors/tests/test_nearest_centroid.py b/sklearn/neighbors/tests/test_nearest_centroid.py index 25fac197c3657..3b962372159eb 100644 --- a/sklearn/neighbors/tests/test_nearest_centroid.py +++ b/sklearn/neighbors/tests/test_nearest_centroid.py @@ -90,7 +90,7 @@ def test_pickle(): s = pickle.dumps(obj) obj2 = pickle.loads(s) - assert_equal(type(obj2), obj.__class__) + assert type(obj2) == obj.__class__ score2 = obj2.score(iris.data, iris.target) assert_array_equal(score, score2, "Failed to generate same score" diff --git a/sklearn/neighbors/tests/test_neighbors.py b/sklearn/neighbors/tests/test_neighbors.py index ff73b79493cb5..afa7159d3d61c 100644 --- a/sklearn/neighbors/tests/test_neighbors.py +++ b/sklearn/neighbors/tests/test_neighbors.py @@ -469,8 +469,8 @@ def test_radius_neighbors_boundary_handling(): nbrs = neighbors.NearestNeighbors(radius=radius, algorithm=algorithm).fit(X) results = nbrs.radius_neighbors([[0.0]], return_distance=False) - assert_equal(results.shape, (1,)) - assert_equal(results.dtype, object) + assert results.shape == (1,) + assert results.dtype == object assert_array_equal(results[0], [0, 1]) @@ -498,7 +498,7 @@ def test_RadiusNeighborsClassifier_multioutput(): y_pred_so.append(rnn.predict(X_test)) y_pred_so = np.vstack(y_pred_so).T - assert_equal(y_pred_so.shape, y_test.shape) + assert y_pred_so.shape == y_test.shape # Multioutput prediction rnn_mo = neighbors.RadiusNeighborsClassifier(weights=weights, @@ -506,7 +506,7 @@ def test_RadiusNeighborsClassifier_multioutput(): rnn_mo.fit(X_train, y_train) y_pred_mo = rnn_mo.predict(X_test) - assert_equal(y_pred_mo.shape, y_test.shape) + assert y_pred_mo.shape == y_test.shape assert_array_almost_equal(y_pred_mo, y_pred_so) @@ -559,8 +559,8 @@ def test_KNeighborsClassifier_multioutput(): y_pred_proba_so.append(knn.predict_proba(X_test)) y_pred_so = np.vstack(y_pred_so).T - assert_equal(y_pred_so.shape, y_test.shape) - assert_equal(len(y_pred_proba_so), n_output) + assert y_pred_so.shape == y_test.shape + assert len(y_pred_proba_so) == n_output # Multioutput prediction knn_mo = neighbors.KNeighborsClassifier(weights=weights, @@ -568,12 +568,12 @@ def test_KNeighborsClassifier_multioutput(): knn_mo.fit(X_train, y_train) y_pred_mo = knn_mo.predict(X_test) - assert_equal(y_pred_mo.shape, y_test.shape) + assert y_pred_mo.shape == y_test.shape assert_array_almost_equal(y_pred_mo, y_pred_so) # Check proba y_pred_proba_mo = knn_mo.predict_proba(X_test) - assert_equal(len(y_pred_proba_mo), n_output) + assert len(y_pred_proba_mo) == n_output for proba_mo, proba_so in zip(y_pred_proba_mo, y_pred_proba_so): assert_array_almost_equal(proba_mo, proba_so) @@ -627,8 +627,8 @@ def test_KNeighborsRegressor_multioutput_uniform_weight(): y_pred = knn.predict(X_test) - assert_equal(y_pred.shape, y_test.shape) - assert_equal(y_pred_idx.shape, y_test.shape) + assert y_pred.shape == y_test.shape + assert y_pred_idx.shape == y_test.shape assert_array_almost_equal(y_pred, y_pred_idx) @@ -654,7 +654,7 @@ def test_kneighbors_regressor_multioutput(n_samples=40, knn.fit(X, y) epsilon = 1E-5 * (2 * rng.rand(1, n_features) - 1) y_pred = 
knn.predict(X[:n_test_pts] + epsilon) - assert_equal(y_pred.shape, y_target.shape) + assert y_pred.shape == y_target.shape assert np.all(np.abs(y_pred - y_target) < 0.3) @@ -725,8 +725,8 @@ def test_RadiusNeighborsRegressor_multioutput_with_uniform_weight(): y_pred_idx = np.array(y_pred_idx) y_pred = rnn.predict(X_test) - assert_equal(y_pred_idx.shape, y_test.shape) - assert_equal(y_pred.shape, y_test.shape) + assert y_pred_idx.shape == y_test.shape + assert y_pred.shape == y_test.shape assert_array_almost_equal(y_pred, y_pred_idx) @@ -753,7 +753,7 @@ def test_RadiusNeighborsRegressor_multioutput(n_samples=40, epsilon = 1E-5 * (2 * rng.rand(1, n_features) - 1) y_pred = rnn.predict(X[:n_test_pts] + epsilon) - assert_equal(y_pred.shape, y_target.shape) + assert y_pred.shape == y_target.shape assert np.all(np.abs(y_pred - y_target) < 0.3) @@ -805,7 +805,7 @@ def test_neighbors_iris(): rgs = neighbors.KNeighborsRegressor(n_neighbors=5, algorithm=algorithm) rgs.fit(iris.data, iris.target) - assert_greater(np.mean(rgs.predict(iris.data).round() == iris.target), + assert (np.mean(rgs.predict(iris.data).round() == iris.target) > 0.95) @@ -826,7 +826,7 @@ def test_neighbors_digits(): score_uint8 = clf.fit(X_train, Y_train).score(X_test, Y_test) score_float = clf.fit(X_train.astype(float, copy=False), Y_train).score( X_test.astype(float, copy=False), Y_test) - assert_equal(score_uint8, score_float) + assert score_uint8 == score_float def test_kneighbors_graph(): @@ -1068,7 +1068,7 @@ def test_valid_brute_metric_for_auto_algorithm(): # check that there is a metric that is valid for brute # but not ball_tree (so we actually test something) - assert_in("cosine", VALID_METRICS['brute']) + assert "cosine" in VALID_METRICS['brute'] assert "cosine" not in VALID_METRICS['ball_tree'] # Metric which don't required any additional parameter From cfee0360b8dea58d0b658491df2790bffe31f1e6 Mon Sep 17 00:00:00 2001 From: adrinjalali Date: Sun, 30 Jun 2019 16:00:18 +0200 Subject: [PATCH 15/22] fix neural_networks --- sklearn/neural_network/tests/test_mlp.py | 56 +++++++++---------- .../tests/test_stochastic_optimizers.py | 2 +- 2 files changed, 29 insertions(+), 29 deletions(-) diff --git a/sklearn/neural_network/tests/test_mlp.py b/sklearn/neural_network/tests/test_mlp.py index 147eeee04b255..058dd1bde4239 100644 --- a/sklearn/neural_network/tests/test_mlp.py +++ b/sklearn/neural_network/tests/test_mlp.py @@ -246,8 +246,8 @@ def test_lbfgs_classification(): activation=activation) mlp.fit(X_train, y_train) y_predict = mlp.predict(X_test) - assert_greater(mlp.score(X_train, y_train), 0.95) - assert_equal((y_predict.shape[0], y_predict.dtype.kind), + assert mlp.score(X_train, y_train) > 0.95 + assert ((y_predict.shape[0], y_predict.dtype.kind) == expected_shape_dtype) @@ -261,10 +261,10 @@ def test_lbfgs_regression(): activation=activation) mlp.fit(X, y) if activation == 'identity': - assert_greater(mlp.score(X, y), 0.84) + assert mlp.score(X, y) > 0.84 else: # Non linear models perform much better than linear bottleneck: - assert_greater(mlp.score(X, y), 0.95) + assert mlp.score(X, y) > 0.95 def test_learning_rate_warmstart(): @@ -282,9 +282,9 @@ def test_learning_rate_warmstart(): post_eta = mlp._optimizer.learning_rate if learning_rate == 'constant': - assert_equal(prev_eta, post_eta) + assert prev_eta == post_eta elif learning_rate == 'invscaling': - assert_equal(mlp.learning_rate_init / pow(8 + 1, mlp.power_t), + assert (mlp.learning_rate_init / pow(8 + 1, mlp.power_t) == post_eta) @@ -297,7 +297,7 @@ def 
test_multilabel_classification(): max_iter=150, random_state=0, activation='logistic', learning_rate_init=0.2) mlp.fit(X, y) - assert_greater(mlp.score(X, y), 0.97) + assert mlp.score(X, y) > 0.97 # test partial fit method mlp = MLPClassifier(solver='sgd', hidden_layer_sizes=50, max_iter=150, @@ -305,7 +305,7 @@ def test_multilabel_classification(): learning_rate_init=0.2) for i in range(100): mlp.partial_fit(X, y, classes=[0, 1, 2, 3, 4]) - assert_greater(mlp.score(X, y), 0.9) + assert mlp.score(X, y) > 0.9 # Make sure early stopping still work now that spliting is stratified by # default (it is disabled for multilabel classification) @@ -320,7 +320,7 @@ def test_multioutput_regression(): mlp = MLPRegressor(solver='lbfgs', hidden_layer_sizes=50, max_iter=200, random_state=1) mlp.fit(X, y) - assert_greater(mlp.score(X, y), 0.9) + assert mlp.score(X, y) > 0.9 def test_partial_fit_classes_error(): @@ -351,7 +351,7 @@ def test_partial_fit_classification(): mlp.partial_fit(X, y, classes=np.unique(y)) pred2 = mlp.predict(X) assert_array_equal(pred1, pred2) - assert_greater(mlp.score(X, y), 0.95) + assert mlp.score(X, y) > 0.95 def test_partial_fit_unseen_classes(): @@ -362,7 +362,7 @@ def test_partial_fit_unseen_classes(): clf.partial_fit([[1], [2], [3]], ["a", "b", "c"], classes=["a", "b", "c", "d"]) clf.partial_fit([[4]], ["d"]) - assert_greater(clf.score([[1], [2], [3], [4]], ["a", "b", "c", "d"]), 0) + assert clf.score([[1], [2], [3], [4]], ["a", "b", "c", "d"]) > 0 def test_partial_fit_regression(): @@ -388,7 +388,7 @@ def test_partial_fit_regression(): pred2 = mlp.predict(X) assert_almost_equal(pred1, pred2, decimal=2) score = mlp.score(X, y) - assert_greater(score, 0.75) + assert score > 0.75 def test_partial_fit_errors(): @@ -450,11 +450,11 @@ def test_predict_proba_binary(): proba_max = y_proba.argmax(axis=1) proba_log_max = y_log_proba.argmax(axis=1) - assert_equal(y_proba.shape, (n_samples, n_classes)) + assert y_proba.shape == (n_samples, n_classes) assert_array_equal(proba_max, proba_log_max) assert_array_equal(y_log_proba, np.log(y_proba)) - assert_equal(roc_auc_score(y, y_proba[:, 1]), 1.0) + assert roc_auc_score(y, y_proba[:, 1]) == 1.0 def test_predict_proba_multiclass(): @@ -473,7 +473,7 @@ def test_predict_proba_multiclass(): proba_max = y_proba.argmax(axis=1) proba_log_max = y_log_proba.argmax(axis=1) - assert_equal(y_proba.shape, (n_samples, n_classes)) + assert y_proba.shape == (n_samples, n_classes) assert_array_equal(proba_max, proba_log_max) assert_array_equal(y_log_proba, np.log(y_proba)) @@ -490,14 +490,14 @@ def test_predict_proba_multilabel(): clf.fit(X, Y) y_proba = clf.predict_proba(X) - assert_equal(y_proba.shape, (n_samples, n_classes)) + assert y_proba.shape == (n_samples, n_classes) assert_array_equal(y_proba > 0.5, Y) y_log_proba = clf.predict_log_proba(X) proba_max = y_proba.argmax(axis=1) proba_log_max = y_log_proba.argmax(axis=1) - assert_greater((y_proba.sum(1) - 1).dot(y_proba.sum(1) - 1), 1e-10) + assert (y_proba.sum(1) - 1).dot(y_proba.sum(1) - 1) > 1e-10 assert_array_equal(proba_max, proba_log_max) assert_array_equal(y_log_proba, np.log(y_proba)) @@ -553,7 +553,7 @@ def test_tolerance(): y = [1, 0] clf = MLPClassifier(tol=0.5, max_iter=3000, solver='sgd') clf.fit(X, y) - assert_greater(clf.max_iter, clf.n_iter_) + assert clf.max_iter > clf.n_iter_ def test_verbose_sgd(): @@ -580,13 +580,13 @@ def test_early_stopping(): clf = MLPClassifier(tol=tol, max_iter=3000, solver='sgd', early_stopping=True) clf.fit(X, y) - assert_greater(clf.max_iter, 
clf.n_iter_) + assert clf.max_iter > clf.n_iter_ valid_scores = clf.validation_scores_ best_valid_score = clf.best_validation_score_ - assert_equal(max(valid_scores), best_valid_score) - assert_greater(best_valid_score + tol, valid_scores[-2]) - assert_greater(best_valid_score + tol, valid_scores[-1]) + assert max(valid_scores) == best_valid_score + assert best_valid_score + tol > valid_scores[-2] + assert best_valid_score + tol > valid_scores[-1] def test_adaptive_learning_rate(): @@ -595,8 +595,8 @@ def test_adaptive_learning_rate(): clf = MLPClassifier(tol=0.5, max_iter=3000, solver='sgd', learning_rate='adaptive') clf.fit(X, y) - assert_greater(clf.max_iter, clf.n_iter_) - assert_greater(1e-6, clf._optimizer.learning_rate) + assert clf.max_iter > clf.n_iter_ + assert 1e-6 > clf._optimizer.learning_rate @ignore_warnings(category=RuntimeWarning) @@ -640,8 +640,8 @@ def test_n_iter_no_change(): clf.fit(X, y) # validate n_iter_no_change - assert_equal(clf._no_improvement_count, n_iter_no_change + 1) - assert_greater(max_iter, clf.n_iter_) + assert clf._no_improvement_count == n_iter_no_change + 1 + assert max_iter > clf.n_iter_ @ignore_warnings(category=ConvergenceWarning) @@ -663,10 +663,10 @@ def test_n_iter_no_change_inf(): clf.fit(X, y) # validate n_iter_no_change doesn't cause early stopping - assert_equal(clf.n_iter_, max_iter) + assert clf.n_iter_ == max_iter # validate _update_no_improvement_count() was always triggered - assert_equal(clf._no_improvement_count, clf.n_iter_ - 1) + assert clf._no_improvement_count == clf.n_iter_ - 1 def test_early_stopping_stratified(): diff --git a/sklearn/neural_network/tests/test_stochastic_optimizers.py b/sklearn/neural_network/tests/test_stochastic_optimizers.py index 372e0bcfd9722..d01b91cbaaf3e 100644 --- a/sklearn/neural_network/tests/test_stochastic_optimizers.py +++ b/sklearn/neural_network/tests/test_stochastic_optimizers.py @@ -53,7 +53,7 @@ def test_sgd_optimizer_trigger_stopping(): lr = 2e-6 optimizer = SGDOptimizer(params, lr, lr_schedule='adaptive') assert not optimizer.trigger_stopping('', False) - assert_equal(lr / 5, optimizer.learning_rate) + assert lr / 5 == optimizer.learning_rate assert optimizer.trigger_stopping('', False) From a4bf61b3fb5b43c88f2a9363c173de7d5d0f4b96 Mon Sep 17 00:00:00 2001 From: adrinjalali Date: Sun, 30 Jun 2019 16:01:11 +0200 Subject: [PATCH 16/22] fix preprocessing --- sklearn/preprocessing/tests/test_data.py | 82 +++++++++++------------ sklearn/preprocessing/tests/test_label.py | 22 +++--- 2 files changed, 52 insertions(+), 52 deletions(-) diff --git a/sklearn/preprocessing/tests/test_data.py b/sklearn/preprocessing/tests/test_data.py index b49396c7c0253..ef3e4c4768c84 100644 --- a/sklearn/preprocessing/tests/test_data.py +++ b/sklearn/preprocessing/tests/test_data.py @@ -93,9 +93,9 @@ def _check_dim_1axis(a): def assert_correct_incr(i, batch_start, batch_stop, n, chunk_size, n_samples_seen): if batch_stop != n: - assert_equal((i + 1) * chunk_size, n_samples_seen) + assert (i + 1) * chunk_size == n_samples_seen else: - assert_equal(i * chunk_size + (batch_stop - batch_start), + assert (i * chunk_size + (batch_stop - batch_start) == n_samples_seen) @@ -128,8 +128,8 @@ def test_polynomial_features(): X_poly = interact.fit_transform(X) assert_array_almost_equal(X_poly, P2[:, [0, 1, 2, 4]]) - assert_equal(interact.powers_.shape, (interact.n_output_features_, - interact.n_input_features_)) + assert interact.powers_.shape == (interact.n_output_features_, + interact.n_input_features_) def 
test_polynomial_feature_names(): @@ -319,7 +319,7 @@ def test_standard_scaler_1d(): np.zeros_like(n_features)) assert_array_almost_equal(X_scaled.mean(axis=0), .0) assert_array_almost_equal(X_scaled.std(axis=0), 1.) - assert_equal(scaler.n_samples_seen_, X.shape[0]) + assert scaler.n_samples_seen_ == X.shape[0] # check inverse transform X_scaled_back = scaler.inverse_transform(X_scaled) @@ -333,7 +333,7 @@ def test_standard_scaler_1d(): assert_almost_equal(scaler.scale_, 1.) assert_array_almost_equal(X_scaled.mean(axis=0), .0) assert_array_almost_equal(X_scaled.std(axis=0), .0) - assert_equal(scaler.n_samples_seen_, X.shape[0]) + assert scaler.n_samples_seen_ == X.shape[0] def test_standard_scaler_dtype(): @@ -408,7 +408,7 @@ def test_scaler_2d_arrays(): scaler = StandardScaler() X_scaled = scaler.fit(X).transform(X, copy=True) assert not np.any(np.isnan(X_scaled)) - assert_equal(scaler.n_samples_seen_, n_samples) + assert scaler.n_samples_seen_ == n_samples assert_array_almost_equal(X_scaled.mean(axis=0), n_features * [0.0]) assert_array_almost_equal(X_scaled.std(axis=0), [0., 1., 1., 1., 1.]) @@ -501,7 +501,7 @@ def test_minmax_scaler_partial_fit(): scaler_incr.data_min_) assert_array_almost_equal(scaler_batch.data_max_, scaler_incr.data_max_) - assert_equal(scaler_batch.n_samples_seen_, scaler_incr.n_samples_seen_) + assert scaler_batch.n_samples_seen_ == scaler_incr.n_samples_seen_ assert_array_almost_equal(scaler_batch.data_range_, scaler_incr.data_range_) assert_array_almost_equal(scaler_batch.scale_, scaler_incr.scale_) @@ -516,7 +516,7 @@ def test_minmax_scaler_partial_fit(): scaler_incr.data_min_) assert_array_almost_equal(scaler_batch.data_max_, scaler_incr.data_max_) - assert_equal(scaler_batch.n_samples_seen_, scaler_incr.n_samples_seen_) + assert scaler_batch.n_samples_seen_ == scaler_incr.n_samples_seen_ assert_array_almost_equal(scaler_batch.data_range_, scaler_incr.data_range_) assert_array_almost_equal(scaler_batch.scale_, scaler_incr.scale_) @@ -548,8 +548,8 @@ def test_standard_scaler_partial_fit(): scaler_incr = scaler_incr.partial_fit(X[batch]) assert_array_almost_equal(scaler_batch.mean_, scaler_incr.mean_) - assert_equal(scaler_batch.var_, scaler_incr.var_) # Nones - assert_equal(scaler_batch.n_samples_seen_, scaler_incr.n_samples_seen_) + assert scaler_batch.var_ == scaler_incr.var_ # Nones + assert scaler_batch.n_samples_seen_ == scaler_incr.n_samples_seen_ # Test std after 1 step batch0 = slice(0, chunk_size) @@ -576,7 +576,7 @@ def test_standard_scaler_partial_fit(): n_samples_seen=scaler_incr.n_samples_seen_) assert_array_almost_equal(scaler_batch.var_, scaler_incr.var_) - assert_equal(scaler_batch.n_samples_seen_, scaler_incr.n_samples_seen_) + assert scaler_batch.n_samples_seen_ == scaler_incr.n_samples_seen_ def test_standard_scaler_partial_fit_numerical_stability(): @@ -665,7 +665,7 @@ def test_standard_scaler_trasform_with_partial_fit(): assert_array_less(zero, scaler_incr.var_ + epsilon) # as less or equal assert_array_less(zero, scaler_incr.scale_ + epsilon) # (i+1) because the Scaler has been already fitted - assert_equal((i + 1), scaler_incr.n_samples_seen_) + assert (i + 1) == scaler_incr.n_samples_seen_ def test_min_max_scaler_iris(): @@ -765,7 +765,7 @@ def test_min_max_scaler_1d(): else: assert_array_almost_equal(X_scaled.min(axis=0), .0) assert_array_almost_equal(X_scaled.max(axis=0), 1.) 
- assert_equal(scaler.n_samples_seen_, X.shape[0]) + assert scaler.n_samples_seen_ == X.shape[0] # check inverse transform X_scaled_back = scaler.inverse_transform(X_scaled) @@ -775,9 +775,9 @@ def test_min_max_scaler_1d(): X = np.ones((5, 1)) scaler = MinMaxScaler() X_scaled = scaler.fit(X).transform(X) - assert_greater_equal(X_scaled.min(), 0.) - assert_less_equal(X_scaled.max(), 1.) - assert_equal(scaler.n_samples_seen_, X.shape[0]) + assert X_scaled.min() >= 0. + assert X_scaled.max() <= 1. + assert scaler.n_samples_seen_ == X.shape[0] # Function interface X_1d = X_1row.ravel() @@ -1386,7 +1386,7 @@ def test_quantile_transform_subsampling(): inf_norm_arr.append(inf_norm) # each random subsampling yield a unique approximation to the expected # linspace CDF - assert_equal(len(np.unique(inf_norm_arr)), len(inf_norm_arr)) + assert len(np.unique(inf_norm_arr)) == len(inf_norm_arr) # sparse support @@ -1404,7 +1404,7 @@ def test_quantile_transform_subsampling(): inf_norm_arr.append(inf_norm) # each random subsampling yield a unique approximation to the expected # linspace CDF - assert_equal(len(np.unique(inf_norm_arr)), len(inf_norm_arr)) + assert len(np.unique(inf_norm_arr)) == len(inf_norm_arr) def test_quantile_transform_sparse_toy(): @@ -1486,14 +1486,14 @@ def test_quantile_transform_bounds(): X = np.random.random((1000, 1)) transformer = QuantileTransformer() transformer.fit(X) - assert_equal(transformer.transform([[-10]]), + assert (transformer.transform([[-10]]) == transformer.transform([[np.min(X)]])) - assert_equal(transformer.transform([[10]]), + assert (transformer.transform([[10]]) == transformer.transform([[np.max(X)]])) - assert_equal(transformer.inverse_transform([[-10]]), + assert (transformer.inverse_transform([[-10]]) == transformer.inverse_transform( [[np.min(transformer.references_)]])) - assert_equal(transformer.inverse_transform([[10]]), + assert (transformer.inverse_transform([[10]]) == transformer.inverse_transform( [[np.max(transformer.references_)]])) @@ -1725,7 +1725,7 @@ def test_maxabs_scaler_1d(): np.ones(n_features)) else: assert_array_almost_equal(np.abs(X_scaled.max(axis=0)), 1.) - assert_equal(scaler.n_samples_seen_, X.shape[0]) + assert scaler.n_samples_seen_ == X.shape[0] # check inverse transform X_scaled_back = scaler.inverse_transform(X_scaled) @@ -1736,7 +1736,7 @@ def test_maxabs_scaler_1d(): scaler = MaxAbsScaler() X_scaled = scaler.fit(X).transform(X) assert_array_almost_equal(np.abs(X_scaled.max(axis=0)), 1.) 
- assert_equal(scaler.n_samples_seen_, X.shape[0]) + assert scaler.n_samples_seen_ == X.shape[0] # function interface X_1d = X_1row.ravel() @@ -1769,10 +1769,10 @@ def test_maxabs_scaler_partial_fit(): scaler_incr_csr.max_abs_) assert_array_almost_equal(scaler_batch.max_abs_, scaler_incr_csc.max_abs_) - assert_equal(scaler_batch.n_samples_seen_, scaler_incr.n_samples_seen_) - assert_equal(scaler_batch.n_samples_seen_, + assert scaler_batch.n_samples_seen_ == scaler_incr.n_samples_seen_ + assert (scaler_batch.n_samples_seen_ == scaler_incr_csr.n_samples_seen_) - assert_equal(scaler_batch.n_samples_seen_, + assert (scaler_batch.n_samples_seen_ == scaler_incr_csc.n_samples_seen_) assert_array_almost_equal(scaler_batch.scale_, scaler_incr.scale_) assert_array_almost_equal(scaler_batch.scale_, scaler_incr_csr.scale_) @@ -1786,7 +1786,7 @@ def test_maxabs_scaler_partial_fit(): scaler_incr = MaxAbsScaler().partial_fit(X[batch0]) assert_array_almost_equal(scaler_batch.max_abs_, scaler_incr.max_abs_) - assert_equal(scaler_batch.n_samples_seen_, scaler_incr.n_samples_seen_) + assert scaler_batch.n_samples_seen_ == scaler_incr.n_samples_seen_ assert_array_almost_equal(scaler_batch.scale_, scaler_incr.scale_) assert_array_almost_equal(scaler_batch.transform(X), scaler_incr.transform(X)) @@ -1966,7 +1966,7 @@ def test_normalize(): for norm in ('l1', 'l2'): X = X.astype(dtype) X_norm = normalize(X, norm=norm) - assert_equal(X_norm.dtype, dtype) + assert X_norm.dtype == dtype X_norm = toarray(X_norm) if norm == 'l1': @@ -2005,23 +2005,23 @@ def test_binarizer(): binarizer = Binarizer(threshold=2.0, copy=True) X_bin = toarray(binarizer.transform(X)) - assert_equal(np.sum(X_bin == 0), 4) - assert_equal(np.sum(X_bin == 1), 2) + assert np.sum(X_bin == 0) == 4 + assert np.sum(X_bin == 1) == 2 X_bin = binarizer.transform(X) - assert_equal(sparse.issparse(X), sparse.issparse(X_bin)) + assert sparse.issparse(X) == sparse.issparse(X_bin) binarizer = Binarizer(copy=True).fit(X) X_bin = toarray(binarizer.transform(X)) assert X_bin is not X - assert_equal(np.sum(X_bin == 0), 2) - assert_equal(np.sum(X_bin == 1), 4) + assert np.sum(X_bin == 0) == 2 + assert np.sum(X_bin == 1) == 4 binarizer = Binarizer(copy=True) X_bin = binarizer.transform(X) assert X_bin is not X X_bin = toarray(X_bin) - assert_equal(np.sum(X_bin == 0), 2) - assert_equal(np.sum(X_bin == 1), 4) + assert np.sum(X_bin == 0) == 2 + assert np.sum(X_bin == 1) == 4 binarizer = Binarizer(copy=False) X_bin = binarizer.transform(X) @@ -2035,16 +2035,16 @@ def test_binarizer(): assert X_bin is X_float X_bin = toarray(X_bin) - assert_equal(np.sum(X_bin == 0), 2) - assert_equal(np.sum(X_bin == 1), 4) + assert np.sum(X_bin == 0) == 2 + assert np.sum(X_bin == 1) == 4 binarizer = Binarizer(threshold=-0.5, copy=True) for init in (np.array, list): X = init(X_.copy()) X_bin = toarray(binarizer.transform(X)) - assert_equal(np.sum(X_bin == 0), 1) - assert_equal(np.sum(X_bin == 1), 5) + assert np.sum(X_bin == 0) == 1 + assert np.sum(X_bin == 1) == 5 X_bin = binarizer.transform(X) # Cannot use threshold < 0 for sparse diff --git a/sklearn/preprocessing/tests/test_label.py b/sklearn/preprocessing/tests/test_label.py index bd853f0bba59d..ebb7d6b0138f8 100644 --- a/sklearn/preprocessing/tests/test_label.py +++ b/sklearn/preprocessing/tests/test_label.py @@ -268,26 +268,26 @@ def test_sparse_output_multilabel_binarizer(): # With fit_transform mlb = MultiLabelBinarizer(sparse_output=sparse_output) got = mlb.fit_transform(inp()) - assert_equal(issparse(got), sparse_output) + 
assert issparse(got) == sparse_output if sparse_output: # verify CSR assumption that indices and indptr have same dtype - assert_equal(got.indices.dtype, got.indptr.dtype) + assert got.indices.dtype == got.indptr.dtype got = got.toarray() assert_array_equal(indicator_mat, got) assert_array_equal([1, 2, 3], mlb.classes_) - assert_equal(mlb.inverse_transform(got), inverse) + assert mlb.inverse_transform(got) == inverse # With fit mlb = MultiLabelBinarizer(sparse_output=sparse_output) got = mlb.fit(inp()).transform(inp()) - assert_equal(issparse(got), sparse_output) + assert issparse(got) == sparse_output if sparse_output: # verify CSR assumption that indices and indptr have same dtype - assert_equal(got.indices.dtype, got.indptr.dtype) + assert got.indices.dtype == got.indptr.dtype got = got.toarray() assert_array_equal(indicator_mat, got) assert_array_equal([1, 2, 3], mlb.classes_) - assert_equal(mlb.inverse_transform(got), inverse) + assert mlb.inverse_transform(got) == inverse assert_raises(ValueError, mlb.inverse_transform, csr_matrix(np.array([[0, 1, 1], @@ -312,14 +312,14 @@ def test_multilabel_binarizer(): got = mlb.fit_transform(inp()) assert_array_equal(indicator_mat, got) assert_array_equal([1, 2, 3], mlb.classes_) - assert_equal(mlb.inverse_transform(got), inverse) + assert mlb.inverse_transform(got) == inverse # With fit mlb = MultiLabelBinarizer() got = mlb.fit(inp()).transform(inp()) assert_array_equal(indicator_mat, got) assert_array_equal([1, 2, 3], mlb.classes_) - assert_equal(mlb.inverse_transform(got), inverse) + assert mlb.inverse_transform(got) == inverse def test_multilabel_binarizer_empty_sample(): @@ -497,7 +497,7 @@ def check_binarized_results(y, classes, pos_label, neg_label, expected): pos_label=pos_label, sparse_output=sparse_output) assert_array_equal(toarray(binarized), expected) - assert_equal(issparse(binarized), sparse_output) + assert issparse(binarized) == sparse_output # check inverse y_type = type_of_target(y) @@ -519,10 +519,10 @@ def check_binarized_results(y, classes, pos_label, neg_label, expected): sparse_output=sparse_output) binarized = lb.fit_transform(y) assert_array_equal(toarray(binarized), expected) - assert_equal(issparse(binarized), sparse_output) + assert issparse(binarized) == sparse_output inverse_output = lb.inverse_transform(binarized) assert_array_equal(toarray(inverse_output), toarray(y)) - assert_equal(issparse(inverse_output), issparse(y)) + assert issparse(inverse_output) == issparse(y) def test_label_binarize_binary(): From 26216d08faacd4cb160f22bba38c400965762888 Mon Sep 17 00:00:00 2001 From: adrinjalali Date: Sun, 30 Jun 2019 16:02:08 +0200 Subject: [PATCH 17/22] fix semi_supervised, svm --- .../tests/test_label_propagation.py | 6 +-- sklearn/svm/tests/test_sparse.py | 2 +- sklearn/svm/tests/test_svm.py | 42 +++++++++---------- 3 files changed, 25 insertions(+), 25 deletions(-) diff --git a/sklearn/semi_supervised/tests/test_label_propagation.py b/sklearn/semi_supervised/tests/test_label_propagation.py index ef594fccb7076..d5a4449ee9cc5 100644 --- a/sklearn/semi_supervised/tests/test_label_propagation.py +++ b/sklearn/semi_supervised/tests/test_label_propagation.py @@ -32,7 +32,7 @@ def test_fit_transduction(): labels = [0, 1, -1] for estimator, parameters in ESTIMATORS: clf = estimator(**parameters).fit(samples, labels) - assert_equal(clf.transduction_[2], 1) + assert clf.transduction_[2] == 1 def test_distribution(): @@ -144,11 +144,11 @@ def test_convergence_warning(): y = np.array([0, 1, -1]) mdl = 
label_propagation.LabelSpreading(kernel='rbf', max_iter=1) assert_warns(ConvergenceWarning, mdl.fit, X, y) - assert_equal(mdl.n_iter_, mdl.max_iter) + assert mdl.n_iter_ == mdl.max_iter mdl = label_propagation.LabelPropagation(kernel='rbf', max_iter=1) assert_warns(ConvergenceWarning, mdl.fit, X, y) - assert_equal(mdl.n_iter_, mdl.max_iter) + assert mdl.n_iter_ == mdl.max_iter mdl = label_propagation.LabelSpreading(kernel='rbf', max_iter=500) assert_no_warnings(mdl.fit, X, y) diff --git a/sklearn/svm/tests/test_sparse.py b/sklearn/svm/tests/test_sparse.py index 1cf533f856113..3f7e254ba6379 100644 --- a/sklearn/svm/tests/test_sparse.py +++ b/sklearn/svm/tests/test_sparse.py @@ -229,7 +229,7 @@ def test_linearsvc_iris(): sp_clf = svm.LinearSVC(random_state=0).fit(iris.data, iris.target) clf = svm.LinearSVC(random_state=0).fit(iris.data.toarray(), iris.target) - assert_equal(clf.fit_intercept, sp_clf.fit_intercept) + assert clf.fit_intercept == sp_clf.fit_intercept assert_array_almost_equal(clf.coef_, sp_clf.coef_, decimal=1) assert_array_almost_equal(clf.intercept_, sp_clf.intercept_, decimal=1) diff --git a/sklearn/svm/tests/test_svm.py b/sklearn/svm/tests/test_svm.py index ad2328a84a61c..d7f7a1534f728 100644 --- a/sklearn/svm/tests/test_svm.py +++ b/sklearn/svm/tests/test_svm.py @@ -57,7 +57,7 @@ def test_libsvm_iris(): # shuffle the dataset so that labels are not ordered for k in ('linear', 'rbf'): clf = svm.SVC(kernel=k).fit(iris.data, iris.target) - assert_greater(np.mean(clf.predict(iris.data) == iris.target), 0.9) + assert np.mean(clf.predict(iris.data) == iris.target) > 0.9 assert hasattr(clf, "coef_") == (k == 'linear') assert_array_equal(clf.classes_, np.sort(clf.classes_)) @@ -65,18 +65,18 @@ def test_libsvm_iris(): # check also the low-level API model = svm.libsvm.fit(iris.data, iris.target.astype(np.float64)) pred = svm.libsvm.predict(iris.data, *model) - assert_greater(np.mean(pred == iris.target), .95) + assert np.mean(pred == iris.target) > .95 model = svm.libsvm.fit(iris.data, iris.target.astype(np.float64), kernel='linear') pred = svm.libsvm.predict(iris.data, *model, kernel='linear') - assert_greater(np.mean(pred == iris.target), .95) + assert np.mean(pred == iris.target) > .95 pred = svm.libsvm.cross_validation(iris.data, iris.target.astype(np.float64), 5, kernel='linear', random_seed=0) - assert_greater(np.mean(pred == iris.target), .95) + assert np.mean(pred == iris.target) > .95 # If random_seed >= 0, the libsvm rng is seeded (by calling `srand`), hence # we should get deterministic results (assuming that there is no other @@ -168,7 +168,7 @@ def test_svr(): svm.LinearSVR(C=10.), svm.LinearSVR(C=10.)): clf.fit(diabetes.data, diabetes.target) - assert_greater(clf.score(diabetes.data, diabetes.target), 0.02) + assert clf.score(diabetes.data, diabetes.target) > 0.02 # non-regression test; previously, BaseLibSVM would check that # len(np.unique(y)) < 2, which must only be done for SVC @@ -243,7 +243,7 @@ def test_oneclass(): pred = clf.predict(T) assert_array_equal(pred, [1, -1, -1]) - assert_equal(pred.dtype, np.dtype('intp')) + assert pred.dtype == np.dtype('intp') assert_array_almost_equal(clf.intercept_, [-1.218], decimal=3) assert_array_almost_equal(clf.dual_coef_, [[0.750, 0.750, 0.750, 0.750]], @@ -272,9 +272,9 @@ def test_oneclass_decision_function(): # predict things y_pred_test = clf.predict(X_test) - assert_greater(np.mean(y_pred_test == 1), .9) + assert np.mean(y_pred_test == 1) > .9 y_pred_outliers = clf.predict(X_outliers) - 
assert_greater(np.mean(y_pred_outliers == -1), .9) + assert np.mean(y_pred_outliers == -1) > .9 dec_func_test = clf.decision_function(X_test) assert_array_equal((dec_func_test > 0).ravel(), y_pred_test == 1) dec_func_outliers = clf.decision_function(X_outliers) @@ -360,7 +360,7 @@ def test_decision_function_shape(): clf = svm.SVC(kernel='linear', C=0.1, decision_function_shape='ovr').fit(iris.data, iris.target) dec = clf.decision_function(iris.data) - assert_equal(dec.shape, (len(iris.data), 3)) + assert dec.shape == (len(iris.data), 3) assert_array_equal(clf.predict(iris.data), np.argmax(dec, axis=1)) # with five classes: @@ -370,14 +370,14 @@ def test_decision_function_shape(): clf = svm.SVC(kernel='linear', C=0.1, decision_function_shape='ovr').fit(X_train, y_train) dec = clf.decision_function(X_test) - assert_equal(dec.shape, (len(X_test), 5)) + assert dec.shape == (len(X_test), 5) assert_array_equal(clf.predict(X_test), np.argmax(dec, axis=1)) # check shape of ovo_decition_function=True clf = svm.SVC(kernel='linear', C=0.1, decision_function_shape='ovo').fit(X_train, y_train) dec = clf.decision_function(X_train) - assert_equal(dec.shape, (len(X_train), 10)) + assert dec.shape == (len(X_train), 10) def test_svr_predict(): @@ -548,7 +548,7 @@ def test_sparse_precomputed(): clf.fit(sparse_gram, [0, 1]) assert not "reached" except TypeError as e: - assert_in("Sparse precomputed", str(e)) + assert "Sparse precomputed" in str(e) def test_linearsvc_parameters(): @@ -715,7 +715,7 @@ def test_crammer_singer_binary(): acc = svm.LinearSVC(fit_intercept=fit_intercept, multi_class="crammer_singer", random_state=0).fit(X, y).score(X, y) - assert_greater(acc, 0.9) + assert acc > 0.9 def test_linearsvc_iris(): @@ -723,8 +723,8 @@ def test_linearsvc_iris(): # Also, test symbolic class names (classes_). target = iris.target_names[iris.target] clf = svm.LinearSVC(random_state=0).fit(iris.data, target) - assert_equal(set(clf.classes_), set(iris.target_names)) - assert_greater(np.mean(clf.predict(iris.data) == target), 0.8) + assert set(clf.classes_) == set(iris.target_names) + assert np.mean(clf.predict(iris.data) == target) > 0.8 dec = clf.decision_function(iris.data) pred = iris.target_names[np.argmax(dec, 1)] @@ -754,7 +754,7 @@ def test_dense_liblinear_intercept_handling(classifier=svm.LinearSVC): clf.intercept_scaling = 100 clf.fit(X, y) intercept1 = clf.intercept_ - assert_less(intercept1, -1) + assert intercept1 < -1 # when intercept_scaling is sufficiently high, the intercept value # doesn't depend on intercept_scaling value @@ -883,11 +883,11 @@ def test_linear_svm_convergence_warnings(): lsvc = svm.LinearSVC(random_state=0, max_iter=2) assert_warns(ConvergenceWarning, lsvc.fit, X, Y) - assert_equal(lsvc.n_iter_, 2) + assert lsvc.n_iter_ == 2 lsvr = svm.LinearSVR(random_state=0, max_iter=2) assert_warns(ConvergenceWarning, lsvr.fit, iris.data, iris.target) - assert_equal(lsvr.n_iter_, 2) + assert lsvr.n_iter_ == 2 def test_svr_coef_sign(): @@ -919,7 +919,7 @@ def test_lsvc_intercept_scaling_zero(): lsvc = svm.LinearSVC(fit_intercept=False) lsvc.fit(X, Y) - assert_equal(lsvc.intercept_, 0.) + assert lsvc.intercept_ == 0. 
def test_hasattr_predict_proba(): @@ -950,7 +950,7 @@ def test_decision_function_shape_two_class(): for estimator in [svm.SVC, svm.NuSVC]: clf = OneVsRestClassifier( estimator(decision_function_shape="ovr")).fit(X, y) - assert_equal(len(clf.predict(X)), len(y)) + assert len(clf.predict(X)) == len(y) def test_ovr_decision_function(): @@ -988,7 +988,7 @@ def test_ovr_decision_function(): pred_class_deci_val = deci_val[range(8), y_pred].reshape((4, 2)) # Assert pred_class_deci_val > 0 here - assert_greater(np.min(pred_class_deci_val), 0.0) + assert np.min(pred_class_deci_val) > 0.0 # Test if the first point has lower decision value on every quadrant # compared to the second point From b46c28e827bd848411ae8445e2b1b4f7ea71a632 Mon Sep 17 00:00:00 2001 From: adrinjalali Date: Sun, 30 Jun 2019 16:17:43 +0200 Subject: [PATCH 18/22] fix semi_supervised, tree --- sklearn/tree/tests/test_export.py | 26 ++-- sklearn/tree/tests/test_tree.py | 248 ++++++++++++++---------------- 2 files changed, 130 insertions(+), 144 deletions(-) diff --git a/sklearn/tree/tests/test_export.py b/sklearn/tree/tests/test_export.py index 317a11ae25836..f41ef59f7b7a2 100644 --- a/sklearn/tree/tests/test_export.py +++ b/sklearn/tree/tests/test_export.py @@ -46,7 +46,7 @@ def test_graphviz_toy(): 'headlabel="False"] ;\n' \ '}' - assert_equal(contents1, contents2) + assert contents1 == contents2 # Test with feature_names contents1 = export_graphviz(clf, feature_names=["feature0", "feature1"], @@ -63,7 +63,7 @@ def test_graphviz_toy(): 'headlabel="False"] ;\n' \ '}' - assert_equal(contents1, contents2) + assert contents1 == contents2 # Test with class_names contents1 = export_graphviz(clf, class_names=["yes", "no"], out_file=None) @@ -81,7 +81,7 @@ def test_graphviz_toy(): 'headlabel="False"] ;\n' \ '}' - assert_equal(contents1, contents2) + assert contents1 == contents2 # Test plot_options contents1 = export_graphviz(clf, filled=True, impurity=False, @@ -103,7 +103,7 @@ def test_graphviz_toy(): 'headlabel="False"] ;\n' \ '}' - assert_equal(contents1, contents2) + assert contents1 == contents2 # Test max_depth contents1 = export_graphviz(clf, max_depth=0, @@ -118,7 +118,7 @@ def test_graphviz_toy(): '0 -> 2 ;\n' \ '}' - assert_equal(contents1, contents2) + assert contents1 == contents2 # Test max_depth with plot_options contents1 = export_graphviz(clf, max_depth=0, filled=True, @@ -133,7 +133,7 @@ def test_graphviz_toy(): '0 -> 2 ;\n' \ '}' - assert_equal(contents1, contents2) + assert contents1 == contents2 # Test multi-output with weighted samples clf = DecisionTreeClassifier(max_depth=2, @@ -166,7 +166,7 @@ def test_graphviz_toy(): '2 -> 4 ;\n' \ '}' - assert_equal(contents1, contents2) + assert contents1 == contents2 # Test regression output with plot_options clf = DecisionTreeRegressor(max_depth=3, @@ -197,7 +197,7 @@ def test_graphviz_toy(): '{rank=same ; 1; 2} ;\n' \ '}' - assert_equal(contents1, contents2) + assert contents1 == contents2 # Test classifier with degraded learning set clf = DecisionTreeClassifier(max_depth=3) @@ -262,7 +262,7 @@ def test_friedman_mse_in_graphviz(): export_graphviz(estimator[0], out_file=dot_data) for finding in finditer(r"\[.*?samples.*?\]", dot_data.getvalue()): - assert_in("friedman_mse", finding.group()) + assert "friedman_mse" in finding.group() def test_precision(): @@ -291,8 +291,8 @@ def test_precision(): # check value for finding in finditer(r"value = \d+\.\d+", dot_data): - assert_less_equal( - len(search(r"\.\d+", finding.group()).group()), + assert ( + len(search(r"\.\d+", 
finding.group()).group()) <= precision + 1) # check impurity if is_classifier(clf): @@ -302,11 +302,11 @@ def test_precision(): # check impurity for finding in finditer(pattern, dot_data): - assert_equal(len(search(r"\.\d+", finding.group()).group()), + assert (len(search(r"\.\d+", finding.group()).group()) == precision + 1) # check threshold for finding in finditer(r"<= \d+\.\d+", dot_data): - assert_equal(len(search(r"\.\d+", finding.group()).group()), + assert (len(search(r"\.\d+", finding.group()).group()) == precision + 1) diff --git a/sklearn/tree/tests/test_tree.py b/sklearn/tree/tests/test_tree.py index 8d64150cab8f5..89a1816fd852e 100644 --- a/sklearn/tree/tests/test_tree.py +++ b/sklearn/tree/tests/test_tree.py @@ -164,9 +164,9 @@ def assert_tree_equal(d, s, message): - assert_equal(s.node_count, d.node_count, - "{0}: inequal number of node ({1} != {2})" - "".format(message, s.node_count, d.node_count)) + assert s.node_count == d.node_count, ( + "{0}: inequal number of node ({1} != {2})" + "".format(message, s.node_count, d.node_count)) assert_array_equal(d.children_right, s.children_right, message + ": inequal children_right") @@ -248,13 +248,11 @@ def test_xor(): for name, Tree in CLF_TREES.items(): clf = Tree(random_state=0) clf.fit(X, y) - assert_equal(clf.score(X, y), 1.0, - "Failed with {0}".format(name)) + assert clf.score(X, y) == 1.0, "Failed with {0}".format(name) clf = Tree(random_state=0, max_features=1) clf.fit(X, y) - assert_equal(clf.score(X, y), 1.0, - "Failed with {0}".format(name)) + assert clf.score(X, y) == 1.0, "Failed with {0}".format(name) def test_iris(): @@ -263,16 +261,16 @@ def test_iris(): clf = Tree(criterion=criterion, random_state=0) clf.fit(iris.data, iris.target) score = accuracy_score(clf.predict(iris.data), iris.target) - assert_greater(score, 0.9, - "Failed with {0}, criterion = {1} and score = {2}" - "".format(name, criterion, score)) + assert score > 0.9, ( + "Failed with {0}, criterion = {1} and score = {2}" + "".format(name, criterion, score)) clf = Tree(criterion=criterion, max_features=2, random_state=0) clf.fit(iris.data, iris.target) score = accuracy_score(clf.predict(iris.data), iris.target) - assert_greater(score, 0.5, - "Failed with {0}, criterion = {1} and score = {2}" - "".format(name, criterion, score)) + assert score > 0.5, ( + "Failed with {0}, criterion = {1} and score = {2}" + "".format(name, criterion, score)) def test_boston(): @@ -282,18 +280,18 @@ def test_boston(): reg = Tree(criterion=criterion, random_state=0) reg.fit(boston.data, boston.target) score = mean_squared_error(boston.target, reg.predict(boston.data)) - assert_less(score, 1, - "Failed with {0}, criterion = {1} and score = {2}" - "".format(name, criterion, score)) + assert score < 1, ( + "Failed with {0}, criterion = {1} and score = {2}" + "".format(name, criterion, score)) # using fewer features reduces the learning ability of this tree, # but reduces training time. 
reg = Tree(criterion=criterion, max_features=6, random_state=0) reg.fit(boston.data, boston.target) score = mean_squared_error(boston.target, reg.predict(boston.data)) - assert_less(score, 2, - "Failed with {0}, criterion = {1} and score = {2}" - "".format(name, criterion, score)) + assert score < 2, ( + "Failed with {0}, criterion = {1} and score = {2}" + "".format(name, criterion, score)) def test_probability(): @@ -384,8 +382,8 @@ def test_importances(): importances = clf.feature_importances_ n_important = np.sum(importances > 0.1) - assert_equal(importances.shape[0], 10, "Failed with {0}".format(name)) - assert_equal(n_important, 3, "Failed with {0}".format(name)) + assert importances.shape[0] == 10, "Failed with {0}".format(name) + assert n_important == 3, "Failed with {0}".format(name) # Check on iris that importances are the same for all builders clf = DecisionTreeClassifier(random_state=0) @@ -435,48 +433,48 @@ def test_max_features(): for name, TreeRegressor in REG_TREES.items(): reg = TreeRegressor(max_features="auto") reg.fit(boston.data, boston.target) - assert_equal(reg.max_features_, boston.data.shape[1]) + assert reg.max_features_ == boston.data.shape[1] for name, TreeClassifier in CLF_TREES.items(): clf = TreeClassifier(max_features="auto") clf.fit(iris.data, iris.target) - assert_equal(clf.max_features_, 2) + assert clf.max_features_ == 2 for name, TreeEstimator in ALL_TREES.items(): est = TreeEstimator(max_features="sqrt") est.fit(iris.data, iris.target) - assert_equal(est.max_features_, + assert (est.max_features_ == int(np.sqrt(iris.data.shape[1]))) est = TreeEstimator(max_features="log2") est.fit(iris.data, iris.target) - assert_equal(est.max_features_, + assert (est.max_features_ == int(np.log2(iris.data.shape[1]))) est = TreeEstimator(max_features=1) est.fit(iris.data, iris.target) - assert_equal(est.max_features_, 1) + assert est.max_features_ == 1 est = TreeEstimator(max_features=3) est.fit(iris.data, iris.target) - assert_equal(est.max_features_, 3) + assert est.max_features_ == 3 est = TreeEstimator(max_features=0.01) est.fit(iris.data, iris.target) - assert_equal(est.max_features_, 1) + assert est.max_features_ == 1 est = TreeEstimator(max_features=0.5) est.fit(iris.data, iris.target) - assert_equal(est.max_features_, + assert (est.max_features_ == int(0.5 * iris.data.shape[1])) est = TreeEstimator(max_features=1.0) est.fit(iris.data, iris.target) - assert_equal(est.max_features_, iris.data.shape[1]) + assert est.max_features_ == iris.data.shape[1] est = TreeEstimator(max_features=None) est.fit(iris.data, iris.target) - assert_equal(est.max_features_, iris.data.shape[1]) + assert est.max_features_ == iris.data.shape[1] # use values of max_features that are invalid est = TreeEstimator(max_features=10) @@ -590,8 +588,7 @@ def test_min_samples_split(): # count samples on nodes, -1 means it is a leaf node_samples = est.tree_.n_node_samples[est.tree_.children_left != -1] - assert_greater(np.min(node_samples), 9, - "Failed with {0}".format(name)) + assert np.min(node_samples) > 9, "Failed with {0}".format(name) # test for float parameter est = TreeEstimator(min_samples_split=0.2, @@ -601,8 +598,7 @@ def test_min_samples_split(): # count samples on nodes, -1 means it is a leaf node_samples = est.tree_.n_node_samples[est.tree_.children_left != -1] - assert_greater(np.min(node_samples), 9, - "Failed with {0}".format(name)) + assert np.min(node_samples) > 9, "Failed with {0}".format(name) def test_min_samples_leaf(): @@ -624,8 +620,7 @@ def test_min_samples_leaf(): 
node_counts = np.bincount(out) # drop inner nodes leaf_count = node_counts[node_counts != 0] - assert_greater(np.min(leaf_count), 4, - "Failed with {0}".format(name)) + assert np.min(leaf_count) > 4, "Failed with {0}".format(name) # test float parameter est = TreeEstimator(min_samples_leaf=0.1, @@ -636,8 +631,7 @@ def test_min_samples_leaf(): node_counts = np.bincount(out) # drop inner nodes leaf_count = node_counts[node_counts != 0] - assert_greater(np.min(leaf_count), 4, - "Failed with {0}".format(name)) + assert np.min(leaf_count) > 4, "Failed with {0}".format(name) def check_min_weight_fraction_leaf(name, datasets, sparse=False): @@ -671,12 +665,11 @@ def check_min_weight_fraction_leaf(name, datasets, sparse=False): node_weights = np.bincount(out, weights=weights) # drop inner nodes leaf_weights = node_weights[node_weights != 0] - assert_greater_equal( - np.min(leaf_weights), - total_weight * est.min_weight_fraction_leaf, - "Failed with {0} " - "min_weight_fraction_leaf={1}".format( - name, est.min_weight_fraction_leaf)) + assert ( + np.min(leaf_weights) >= + total_weight * est.min_weight_fraction_leaf), ( + "Failed with {0} min_weight_fraction_leaf={1}".format( + name, est.min_weight_fraction_leaf)) # test case with no weights passed in total_weight = X.shape[0] @@ -695,12 +688,11 @@ def check_min_weight_fraction_leaf(name, datasets, sparse=False): node_weights = np.bincount(out) # drop inner nodes leaf_weights = node_weights[node_weights != 0] - assert_greater_equal( - np.min(leaf_weights), - total_weight * est.min_weight_fraction_leaf, - "Failed with {0} " - "min_weight_fraction_leaf={1}".format( - name, est.min_weight_fraction_leaf)) + assert ( + np.min(leaf_weights) >= + total_weight * est.min_weight_fraction_leaf), ( + "Failed with {0} min_weight_fraction_leaf={1}".format( + name, est.min_weight_fraction_leaf)) @pytest.mark.parametrize("name", ALL_TREES) @@ -741,15 +733,14 @@ def check_min_weight_fraction_leaf_with_min_samples_leaf(name, datasets, node_weights = np.bincount(out) # drop inner nodes leaf_weights = node_weights[node_weights != 0] - assert_greater_equal( - np.min(leaf_weights), + assert ( + np.min(leaf_weights) >= max((total_weight * - est.min_weight_fraction_leaf), 5), - "Failed with {0} " - "min_weight_fraction_leaf={1}, " - "min_samples_leaf={2}".format(name, - est.min_weight_fraction_leaf, - est.min_samples_leaf)) + est.min_weight_fraction_leaf), 5)), ( + "Failed with {0} min_weight_fraction_leaf={1}, " + "min_samples_leaf={2}".format( + name, est.min_weight_fraction_leaf, + est.min_samples_leaf)) for max_leaf_nodes, frac in product((None, 1000), np.linspace(0, 0.5, 3)): # test float min_samples_leaf est = TreeEstimator(min_weight_fraction_leaf=frac, @@ -766,15 +757,14 @@ def check_min_weight_fraction_leaf_with_min_samples_leaf(name, datasets, node_weights = np.bincount(out) # drop inner nodes leaf_weights = node_weights[node_weights != 0] - assert_greater_equal( - np.min(leaf_weights), + assert ( + np.min(leaf_weights) >= max((total_weight * est.min_weight_fraction_leaf), - (total_weight * est.min_samples_leaf)), - "Failed with {0} " - "min_weight_fraction_leaf={1}, " - "min_samples_leaf={2}".format(name, - est.min_weight_fraction_leaf, - est.min_samples_leaf)) + (total_weight * est.min_samples_leaf))), ( + "Failed with {0} min_weight_fraction_leaf={1}, " + "min_samples_leaf={2}".format(name, + est.min_weight_fraction_leaf, + est.min_samples_leaf)) @pytest.mark.parametrize("name", ALL_TREES) @@ -815,11 +805,10 @@ def test_min_impurity_split(): for node in 
range(est.tree_.node_count): if (est.tree_.children_left[node] == TREE_LEAF or est.tree_.children_right[node] == TREE_LEAF): - assert_equal(est.tree_.impurity[node], 0., - "Failed with {0} " - "min_impurity_split={1}".format( - est.tree_.impurity[node], - est.min_impurity_split)) + assert est.tree_.impurity[node] == 0., ( + "Failed with {0} min_impurity_split={1}".format( + est.tree_.impurity[node], + est.min_impurity_split)) # verify leaf nodes have impurity [0,min_impurity_split] when using # min_impurity_split @@ -832,16 +821,14 @@ def test_min_impurity_split(): for node in range(est.tree_.node_count): if (est.tree_.children_left[node] == TREE_LEAF or est.tree_.children_right[node] == TREE_LEAF): - assert_greater_equal(est.tree_.impurity[node], 0, - "Failed with {0}, " - "min_impurity_split={1}".format( - est.tree_.impurity[node], - est.min_impurity_split)) - assert_less_equal(est.tree_.impurity[node], min_impurity_split, - "Failed with {0}, " - "min_impurity_split={1}".format( - est.tree_.impurity[node], - est.min_impurity_split)) + assert est.tree_.impurity[node] >= 0, ( + "Failed with {0}, min_impurity_split={1}".format( + est.tree_.impurity[node], + est.min_impurity_split)) + assert est.tree_.impurity[node] <= min_impurity_split, ( + "Failed with {0}, min_impurity_split={1}".format( + est.tree_.impurity[node], + est.min_impurity_split)) def test_min_impurity_decrease(): @@ -868,10 +855,10 @@ def test_min_impurity_decrease(): for est, expected_decrease in ((est1, 1e-7), (est2, 0.05), (est3, 0.0001), (est4, 0.1)): - assert_less_equal(est.min_impurity_decrease, expected_decrease, - "Failed, min_impurity_decrease = {0} > {1}" - .format(est.min_impurity_decrease, - expected_decrease)) + assert est.min_impurity_decrease <= expected_decrease, ( + "Failed, min_impurity_decrease = {0} > {1}".format( + est.min_impurity_decrease, + expected_decrease)) est.fit(X, y) for node in range(est.tree_.node_count): # If current node is a not leaf node, check if the split was @@ -899,11 +886,10 @@ def test_min_impurity_decrease(): actual_decrease = fractional_node_weight * ( imp_parent - wtd_avg_left_right_imp) - assert_greater_equal(actual_decrease, expected_decrease, - "Failed with {0} " - "expected min_impurity_decrease={1}" - .format(actual_decrease, - expected_decrease)) + assert actual_decrease >= expected_decrease, ( + "Failed with {0} expected min_impurity_decrease={1}" + .format(actual_decrease, + expected_decrease)) for name, TreeEstimator in ALL_TREES.items(): if "Classifier" in name: @@ -920,17 +906,17 @@ def test_min_impurity_decrease(): serialized_object = pickle.dumps(est) est2 = pickle.loads(serialized_object) - assert_equal(type(est2), est.__class__) + assert type(est2) == est.__class__ score2 = est2.score(X, y) - assert_equal(score, score2, - "Failed to generate same score after pickling " - "with {0}".format(name)) + assert score == score2, ( + "Failed to generate same score after pickling " + "with {0}".format(name)) for attribute in fitted_attribute: - assert_equal(getattr(est2.tree_, attribute), - fitted_attribute[attribute], - "Failed to generate same attribute {0} after " - "pickling with {1}".format(attribute, name)) + assert (getattr(est2.tree_, attribute) == + fitted_attribute[attribute]), ( + "Failed to generate same attribute {0} after " + "pickling with {1}".format(attribute, name)) def test_multioutput(): @@ -969,24 +955,24 @@ def test_multioutput(): clf = TreeClassifier(random_state=0) y_hat = clf.fit(X, y).predict(T) assert_array_equal(y_hat, y_true) - 
assert_equal(y_hat.shape, (4, 2)) + assert y_hat.shape == (4, 2) proba = clf.predict_proba(T) - assert_equal(len(proba), 2) - assert_equal(proba[0].shape, (4, 2)) - assert_equal(proba[1].shape, (4, 4)) + assert len(proba) == 2 + assert proba[0].shape == (4, 2) + assert proba[1].shape == (4, 4) log_proba = clf.predict_log_proba(T) - assert_equal(len(log_proba), 2) - assert_equal(log_proba[0].shape, (4, 2)) - assert_equal(log_proba[1].shape, (4, 4)) + assert len(log_proba) == 2 + assert log_proba[0].shape == (4, 2) + assert log_proba[1].shape == (4, 4) # toy regression problem for name, TreeRegressor in REG_TREES.items(): reg = TreeRegressor(random_state=0) y_hat = reg.fit(X, y).predict(T) assert_almost_equal(y_hat, y_true) - assert_equal(y_hat.shape, (4, 2)) + assert y_hat.shape == (4, 2) def test_classes_shape(): @@ -996,15 +982,15 @@ def test_classes_shape(): clf = TreeClassifier(random_state=0) clf.fit(X, y) - assert_equal(clf.n_classes_, 2) + assert clf.n_classes_ == 2 assert_array_equal(clf.classes_, [-1, 1]) # Classification, multi-output _y = np.vstack((y, np.array(y) * 2)).T clf = TreeClassifier(random_state=0) clf.fit(X, _y) - assert_equal(len(clf.n_classes_), 2) - assert_equal(len(clf.classes_), 2) + assert len(clf.n_classes_) == 2 + assert len(clf.classes_) == 2 assert_array_equal(clf.n_classes_, [2, 2]) assert_array_equal(clf.classes_, [[-1, 1], [-2, 2]]) @@ -1090,12 +1076,12 @@ def test_sample_weight(): sample_weight[y == 2] = .51 # Samples of class '2' are still weightier clf = DecisionTreeClassifier(max_depth=1, random_state=0) clf.fit(X, y, sample_weight=sample_weight) - assert_equal(clf.tree_.threshold[0], 149.5) + assert clf.tree_.threshold[0] == 149.5 sample_weight[y == 2] = .5 # Samples of class '2' are no longer weightier clf = DecisionTreeClassifier(max_depth=1, random_state=0) clf.fit(X, y, sample_weight=sample_weight) - assert_equal(clf.tree_.threshold[0], 49.5) # Threshold should have moved + assert clf.tree_.threshold[0] == 49.5 # Threshold should have moved # Test that sample weighting is the same as having duplicates X = iris.data @@ -1214,7 +1200,7 @@ def test_max_leaf_nodes(): k = 4 for name, TreeEstimator in ALL_TREES.items(): est = TreeEstimator(max_depth=None, max_leaf_nodes=k + 1).fit(X, y) - assert_equal(est.get_n_leaves(), k + 1) + assert est.get_n_leaves() == k + 1 # max_leaf_nodes in (0, 1) should raise ValueError est = TreeEstimator(max_depth=None, max_leaf_nodes=0) @@ -1231,7 +1217,7 @@ def test_max_leaf_nodes_max_depth(): k = 4 for name, TreeEstimator in ALL_TREES.items(): est = TreeEstimator(max_depth=1, max_leaf_nodes=k).fit(X, y) - assert_equal(est.get_depth(), 1) + assert est.get_depth() == 1 def test_arrays_persist(): @@ -1253,7 +1239,7 @@ def test_only_constant_features(): for name, TreeEstimator in ALL_TREES.items(): est = TreeEstimator(random_state=0) est.fit(X, y) - assert_equal(est.tree_.max_depth, 0) + assert est.tree_.max_depth == 0 def test_behaviour_constant_feature_after_splits(): @@ -1265,8 +1251,8 @@ def test_behaviour_constant_feature_after_splits(): if "ExtraTree" not in name: est = TreeEstimator(random_state=0, max_features=1) est.fit(X, y) - assert_equal(est.tree_.max_depth, 2) - assert_equal(est.tree_.node_count, 5) + assert est.tree_.max_depth == 2 + assert est.tree_.node_count == 5 def test_with_only_one_non_constant_features(): @@ -1277,13 +1263,13 @@ def test_with_only_one_non_constant_features(): for name, TreeEstimator in CLF_TREES.items(): est = TreeEstimator(random_state=0, max_features=1) est.fit(X, y) - 
assert_equal(est.tree_.max_depth, 1) + assert est.tree_.max_depth == 1 assert_array_equal(est.predict_proba(X), np.full((4, 2), 0.5)) for name, TreeEstimator in REG_TREES.items(): est = TreeEstimator(random_state=0, max_features=1) est.fit(X, y) - assert_equal(est.tree_.max_depth, 1) + assert est.tree_.max_depth == 1 assert_array_equal(est.predict(X), np.full((4, ), 0.5)) @@ -1294,7 +1280,7 @@ def test_big_input(): try: clf.fit(X, [0, 1, 0, 1]) except ValueError as e: - assert_in("float32", str(e)) + assert "float32" in str(e) def test_realloc(): @@ -1494,8 +1480,8 @@ def check_explicit_sparse_zeros(tree, max_depth=3, X_sparse_test = X_sparse_test.copy() # Ensure that we have explicit zeros - assert_greater((X_sparse.data == 0.).sum(), 0) - assert_greater((X_sparse_test.data == 0.).sum(), 0) + assert (X_sparse.data == 0.).sum() > 0 + assert (X_sparse_test.data == 0.).sum() > 0 # Perform the comparison d = TreeEstimator(random_state=0, max_depth=max_depth).fit(X, y) @@ -1554,11 +1540,11 @@ def test_1d_input(name): def _check_min_weight_leaf_split_level(TreeEstimator, X, y, sample_weight): est = TreeEstimator(random_state=0) est.fit(X, y, sample_weight=sample_weight) - assert_equal(est.tree_.max_depth, 1) + assert est.tree_.max_depth == 1 est = TreeEstimator(random_state=0, min_weight_fraction_leaf=0.4) est.fit(X, y, sample_weight=sample_weight) - assert_equal(est.tree_.max_depth, 0) + assert est.tree_.max_depth == 0 def check_min_weight_leaf_split_level(name): @@ -1656,7 +1642,7 @@ def check_decision_path(name): node_indicator_csr = est.decision_path(X) node_indicator = node_indicator_csr.toarray() - assert_equal(node_indicator.shape, (n_samples, est.tree_.node_count)) + assert node_indicator.shape == (n_samples, est.tree_.node_count) # Assert that leaves index are correct leaves = est.apply(X) @@ -1670,7 +1656,7 @@ def check_decision_path(name): # Ensure max depth is consistent with sum of indicator max_depth = node_indicator.sum(axis=1).max() - assert_less_equal(est.tree_.max_depth, max_depth) + assert est.tree_.max_depth <= max_depth @pytest.mark.parametrize("name", ALL_TREES) @@ -1801,17 +1787,17 @@ def _pickle_copy(obj): criteria = typename(n_outputs, n_classes) result = copy_func(criteria).__reduce__() typename_, (n_outputs_, n_classes_), _ = result - assert_equal(typename, typename_) - assert_equal(n_outputs, n_outputs_) + assert typename == typename_ + assert n_outputs == n_outputs_ assert_array_equal(n_classes, n_classes_) for _, typename in CRITERIA_REG.items(): criteria = typename(n_outputs, n_samples) result = copy_func(criteria).__reduce__() typename_, (n_outputs_, n_samples_), _ = result - assert_equal(typename, typename_) - assert_equal(n_outputs, n_outputs_) - assert_equal(n_samples, n_samples_) + assert typename == typename_ + assert n_outputs == n_outputs_ + assert n_samples == n_samples_ def test_empty_leaf_infinite_threshold(): From 99e6b5a435a4d55bf2444f084f8c171a3a1dc8c6 Mon Sep 17 00:00:00 2001 From: adrinjalali Date: Sun, 30 Jun 2019 16:26:56 +0200 Subject: [PATCH 19/22] fix utils --- sklearn/utils/estimator_checks.py | 80 ++++++++++---------- sklearn/utils/tests/test_class_weight.py | 10 +-- sklearn/utils/tests/test_estimator_checks.py | 4 +- sklearn/utils/tests/test_extmath.py | 34 ++++----- sklearn/utils/tests/test_fast_dict.py | 12 +-- sklearn/utils/tests/test_multiclass.py | 5 +- sklearn/utils/tests/test_murmurhash.py | 34 ++++----- sklearn/utils/tests/test_random.py | 14 ++-- sklearn/utils/tests/test_sparsefuncs.py | 21 ++--- sklearn/utils/tests/test_testing.py 
| 20 ++--- sklearn/utils/tests/test_utils.py | 26 +++---- sklearn/utils/tests/test_validation.py | 72 +++++++++--------- 12 files changed, 165 insertions(+), 167 deletions(-) diff --git a/sklearn/utils/estimator_checks.py b/sklearn/utils/estimator_checks.py index 0bec5c3911681..42220406b477d 100644 --- a/sklearn/utils/estimator_checks.py +++ b/sklearn/utils/estimator_checks.py @@ -523,16 +523,16 @@ def check_estimator_sparse_data(name, estimator_orig): if hasattr(estimator, "predict"): pred = estimator.predict(X) if tags['multioutput_only']: - assert_equal(pred.shape, (X.shape[0], 1)) + assert pred.shape == (X.shape[0], 1) else: - assert_equal(pred.shape, (X.shape[0],)) + assert pred.shape == (X.shape[0],) if hasattr(estimator, 'predict_proba'): probs = estimator.predict_proba(X) if tags['binary_only']: expected_probs_shape = (X.shape[0], 2) else: expected_probs_shape = (X.shape[0], 4) - assert_equal(probs.shape, expected_probs_shape) + assert probs.shape == expected_probs_shape except (TypeError, ValueError) as e: if 'sparse' not in repr(e).lower(): if "64" in matrix_format: @@ -721,8 +721,7 @@ def check_dict_unchanged(name, estimator_orig): if hasattr(estimator, method): dict_before = estimator.__dict__.copy() getattr(estimator, method)(X) - assert_dict_equal(estimator.__dict__, dict_before, - 'Estimator changes __dict__ during %s' % method) + assert estimator.__dict__ == dict_before, 'Estimator changes __dict__ during %s' % method def is_public_parameter(attr): @@ -1021,10 +1020,10 @@ def _check_transformer(name, transformer_orig, X, y): if isinstance(X_pred, tuple): for x_pred in X_pred: - assert_equal(x_pred.shape[0], n_samples) + assert x_pred.shape[0] == n_samples else: # check for consistent n_samples - assert_equal(X_pred.shape[0], n_samples) + assert X_pred.shape[0] == n_samples if hasattr(transformer, 'transform'): if name in CROSS_DECOMPOSITION: @@ -1060,8 +1059,8 @@ def _check_transformer(name, transformer_orig, X, y): err_msg="consecutive fit_transform outcomes " "not consistent in %s" % transformer) - assert_equal(_num_samples(X_pred2), n_samples) - assert_equal(_num_samples(X_pred3), n_samples) + assert _num_samples(X_pred2) == n_samples + assert _num_samples(X_pred3) == n_samples # raises error on malformed input for transform if hasattr(X, 'T') and not _safe_tags(transformer, "stateless"): @@ -1355,8 +1354,8 @@ def check_clustering(name, clusterer_orig, readonly_memmap=False): clusterer.fit(X.tolist()) pred = clusterer.labels_ - assert_equal(pred.shape, (n_samples,)) - assert_greater(adjusted_rand_score(pred, y), 0.4) + assert pred.shape == (n_samples,) + assert adjusted_rand_score(pred, y) > 0.4 if _safe_tags(clusterer, 'non_deterministic'): return set_random_state(clusterer) @@ -1365,8 +1364,8 @@ def check_clustering(name, clusterer_orig, readonly_memmap=False): assert_array_equal(pred, pred2) # fit_predict(X) and labels_ should be of type int - assert_in(pred.dtype, [np.dtype('int32'), np.dtype('int64')]) - assert_in(pred2.dtype, [np.dtype('int32'), np.dtype('int64')]) + assert pred.dtype in [np.dtype('int32'), np.dtype('int64')] + assert pred2.dtype in [np.dtype('int32'), np.dtype('int64')] # Add noise to X to test the possible values of the labels labels = clusterer.fit_predict(X_noise) @@ -1383,7 +1382,7 @@ def check_clustering(name, clusterer_orig, readonly_memmap=False): # Labels should be less than n_clusters - 1 if hasattr(clusterer, 'n_clusters'): n_clusters = getattr(clusterer, 'n_clusters') - assert_greater_equal(n_clusters - 1, labels_sorted[-1]) + assert 
n_clusters - 1 >= labels_sorted[-1] # else labels should be less than max(labels_) which is necessarily true @@ -1484,10 +1483,10 @@ def check_classifiers_train(name, classifier_orig, readonly_memmap=False): assert hasattr(classifier, "classes_") y_pred = classifier.predict(X) - assert_equal(y_pred.shape, (n_samples,)) + assert y_pred.shape == (n_samples,) # training set performance if not tags['poor_score']: - assert_greater(accuracy_score(y, y_pred), 0.83) + assert accuracy_score(y, y_pred) > 0.83 # raises error on malformed input for predict msg_pairwise = ( @@ -1512,13 +1511,13 @@ def check_classifiers_train(name, classifier_orig, readonly_memmap=False): decision = classifier.decision_function(X) if n_classes == 2: if not tags["multioutput_only"]: - assert_equal(decision.shape, (n_samples,)) + assert decision.shape == (n_samples,) else: - assert_equal(decision.shape, (n_samples, 1)) + assert decision.shape == (n_samples, 1) dec_pred = (decision.ravel() > 0).astype(np.int) assert_array_equal(dec_pred, y_pred) else: - assert_equal(decision.shape, (n_samples, n_classes)) + assert decision.shape == (n_samples, n_classes) assert_array_equal(np.argmax(decision, axis=1), y_pred) # raises error on malformed input for decision_function @@ -1537,7 +1536,7 @@ def check_classifiers_train(name, classifier_orig, readonly_memmap=False): if hasattr(classifier, "predict_proba"): # predict_proba agrees with predict y_prob = classifier.predict_proba(X) - assert_equal(y_prob.shape, (n_samples, n_classes)) + assert y_prob.shape == (n_samples, n_classes) assert_array_equal(np.argmax(y_prob, axis=1), y_pred) # check that probas for all classes sum to one assert_array_almost_equal(np.sum(y_prob, axis=1), @@ -1724,7 +1723,7 @@ def check_supervised_y_2d(name, estimator_orig): ", ".join([str(w_x) for w_x in w])) if not tags['multioutput']: # check that we warned if we don't support multi-output - assert_greater(len(w), 0, msg) + assert len(w) > 0, msg assert "DataConversionWarning('A column-vector y" \ " was passed when a 1d array was expected" in msg assert_allclose(y_pred.ravel(), y_pred_2d.ravel()) @@ -1879,13 +1878,13 @@ def check_regressors_train(name, regressor_orig, readonly_memmap=False): regressor.fit(X, y_) regressor.fit(X.tolist(), y_.tolist()) y_pred = regressor.predict(X) - assert_equal(y_pred.shape, y_.shape) + assert y_pred.shape == y_.shape # TODO: find out why PLS and CCA fail. RANSAC is random # and furthermore assumes the presence of outliers, hence # skipped if not _safe_tags(regressor, "poor_score"): - assert_greater(regressor.score(X, y_), 0.5) + assert regressor.score(X, y_) > 0.5 @ignore_warnings @@ -1961,7 +1960,7 @@ def check_class_weight_classifiers(name, classifier_orig): y_pred = classifier.predict(X_test) # XXX: Generally can use 0.89 here. 
On Windows, LinearSVC gets # 0.88 (Issue #9111) - assert_greater(np.mean(y_pred == 0), 0.87) + assert np.mean(y_pred == 0) > 0.87 @ignore_warnings(category=(DeprecationWarning, FutureWarning)) @@ -1980,7 +1979,7 @@ def check_class_weight_balanced_classifiers(name, classifier_orig, X_train, classifier.set_params(class_weight='balanced') classifier.fit(X_train, y_train) y_pred_balanced = classifier.predict(X_test) - assert_greater(f1_score(y_test, y_pred_balanced, average='weighted'), + assert (f1_score(y_test, y_pred_balanced, average='weighted') > f1_score(y_test, y_pred, average='weighted')) @@ -2056,10 +2055,10 @@ def check_estimators_overwrite_params(name, estimator_orig): # The only exception to this rule of immutable constructor parameters # is possible RandomState instance but in this check we explicitly # fixed the random_state params recursively to be integer seeds. - assert_equal(joblib.hash(new_value), joblib.hash(original_value), - "Estimator %s should not change or mutate " - " the parameter %s from %s to %s during fit." - % (name, param_name, original_value, new_value)) + assert joblib.hash(new_value) == joblib.hash(original_value), ( + "Estimator %s should not change or mutate " + " the parameter %s from %s to %s during fit." + % (name, param_name, original_value, new_value)) def check_no_attributes_set_in_init(name, estimator): @@ -2212,13 +2211,13 @@ def param_filter(p): init_params = init_params[1:] for init_param in init_params: - assert_not_equal(init_param.default, init_param.empty, - "parameter %s for %s has no default value" - % (init_param.name, type(estimator).__name__)) + assert init_param.default != init_param.empty, ( + "parameter %s for %s has no default value" + % (init_param.name, type(estimator).__name__)) if type(init_param.default) is type: - assert_in(init_param.default, [np.float64, np.int64]) + assert init_param.default in [np.float64, np.int64] else: - assert_in(type(init_param.default), + assert (type(init_param.default) in [str, int, float, bool, tuple, type(None), np.float64, types.FunctionType, joblib.Memory]) if init_param.name not in params.keys(): @@ -2308,9 +2307,9 @@ def check_transformer_n_iter(name, estimator_orig): # These return a n_iter per component. 
if name in CROSS_DECOMPOSITION: for iter_ in estimator.n_iter_: - assert_greater_equal(iter_, 1) + assert iter_ >= 1 else: - assert_greater_equal(estimator.n_iter_, 1) + assert estimator.n_iter_ >= 1 @ignore_warnings(category=(DeprecationWarning, FutureWarning)) @@ -2336,7 +2335,7 @@ def check_set_params(name, estimator_orig): estimator.set_params(**orig_params) curr_params = estimator.get_params(deep=False) - assert_equal(set(orig_params.keys()), set(curr_params.keys()), msg) + assert set(orig_params.keys()) == set(curr_params.keys()), msg for k, v in curr_params.items(): assert orig_params[k] is v, msg @@ -2364,7 +2363,7 @@ def check_set_params(name, estimator_orig): params_before_exception = curr_params curr_params = estimator.get_params(deep=False) try: - assert_equal(set(params_before_exception.keys()), + assert (set(params_before_exception.keys()) == set(curr_params.keys())) for k, v in curr_params.items(): assert params_before_exception[k] is v @@ -2372,9 +2371,8 @@ def check_set_params(name, estimator_orig): warnings.warn(change_warning_msg) else: curr_params = estimator.get_params(deep=False) - assert_equal(set(test_params.keys()), - set(curr_params.keys()), - msg) + assert (set(test_params.keys()) == + set(curr_params.keys())), msg for k, v in curr_params.items(): assert test_params[k] is v, msg test_params[param_name] = default_value diff --git a/sklearn/utils/tests/test_class_weight.py b/sklearn/utils/tests/test_class_weight.py index 751243f796a4c..e67fa6eb898ec 100644 --- a/sklearn/utils/tests/test_class_weight.py +++ b/sklearn/utils/tests/test_class_weight.py @@ -99,14 +99,14 @@ def test_compute_class_weight_balanced_negative(): y = np.asarray([-1, -1, 0, 0, -2, -2]) cw = compute_class_weight("balanced", classes, y) - assert_equal(len(cw), len(classes)) + assert len(cw) == len(classes) assert_array_almost_equal(cw, np.array([1., 1., 1.])) # Test with unbalanced class labels. y = np.asarray([-1, 0, 0, -2, -2, -2]) cw = compute_class_weight("balanced", classes, y) - assert_equal(len(cw), len(classes)) + assert len(cw) == len(classes) class_counts = np.bincount(y + 2) assert_almost_equal(np.dot(cw, class_counts), y.shape[0]) assert_array_almost_equal(cw, [2. 
/ 3, 2., 1.]) @@ -132,16 +132,16 @@ def test_compute_class_weight_default(): # Test for non specified weights cw = compute_class_weight(None, classes, y) - assert_equal(len(cw), classes_len) + assert len(cw) == classes_len assert_array_almost_equal(cw, np.ones(3)) # Tests for partly specified weights cw = compute_class_weight({2: 1.5}, classes, y) - assert_equal(len(cw), classes_len) + assert len(cw) == classes_len assert_array_almost_equal(cw, [1.5, 1., 1.]) cw = compute_class_weight({2: 1.5, 4: 0.5}, classes, y) - assert_equal(len(cw), classes_len) + assert len(cw) == classes_len assert_array_almost_equal(cw, [1.5, 1., 0.5]) diff --git a/sklearn/utils/tests/test_estimator_checks.py b/sklearn/utils/tests/test_estimator_checks.py index 73ec02457256c..abcd97a9a41db 100644 --- a/sklearn/utils/tests/test_estimator_checks.py +++ b/sklearn/utils/tests/test_estimator_checks.py @@ -456,7 +456,7 @@ def test_check_estimator_clones(): # without fitting old_hash = joblib.hash(est) check_estimator(est) - assert_equal(old_hash, joblib.hash(est)) + assert old_hash == joblib.hash(est) with ignore_warnings(category=(FutureWarning, DeprecationWarning)): # when 'est = SGDClassifier()' @@ -467,7 +467,7 @@ def test_check_estimator_clones(): est.fit(iris.data + 10, iris.target) old_hash = joblib.hash(est) check_estimator(est) - assert_equal(old_hash, joblib.hash(est)) + assert old_hash == joblib.hash(est) def test_check_estimators_unfitted(): diff --git a/sklearn/utils/tests/test_extmath.py b/sklearn/utils/tests/test_extmath.py index aad228d4548aa..bc8b598764b1a 100644 --- a/sklearn/utils/tests/test_extmath.py +++ b/sklearn/utils/tests/test_extmath.py @@ -47,7 +47,7 @@ def test_density(): X_lil = sparse.lil_matrix(X) for X_ in (X_csr, X_csc, X_coo, X_lil): - assert_equal(density(X_), density(X)) + assert density(X_) == density(X) def test_uniform_weights(): @@ -96,7 +96,7 @@ def check_randomized_svd_low_rank(dtype): X = make_low_rank_matrix(n_samples=n_samples, n_features=n_features, effective_rank=rank, tail_strength=0.0, random_state=0).astype(dtype, copy=False) - assert_equal(X.shape, (n_samples, n_features)) + assert X.shape == (n_samples, n_features) # compute the singular values of X using the slow exact method U, s, V = linalg.svd(X, full_matrices=False) @@ -123,9 +123,9 @@ def check_randomized_svd_low_rank(dtype): assert sa.dtype == np.float64 assert Va.dtype == np.float64 - assert_equal(Ua.shape, (n_samples, k)) - assert_equal(sa.shape, (k,)) - assert_equal(Va.shape, (k, n_features)) + assert Ua.shape == (n_samples, k) + assert sa.shape == (k,) + assert Va.shape == (k, n_features) # ensure that the singular values of both methods are equal up to the # real rank of the matrix @@ -203,7 +203,7 @@ def test_randomized_svd_low_rank_with_noise(): X = make_low_rank_matrix(n_samples=n_samples, n_features=n_features, effective_rank=rank, tail_strength=0.1, random_state=0) - assert_equal(X.shape, (n_samples, n_features)) + assert X.shape == (n_samples, n_features) # compute the singular values of X using the slow exact method _, s, _ = linalg.svd(X, full_matrices=False) @@ -216,7 +216,7 @@ def test_randomized_svd_low_rank_with_noise(): random_state=0) # the approximation does not tolerate the noise: - assert_greater(np.abs(s[:k] - sa).max(), 0.01) + assert np.abs(s[:k] - sa).max() > 0.01 # compute the singular values of X using the fast approximate # method with iterated power method @@ -240,7 +240,7 @@ def test_randomized_svd_infinite_rank(): X = make_low_rank_matrix(n_samples=n_samples, n_features=n_features, 
effective_rank=rank, tail_strength=1.0, random_state=0) - assert_equal(X.shape, (n_samples, n_features)) + assert X.shape == (n_samples, n_features) # compute the singular values of X using the slow exact method _, s, _ = linalg.svd(X, full_matrices=False) @@ -251,7 +251,7 @@ def test_randomized_svd_infinite_rank(): power_iteration_normalizer=normalizer) # the approximation does not tolerate the noise: - assert_greater(np.abs(s[:k] - sa).max(), 0.1) + assert np.abs(s[:k] - sa).max() > 0.1 # compute the singular values of X using the fast approximate method # with iterated power method @@ -273,7 +273,7 @@ def test_randomized_svd_transpose_consistency(): X = make_low_rank_matrix(n_samples=n_samples, n_features=n_features, effective_rank=rank, tail_strength=0.5, random_state=0) - assert_equal(X.shape, (n_samples, n_features)) + assert X.shape == (n_samples, n_features) U1, s1, V1 = randomized_svd(X, k, n_iter=3, transpose=False, random_state=0) @@ -313,7 +313,7 @@ def test_randomized_svd_power_iteration_normalizer(): power_iteration_normalizer='none') A = X - U.dot(np.diag(s).dot(V)) error_20 = linalg.norm(A, ord='fro') - assert_greater(np.abs(error_2 - error_20), 100) + assert np.abs(error_2 - error_20) > 100 for normalizer in ['LU', 'QR', 'auto']: U, s, V = randomized_svd(X, n_components, n_iter=2, @@ -328,7 +328,7 @@ def test_randomized_svd_power_iteration_normalizer(): random_state=0) A = X - U.dot(np.diag(s).dot(V)) error = linalg.norm(A, ord='fro') - assert_greater(15, np.abs(error_2 - error)) + assert 15 > np.abs(error_2 - error) def test_randomized_svd_sparse_warnings(): @@ -552,7 +552,7 @@ def naive_mean_variance_update(x, last_mean, last_variance, stable_var = two_pass_var # Naive one pass var: >tol (=1063) - assert_greater(np.abs(stable_var(A) - one_pass_var(A)).max(), tol) + assert np.abs(stable_var(A) - one_pass_var(A)).max() > tol # Starting point for online algorithms: after A0 @@ -561,10 +561,10 @@ def naive_mean_variance_update(x, last_mean, last_variance, for i in range(A1.shape[0]): mean, var, n = \ naive_mean_variance_update(A1[i, :], mean, var, n) - assert_equal(n, A.shape[0]) + assert n == A.shape[0] # the mean is also slightly unstable - assert_greater(np.abs(A.mean(axis=0) - mean).max(), 1e-6) - assert_greater(np.abs(stable_var(A) - var).max(), tol) + assert np.abs(A.mean(axis=0) - mean).max() > 1e-6 + assert np.abs(stable_var(A) - var).max() > tol # Robust implementation: np.abs(stable_var(A) - var).max() def test_incremental_variance_ddof(): diff --git a/sklearn/utils/tests/test_fast_dict.py b/sklearn/utils/tests/test_fast_dict.py index 1131257330dcf..b060c5f599e9e 100644 --- a/sklearn/utils/tests/test_fast_dict.py +++ b/sklearn/utils/tests/test_fast_dict.py @@ -13,15 +13,15 @@ def test_int_float_dict(): d = IntFloatDict(keys, values) for key, value in zip(keys, values): - assert_equal(d[key], value) - assert_equal(len(d), len(keys)) + assert d[key] == value + assert len(d) == len(keys) d.append(120, 3.) 
- assert_equal(d[120], 3.0) - assert_equal(len(d), len(keys) + 1) + assert d[120] == 3.0 + assert len(d) == len(keys) + 1 for i in range(2000): d.append(i + 1000, 4.0) - assert_equal(d[1100], 4.0) + assert d[1100] == 4.0 def test_int_float_dict_argmin(): @@ -29,4 +29,4 @@ def test_int_float_dict_argmin(): keys = np.arange(100, dtype=np.intp) values = np.arange(100, dtype=np.float64) d = IntFloatDict(keys, values) - assert_equal(argmin(d), (0, 0)) + assert argmin(d) == (0, 0) diff --git a/sklearn/utils/tests/test_multiclass.py b/sklearn/utils/tests/test_multiclass.py index 6adce033155bd..962d927f43ba1 100644 --- a/sklearn/utils/tests/test_multiclass.py +++ b/sklearn/utils/tests/test_multiclass.py @@ -280,9 +280,8 @@ def test_check_classification_targets(): def test_type_of_target(): for group, group_examples in EXAMPLES.items(): for example in group_examples: - assert_equal(type_of_target(example), group, - msg=('type_of_target(%r) should be %r, got %r' - % (example, group, type_of_target(example)))) + assert type_of_target(example) == group, ('type_of_target(%r) should be %r, got %r' + % (example, group, type_of_target(example))) for example in NON_ARRAY_LIKE_EXAMPLES: msg_regex = r'Expected array-like \(array or non-string sequence\).*' diff --git a/sklearn/utils/tests/test_murmurhash.py b/sklearn/utils/tests/test_murmurhash.py index 6066012fa0162..abd03bff61d29 100644 --- a/sklearn/utils/tests/test_murmurhash.py +++ b/sklearn/utils/tests/test_murmurhash.py @@ -10,17 +10,17 @@ def test_mmhash3_int(): - assert_equal(murmurhash3_32(3), 847579505) - assert_equal(murmurhash3_32(3, seed=0), 847579505) - assert_equal(murmurhash3_32(3, seed=42), -1823081949) + assert murmurhash3_32(3) == 847579505 + assert murmurhash3_32(3, seed=0) == 847579505 + assert murmurhash3_32(3, seed=42) == -1823081949 - assert_equal(murmurhash3_32(3, positive=False), 847579505) - assert_equal(murmurhash3_32(3, seed=0, positive=False), 847579505) - assert_equal(murmurhash3_32(3, seed=42, positive=False), -1823081949) + assert murmurhash3_32(3, positive=False) == 847579505 + assert murmurhash3_32(3, seed=0, positive=False) == 847579505 + assert murmurhash3_32(3, seed=42, positive=False) == -1823081949 - assert_equal(murmurhash3_32(3, positive=True), 847579505) - assert_equal(murmurhash3_32(3, seed=0, positive=True), 847579505) - assert_equal(murmurhash3_32(3, seed=42, positive=True), 2471885347) + assert murmurhash3_32(3, positive=True) == 847579505 + assert murmurhash3_32(3, seed=0, positive=True) == 847579505 + assert murmurhash3_32(3, seed=42, positive=True) == 2471885347 def test_mmhash3_int_array(): @@ -43,19 +43,19 @@ def test_mmhash3_int_array(): def test_mmhash3_bytes(): - assert_equal(murmurhash3_32(b'foo', 0), -156908512) - assert_equal(murmurhash3_32(b'foo', 42), -1322301282) + assert murmurhash3_32(b'foo', 0) == -156908512 + assert murmurhash3_32(b'foo', 42) == -1322301282 - assert_equal(murmurhash3_32(b'foo', 0, positive=True), 4138058784) - assert_equal(murmurhash3_32(b'foo', 42, positive=True), 2972666014) + assert murmurhash3_32(b'foo', 0, positive=True) == 4138058784 + assert murmurhash3_32(b'foo', 42, positive=True) == 2972666014 def test_mmhash3_unicode(): - assert_equal(murmurhash3_32('foo', 0), -156908512) - assert_equal(murmurhash3_32('foo', 42), -1322301282) + assert murmurhash3_32('foo', 0) == -156908512 + assert murmurhash3_32('foo', 42) == -1322301282 - assert_equal(murmurhash3_32('foo', 0, positive=True), 4138058784) - assert_equal(murmurhash3_32('foo', 42, positive=True), 2972666014) + assert 
murmurhash3_32('foo', 0, positive=True) == 4138058784 + assert murmurhash3_32('foo', 42, positive=True) == 2972666014 def test_no_collision_on_byte_range(): diff --git a/sklearn/utils/tests/test_random.py b/sklearn/utils/tests/test_random.py index 5e31174d725ee..2798edad88cab 100644 --- a/sklearn/utils/tests/test_random.py +++ b/sklearn/utils/tests/test_random.py @@ -37,13 +37,13 @@ def check_edge_case_of_sample_int(sample_without_replacement): assert_raises(ValueError, sample_without_replacement, 1, 2) # n_population == n_samples - assert_equal(sample_without_replacement(0, 0).shape, (0, )) + assert sample_without_replacement(0, 0).shape == (0, ) - assert_equal(sample_without_replacement(1, 1).shape, (1, )) + assert sample_without_replacement(1, 1).shape == (1, ) # n_population >= n_samples - assert_equal(sample_without_replacement(5, 0).shape, (0, )) - assert_equal(sample_without_replacement(5, 1).shape, (1, )) + assert sample_without_replacement(5, 0).shape == (0, ) + assert sample_without_replacement(5, 1).shape == (1, ) # n_population < 0 or n_samples < 0 assert_raises(ValueError, sample_without_replacement, -1, 5) @@ -59,13 +59,13 @@ def check_sample_int(sample_without_replacement): for n_samples in range(n_population + 1): s = sample_without_replacement(n_population, n_samples) - assert_equal(len(s), n_samples) + assert len(s) == n_samples unique = np.unique(s) - assert_equal(np.size(unique), n_samples) + assert np.size(unique) == n_samples assert np.all(unique < n_population) # test edge case n_population == n_samples == 0 - assert_equal(np.size(sample_without_replacement(0, 0)), 0) + assert np.size(sample_without_replacement(0, 0)) == 0 def check_sample_int_distribution(sample_without_replacement): diff --git a/sklearn/utils/tests/test_sparsefuncs.py b/sklearn/utils/tests/test_sparsefuncs.py index 31118b2a921f3..fd41b07dafafe 100644 --- a/sklearn/utils/tests/test_sparsefuncs.py +++ b/sklearn/utils/tests/test_sparsefuncs.py @@ -48,8 +48,8 @@ def test_mean_variance_axis0(): for X_sparse in (X_csr, X_csc): X_sparse = X_sparse.astype(input_dtype) X_means, X_vars = mean_variance_axis(X_sparse, axis=0) - assert_equal(X_means.dtype, output_dtype) - assert_equal(X_vars.dtype, output_dtype) + assert X_means.dtype == output_dtype + assert X_vars.dtype == output_dtype assert_array_almost_equal(X_means, np.mean(X_test, axis=0)) assert_array_almost_equal(X_vars, np.var(X_test, axis=0)) @@ -79,8 +79,8 @@ def test_mean_variance_axis1(): for X_sparse in (X_csr, X_csc): X_sparse = X_sparse.astype(input_dtype) X_means, X_vars = mean_variance_axis(X_sparse, axis=0) - assert_equal(X_means.dtype, output_dtype) - assert_equal(X_vars.dtype, output_dtype) + assert X_means.dtype == output_dtype + assert X_vars.dtype == output_dtype assert_array_almost_equal(X_means, np.mean(X_test, axis=0)) assert_array_almost_equal(X_vars, np.var(X_test, axis=0)) @@ -116,13 +116,14 @@ def test_incr_mean_variance_axis(): incr_mean_variance_axis(X_csr, axis, last_mean, last_var, last_n) assert_array_almost_equal(X_means, X_means_incr) assert_array_almost_equal(X_vars, X_vars_incr) - assert_equal(X.shape[axis], n_incr) # X.shape[axis] picks # samples + # X.shape[axis] picks # samples + assert_array_equal(X.shape[axis], n_incr) X_csc = sp.csc_matrix(X_lil) X_means, X_vars = mean_variance_axis(X_csc, axis) assert_array_almost_equal(X_means, X_means_incr) assert_array_almost_equal(X_vars, X_vars_incr) - assert_equal(X.shape[axis], n_incr) + assert_array_equal(X.shape[axis], n_incr) # Test _incremental_mean_and_var with whole 
data X = np.vstack(data_chunks) @@ -144,11 +145,11 @@ def test_incr_mean_variance_axis(): X_means_incr, X_vars_incr, n_incr = \ incr_mean_variance_axis(X_sparse, axis, last_mean, last_var, last_n) - assert_equal(X_means_incr.dtype, output_dtype) - assert_equal(X_vars_incr.dtype, output_dtype) + assert X_means_incr.dtype == output_dtype + assert X_vars_incr.dtype == output_dtype assert_array_almost_equal(X_means, X_means_incr) assert_array_almost_equal(X_vars, X_vars_incr) - assert_equal(X.shape[axis], n_incr) + assert_array_equal(X.shape[axis], n_incr) @pytest.mark.parametrize("axis", [0, 1]) @@ -522,7 +523,7 @@ def test_inplace_normalize(): assert X_csr.indices.dtype == index_dtype assert X_csr.indptr.dtype == index_dtype inplace_csr_row_normalize(X_csr) - assert_equal(X_csr.dtype, dtype) + assert X_csr.dtype == dtype if inplace_csr_row_normalize is inplace_csr_row_normalize_l2: X_csr.data **= 2 assert_array_almost_equal(np.abs(X_csr).sum(axis=1), ones) diff --git a/sklearn/utils/tests/test_testing.py b/sklearn/utils/tests/test_testing.py index 8bccec90c4856..8d17df6952c06 100644 --- a/sklearn/utils/tests/test_testing.py +++ b/sklearn/utils/tests/test_testing.py @@ -37,24 +37,24 @@ def test_assert_less(): - assert_less(0, 1) + assert 0 < 1 assert_raises(AssertionError, assert_less, 1, 0) def test_assert_greater(): - assert_greater(1, 0) + assert 1 > 0 assert_raises(AssertionError, assert_greater, 0, 1) def test_assert_less_equal(): - assert_less_equal(0, 1) - assert_less_equal(1, 1) + assert 0 <= 1 + assert 1 <= 1 assert_raises(AssertionError, assert_less_equal, 1, 0) def test_assert_greater_equal(): - assert_greater_equal(1, 0) - assert_greater_equal(1, 1) + assert 1 >= 0 + assert 1 >= 1 assert_raises(AssertionError, assert_greater_equal, 0, 1) @@ -64,7 +64,7 @@ def test_set_random_state(): # Linear Discriminant Analysis doesn't have random state: smoke test set_random_state(lda, 3) set_random_state(tree, 3) - assert_equal(tree.random_state, 3) + assert tree.random_state == 3 def test_assert_allclose_dense_sparse(): @@ -236,13 +236,13 @@ def f(): with warnings.catch_warnings(): warnings.simplefilter("ignore", UserWarning) filters_orig = warnings.filters[:] - assert_equal(assert_warns(UserWarning, f), 3) + assert assert_warns(UserWarning, f) == 3 # test that assert_warns doesn't have side effects on warnings # filters - assert_equal(warnings.filters, filters_orig) + assert warnings.filters == filters_orig assert_raises(AssertionError, assert_no_warnings, f) - assert_equal(assert_no_warnings(lambda x: x, 1), 1) + assert assert_no_warnings(lambda x: x, 1) == 1 def test_warn_wrong_warning(self): def f(): diff --git a/sklearn/utils/tests/test_utils.py b/sklearn/utils/tests/test_utils.py index f81a4830d7420..3b831408a821a 100644 --- a/sklearn/utils/tests/test_utils.py +++ b/sklearn/utils/tests/test_utils.py @@ -57,9 +57,9 @@ def ham(): spam = ham() - assert_equal(spam, "spam") # function must remain usable + assert spam == "spam" # function must remain usable - assert_equal(len(w), 1) + assert len(w) == 1 assert issubclass(w[0].category, DeprecationWarning) assert "deprecated" in str(w[0].message).lower() @@ -75,7 +75,7 @@ class Ham: assert hasattr(ham, "SPAM") - assert_equal(len(w), 1) + assert len(w) == 1 assert issubclass(w[0].category, DeprecationWarning) assert "deprecated" in str(w[0].message).lower() @@ -90,7 +90,7 @@ def test_resample(): replace=False, n_samples=3) assert_raises(ValueError, resample, [0, 1], [0, 1], meaning_of_life=42) # Issue:6581, n_samples can be more when replace 
is True (default). - assert_equal(len(resample([1, 2], n_samples=5)), 5) + assert len(resample([1, 2], n_samples=5)) == 5 def test_resample_stratified(): @@ -161,10 +161,10 @@ def test_safe_mask(): mask = [False, False, True, True, True] mask = safe_mask(X, mask) - assert_equal(X[mask].shape[0], 3) + assert X[mask].shape[0] == 3 mask = safe_mask(X_csr, mask) - assert_equal(X_csr[mask].shape[0], 3) + assert X_csr[mask].shape[0] == 3 def test_column_or_1d(): @@ -241,7 +241,7 @@ def to_tuple(A): # to make the inner arrays hashable A = np.array([[[1, 2], [3, 4]], [[5, 6], [7, 8]]]) # A.shape = (2,2,2) S = set(to_tuple(A)) shuffle(A) # shouldn't raise a ValueError for dim = 3 - assert_equal(set(to_tuple(A)), S) + assert set(to_tuple(A)) == S def test_shuffle_dont_convert_to_array(): @@ -257,20 +257,20 @@ def test_shuffle_dont_convert_to_array(): e = sp.csc_matrix(np.arange(6).reshape(3, 2)) a_s, b_s, c_s, d_s, e_s = shuffle(a, b, c, d, e, random_state=0) - assert_equal(a_s, ['c', 'b', 'a']) - assert_equal(type(a_s), list) + assert a_s == ['c', 'b', 'a'] + assert type(a_s) == list assert_array_equal(b_s, ['c', 'b', 'a']) - assert_equal(b_s.dtype, object) + assert b_s.dtype == object - assert_equal(c_s, [3, 2, 1]) - assert_equal(type(c_s), list) + assert c_s == [3, 2, 1] + assert type(c_s) == list assert_array_equal(d_s, np.array([['c', 2], ['b', 1], ['a', 0]], dtype=object)) - assert_equal(type(d_s), MockDataFrame) + assert type(d_s) == MockDataFrame assert_array_equal(e_s.toarray(), np.array([[4, 5], [2, 3], diff --git a/sklearn/utils/tests/test_validation.py b/sklearn/utils/tests/test_validation.py index 7484eb16882d6..6bdd8d9047376 100644 --- a/sklearn/utils/tests/test_validation.py +++ b/sklearn/utils/tests/test_validation.py @@ -55,13 +55,13 @@ def test_as_float_array(): X = np.ones((3, 10), dtype=np.int32) X = X + np.arange(10, dtype=np.int32) X2 = as_float_array(X, copy=False) - assert_equal(X2.dtype, np.float32) + assert X2.dtype == np.float32 # Another test X = X.astype(np.int64) X2 = as_float_array(X, copy=True) # Checking that the array wasn't overwritten assert as_float_array(X, False) is not X - assert_equal(X2.dtype, np.float64) + assert X2.dtype == np.float64 # Test int dtypes <= 32bit tested_dtypes = [np.bool, np.int8, np.int16, np.int32, @@ -69,12 +69,12 @@ def test_as_float_array(): for dtype in tested_dtypes: X = X.astype(dtype) X2 = as_float_array(X) - assert_equal(X2.dtype, np.float32) + assert X2.dtype == np.float32 # Test object dtype X = X.astype(object) X2 = as_float_array(X, copy=True) - assert_equal(X2.dtype, np.float64) + assert X2.dtype == np.float64 # Here, X is of the right type, it shouldn't be modified X = np.ones((3, 2), dtype=np.float32) @@ -209,7 +209,7 @@ def test_check_array(): assert_raises(TypeError, check_array, X_csr) # ensure_2d=False X_array = check_array([0, 1, 2], ensure_2d=False) - assert_equal(X_array.ndim, 1) + assert X_array.ndim == 1 # ensure_2d=True with 1d array assert_raise_message(ValueError, 'Expected 2D array, got 1D array instead', check_array, [0, 1, 2], ensure_2d=True) @@ -235,9 +235,9 @@ def test_check_array(): for X, dtype, order, copy in product(Xs, dtypes, orders, copys): X_checked = check_array(X, dtype=dtype, order=order, copy=copy) if dtype is not None: - assert_equal(X_checked.dtype, dtype) + assert X_checked.dtype == dtype else: - assert_equal(X_checked.dtype, X.dtype) + assert X_checked.dtype == X.dtype if order == 'C': assert X_checked.flags['C_CONTIGUOUS'] assert not X_checked.flags['F_CONTIGUOUS'] @@ -273,17 +273,17 @@ def 
test_check_array(): "Can't check dok sparse matrix for nan or inf."] assert message in messages else: - assert_equal(len(w), 0) + assert len(w) == 0 if dtype is not None: - assert_equal(X_checked.dtype, dtype) + assert X_checked.dtype == dtype else: - assert_equal(X_checked.dtype, X.dtype) + assert X_checked.dtype == X.dtype if X.format in accept_sparse: # no change if allowed - assert_equal(X.format, X_checked.format) + assert X.format == X_checked.format else: # got converted - assert_equal(X_checked.format, accept_sparse[0]) + assert X_checked.format == accept_sparse[0] if copy: assert X is not X_checked else: @@ -322,28 +322,28 @@ def test_check_array_pandas_dtype_object_conversion(): # get converted X = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]], dtype=np.object) X_df = MockDataFrame(X) - assert_equal(check_array(X_df).dtype.kind, "f") - assert_equal(check_array(X_df, ensure_2d=False).dtype.kind, "f") + assert check_array(X_df).dtype.kind == "f" + assert check_array(X_df, ensure_2d=False).dtype.kind == "f" # smoke-test against dataframes with column named "dtype" X_df.dtype = "Hans" - assert_equal(check_array(X_df, ensure_2d=False).dtype.kind, "f") + assert check_array(X_df, ensure_2d=False).dtype.kind == "f" def test_check_array_on_mock_dataframe(): arr = np.array([[0.2, 0.7], [0.6, 0.5], [0.4, 0.1], [0.7, 0.2]]) mock_df = MockDataFrame(arr) checked_arr = check_array(mock_df) - assert_equal(checked_arr.dtype, + assert (checked_arr.dtype == arr.dtype) checked_arr = check_array(mock_df, dtype=np.float32) - assert_equal(checked_arr.dtype, np.dtype(np.float32)) + assert checked_arr.dtype == np.dtype(np.float32) def test_check_array_dtype_stability(): # test that lists with ints don't get converted to floats X = [[1, 2, 3], [4, 5, 6], [7, 8, 9]] - assert_equal(check_array(X).dtype.kind, "i") - assert_equal(check_array(X, ensure_2d=False).dtype.kind, "i") + assert check_array(X).dtype.kind == "i" + assert check_array(X, ensure_2d=False).dtype.kind == "i" def test_check_array_dtype_warning(): @@ -362,12 +362,12 @@ def test_check_array_dtype_warning(): for X in integer_data: X_checked = assert_no_warnings(check_array, X, dtype=np.float64, accept_sparse=True) - assert_equal(X_checked.dtype, np.float64) + assert X_checked.dtype == np.float64 X_checked = assert_warns(DataConversionWarning, check_array, X, dtype=np.float64, accept_sparse=True, warn_on_dtype=True) - assert_equal(X_checked.dtype, np.float64) + assert X_checked.dtype == np.float64 # Check that the warning message includes the name of the Estimator X_checked = assert_warns_message(DataConversionWarning, @@ -377,47 +377,47 @@ def test_check_array_dtype_warning(): accept_sparse=True, warn_on_dtype=True, estimator='SomeEstimator') - assert_equal(X_checked.dtype, np.float64) + assert X_checked.dtype == np.float64 X_checked, y_checked = assert_warns_message( DataConversionWarning, 'KNeighborsClassifier', check_X_y, X, y, dtype=np.float64, accept_sparse=True, warn_on_dtype=True, estimator=KNeighborsClassifier()) - assert_equal(X_checked.dtype, np.float64) + assert X_checked.dtype == np.float64 for X in float64_data: with pytest.warns(None) as record: warnings.simplefilter("ignore", DeprecationWarning) # 0.23 X_checked = check_array(X, dtype=np.float64, accept_sparse=True, warn_on_dtype=True) - assert_equal(X_checked.dtype, np.float64) + assert X_checked.dtype == np.float64 X_checked = check_array(X, dtype=np.float64, accept_sparse=True, warn_on_dtype=False) - assert_equal(X_checked.dtype, np.float64) + assert X_checked.dtype == np.float64 
assert len(record) == 0 for X in float32_data: X_checked = assert_no_warnings(check_array, X, dtype=[np.float64, np.float32], accept_sparse=True) - assert_equal(X_checked.dtype, np.float32) + assert X_checked.dtype == np.float32 assert X_checked is X X_checked = assert_no_warnings(check_array, X, dtype=[np.float64, np.float32], accept_sparse=['csr', 'dok'], copy=True) - assert_equal(X_checked.dtype, np.float32) + assert X_checked.dtype == np.float32 assert X_checked is not X X_checked = assert_no_warnings(check_array, X_csc_float32, dtype=[np.float64, np.float32], accept_sparse=['csr', 'dok'], copy=False) - assert_equal(X_checked.dtype, np.float32) + assert X_checked.dtype == np.float32 assert X_checked is not X_csc_float32 - assert_equal(X_checked.format, 'csr') + assert X_checked.format == 'csr' def test_check_array_warn_on_dtype_deprecation(): @@ -624,7 +624,7 @@ def test_check_symmetric(): output = check_symmetric(arr, raise_warning=False) if sp.issparse(output): - assert_equal(output.format, arr_format) + assert output.format == arr_format assert_array_equal(output.toarray(), arr_sym) else: assert_array_equal(output, arr_sym) @@ -648,18 +648,18 @@ def test_check_is_fitted(): try: check_is_fitted(ard, "coef_", "Random message %(name)s, %(name)s") except ValueError as e: - assert_equal(str(e), "Random message ARDRegression, ARDRegression") + assert str(e) == "Random message ARDRegression, ARDRegression" try: check_is_fitted(svr, "support_", "Another message %(name)s, %(name)s") except AttributeError as e: - assert_equal(str(e), "Another message SVR, SVR") + assert str(e) == "Another message SVR, SVR" ard.fit(*make_blobs()) svr.fit(*make_blobs()) - assert_equal(None, check_is_fitted(ard, "coef_")) - assert_equal(None, check_is_fitted(svr, "support_")) + assert None == check_is_fitted(ard, "coef_") + assert None == check_is_fitted(svr, "support_") def test_check_consistent_length(): @@ -763,9 +763,9 @@ class WrongDummyMemory: @pytest.mark.filterwarnings("ignore:The 'cachedir' attribute") def test_check_memory(): memory = check_memory("cache_directory") - assert_equal(memory.cachedir, os.path.join('cache_directory', 'joblib')) + assert memory.cachedir == os.path.join('cache_directory', 'joblib') memory = check_memory(None) - assert_equal(memory.cachedir, None) + assert memory.cachedir == None dummy = DummyMemory() memory = check_memory(dummy) assert memory is dummy From 0e5d4e787a796fcf89e3112511fb95d45bb7d6bf Mon Sep 17 00:00:00 2001 From: adrinjalali Date: Sun, 30 Jun 2019 16:53:52 +0200 Subject: [PATCH 20/22] pep8 --- sklearn/cluster/tests/test_bicluster.py | 10 +++---- sklearn/cluster/tests/test_k_means.py | 4 +-- sklearn/datasets/tests/test_base.py | 8 +++--- .../datasets/tests/test_samples_generator.py | 13 ++++++---- sklearn/ensemble/tests/test_base.py | 4 +-- sklearn/ensemble/tests/test_forest.py | 9 ++++--- .../ensemble/tests/test_weight_boosting.py | 3 +-- sklearn/linear_model/tests/test_omp.py | 4 +-- sklearn/linear_model/tests/test_ridge.py | 2 +- sklearn/manifold/tests/test_locally_linear.py | 4 +-- .../manifold/tests/test_spectral_embedding.py | 12 ++++----- sklearn/manifold/tests/test_t_sne.py | 2 +- .../cluster/tests/test_unsupervised.py | 4 +-- sklearn/metrics/tests/test_classification.py | 26 +++++++++---------- sklearn/metrics/tests/test_regression.py | 4 +-- sklearn/model_selection/tests/test_split.py | 2 +- .../model_selection/tests/test_validation.py | 4 +-- sklearn/preprocessing/tests/test_data.py | 2 +- sklearn/tests/test_base.py | 6 ++--- 
sklearn/tests/test_calibration.py | 2 +- sklearn/tests/test_discriminant_analysis.py | 9 ++++--- sklearn/tests/test_init.py | 2 +- sklearn/tests/test_kernel_approximation.py | 4 +-- sklearn/tests/test_pipeline.py | 2 +- sklearn/tree/tests/test_tree.py | 4 +-- sklearn/utils/estimator_checks.py | 5 ++-- sklearn/utils/tests/test_multiclass.py | 5 ++-- sklearn/utils/tests/test_validation.py | 6 ++--- 28 files changed, 86 insertions(+), 76 deletions(-) diff --git a/sklearn/cluster/tests/test_bicluster.py b/sklearn/cluster/tests/test_bicluster.py index a5b486246c821..7c9179938305b 100644 --- a/sklearn/cluster/tests/test_bicluster.py +++ b/sklearn/cluster/tests/test_bicluster.py @@ -86,7 +86,7 @@ def test_spectral_coclustering(): assert_array_equal(model.rows_.sum(axis=0), np.ones(30)) assert_array_equal(model.columns_.sum(axis=0), np.ones(30)) assert consensus_score(model.biclusters_, - (rows, cols)) == 1 + (rows, cols)) == 1 _test_shape_indices(model) @@ -127,7 +127,7 @@ def test_spectral_biclustering(): assert_array_equal(model.columns_.sum(axis=0), np.repeat(3, 30)) assert consensus_score(model.biclusters_, - (rows, cols)) == 1 + (rows, cols)) == 1 _test_shape_indices(model) @@ -217,19 +217,19 @@ def test_perfect_checkerboard(): random_state=0) model.fit(S) assert consensus_score(model.biclusters_, - (rows, cols)) == 1 + (rows, cols)) == 1 S, rows, cols = make_checkerboard((40, 30), 3, noise=0, random_state=0) model.fit(S) assert consensus_score(model.biclusters_, - (rows, cols)) == 1 + (rows, cols)) == 1 S, rows, cols = make_checkerboard((30, 40), 3, noise=0, random_state=0) model.fit(S) assert consensus_score(model.biclusters_, - (rows, cols)) == 1 + (rows, cols)) == 1 def test_errors(): diff --git a/sklearn/cluster/tests/test_k_means.py b/sklearn/cluster/tests/test_k_means.py index 212c2311a84fd..03e44db0390b7 100644 --- a/sklearn/cluster/tests/test_k_means.py +++ b/sklearn/cluster/tests/test_k_means.py @@ -801,8 +801,8 @@ def test_k_means_init_centers(): assert_array_equal(init_centers, init_centers_test) km = KMeans(init=init_centers_test, n_clusters=3, n_init=1) km.fit(X_test) - assert False == np.may_share_memory(km.cluster_centers_, - init_centers) + assert np.may_share_memory(km.cluster_centers_, + init_centers) is False @pytest.mark.parametrize("data", [X, X_csr], ids=["dense", "sparse"]) diff --git a/sklearn/datasets/tests/test_base.py b/sklearn/datasets/tests/test_base.py index ef802d0c588a6..34dfce66377e1 100644 --- a/sklearn/datasets/tests/test_base.py +++ b/sklearn/datasets/tests/test_base.py @@ -88,7 +88,7 @@ def test_default_empty_load_files(load_files_root): res = load_files(load_files_root) assert len(res.filenames) == 0 assert len(res.target_names) == 0 - assert res.DESCR == None + assert res.DESCR is None def test_default_load_files(test_category_dir_1, test_category_dir_2, @@ -98,7 +98,7 @@ def test_default_load_files(test_category_dir_1, test_category_dir_2, res = load_files(load_files_root) assert len(res.filenames) == 1 assert len(res.target_names) == 2 - assert res.DESCR == None + assert res.DESCR is None assert res.data == [b"Hello World!\n"] @@ -120,8 +120,8 @@ def test_load_files_wo_load_content( res = load_files(load_files_root, load_content=False) assert len(res.filenames) == 1 assert len(res.target_names) == 2 - assert res.DESCR == None - assert res.get('data') == None + assert res.DESCR is None + assert res.get('data') is None def test_load_sample_images(): diff --git a/sklearn/datasets/tests/test_samples_generator.py 
b/sklearn/datasets/tests/test_samples_generator.py index 092fcc1290dea..6cf35d91afc45 100644 --- a/sklearn/datasets/tests/test_samples_generator.py +++ b/sklearn/datasets/tests/test_samples_generator.py @@ -60,7 +60,8 @@ def test_make_classification(): assert X.shape == (2000, 31), "X shape mismatch" assert y.shape == (2000,), "y shape mismatch" assert (np.unique(X.view([('', X.dtype)]*X.shape[1])).view(X.dtype) - .reshape(-1, X.shape[1]).shape[0] == 2000), "Unexpected number of unique rows" + .reshape(-1, X.shape[1]).shape[0] == 2000), ( + "Unexpected number of unique rows") def test_make_classification_informative_features(): @@ -112,8 +113,8 @@ def test_make_classification_informative_features(): for clusters in clusters_by_class.values(): assert len(clusters) == n_clusters_per_class, ( "Wrong number of clusters per class") - assert (len(clusters_by_class) - == n_classes), "Wrong number of classes" + assert (len(clusters_by_class) == n_classes), ( + "Wrong number of classes") assert_array_almost_equal(np.bincount(y) / len(y) // weights, [1] * n_classes, @@ -462,8 +463,10 @@ def test_make_circles(): assert_almost_equal(dist_sqr, dist_exp, err_msg="Point is not on expected circle") - assert X[y == 0].shape == (n_outer, 2), "Samples not correctly distributed across circles." - assert X[y == 1].shape == (n_inner, 2), "Samples not correctly distributed across circles." + assert X[y == 0].shape == (n_outer, 2), ( + "Samples not correctly distributed across circles.") + assert X[y == 1].shape == (n_inner, 2), ( + "Samples not correctly distributed across circles.") assert_raises(ValueError, make_circles, factor=-0.01) assert_raises(ValueError, make_circles, factor=1.) diff --git a/sklearn/ensemble/tests/test_base.py b/sklearn/ensemble/tests/test_base.py index 7cd2124359e72..16b4df44a5ea8 100644 --- a/sklearn/ensemble/tests/test_base.py +++ b/sklearn/ensemble/tests/test_base.py @@ -40,7 +40,7 @@ def test_base(): assert 3 == len(ensemble.estimators_) assert isinstance(ensemble[0], Perceptron) - assert ensemble[0].random_state == None + assert ensemble[0].random_state is None assert isinstance(ensemble[1].random_state, int) assert isinstance(ensemble[2].random_state, int) assert ensemble[1].random_state != ensemble[2].random_state @@ -82,7 +82,7 @@ def test_set_random_states(): _set_random_states(LinearDiscriminantAnalysis(), random_state=17) clf1 = Perceptron(tol=1e-3, random_state=None) - assert clf1.random_state == None + assert clf1.random_state is None # check random_state is None still sets _set_random_states(clf1, None) assert isinstance(clf1.random_state, int) diff --git a/sklearn/ensemble/tests/test_forest.py b/sklearn/ensemble/tests/test_forest.py index 3ce35a4a2ccea..228ebdb830e44 100644 --- a/sklearn/ensemble/tests/test_forest.py +++ b/sklearn/ensemble/tests/test_forest.py @@ -764,7 +764,8 @@ def check_min_samples_split(name): node_idx = est.estimators_[0].tree_.children_left != -1 node_samples = est.estimators_[0].tree_.n_node_samples[node_idx] - assert np.min(node_samples) > len(X) * 0.5 - 1, "Failed with {0}".format(name) + assert np.min(node_samples) > len(X) * 0.5 - 1, ( + "Failed with {0}".format(name)) est = ForestEstimator(min_samples_split=0.5, n_estimators=1, random_state=0) @@ -772,7 +773,8 @@ def check_min_samples_split(name): node_idx = est.estimators_[0].tree_.children_left != -1 node_samples = est.estimators_[0].tree_.n_node_samples[node_idx] - assert np.min(node_samples) > len(X) * 0.5 - 1, "Failed with {0}".format(name) + assert np.min(node_samples) > len(X) * 0.5 - 
1, ( + "Failed with {0}".format(name)) @pytest.mark.parametrize('name', FOREST_ESTIMATORS) @@ -807,7 +809,8 @@ def check_min_samples_leaf(name): node_counts = np.bincount(out) # drop inner nodes leaf_count = node_counts[node_counts != 0] - assert np.min(leaf_count) > len(X) * 0.25 - 1, "Failed with {0}".format(name) + assert np.min(leaf_count) > len(X) * 0.25 - 1, ( + "Failed with {0}".format(name)) @pytest.mark.parametrize('name', FOREST_ESTIMATORS) diff --git a/sklearn/ensemble/tests/test_weight_boosting.py b/sklearn/ensemble/tests/test_weight_boosting.py index 6f03754345519..fb3cce1cef0ae 100755 --- a/sklearn/ensemble/tests/test_weight_boosting.py +++ b/sklearn/ensemble/tests/test_weight_boosting.py @@ -259,8 +259,7 @@ def test_importances(): importances = clf.feature_importances_ assert importances.shape[0] == 10 - assert ((importances[:3, np.newaxis] >= importances[3:]).all() == - True) + assert (importances[:3, np.newaxis] >= importances[3:]).all() def test_error(): diff --git a/sklearn/linear_model/tests/test_omp.py b/sklearn/linear_model/tests/test_omp.py index 074af8045a59d..c2ba9d02c296a 100644 --- a/sklearn/linear_model/tests/test_omp.py +++ b/sklearn/linear_model/tests/test_omp.py @@ -175,8 +175,8 @@ def test_no_atoms(): Xy_empty = np.dot(X.T, y_empty) gamma_empty = ignore_warnings(orthogonal_mp)(X, y_empty, 1) gamma_empty_gram = ignore_warnings(orthogonal_mp)(G, Xy_empty, 1) - assert np.all(gamma_empty == 0) == True - assert np.all(gamma_empty_gram == 0) == True + assert np.all(gamma_empty == 0) + assert np.all(gamma_empty_gram == 0) def test_omp_path(): diff --git a/sklearn/linear_model/tests/test_ridge.py b/sklearn/linear_model/tests/test_ridge.py index de517b20e859f..469d0e03d9173 100644 --- a/sklearn/linear_model/tests/test_ridge.py +++ b/sklearn/linear_model/tests/test_ridge.py @@ -1007,7 +1007,7 @@ def test_n_iter(): for solver in ('sparse_cg', 'svd', 'cholesky'): reg = Ridge(solver=solver, max_iter=1, tol=1e-1) reg.fit(X, y_n) - assert reg.n_iter_ == None + assert reg.n_iter_ is None def test_ridge_fit_intercept_sparse(): diff --git a/sklearn/manifold/tests/test_locally_linear.py b/sklearn/manifold/tests/test_locally_linear.py index 2e05710b79b9a..adaddb03d2af3 100644 --- a/sklearn/manifold/tests/test_locally_linear.py +++ b/sklearn/manifold/tests/test_locally_linear.py @@ -97,8 +97,8 @@ def test_lle_manifold(): details = ("solver: %s, method: %s" % (solver, method)) assert reconstruction_error < tol, details assert (np.abs(clf.reconstruction_error_ - - reconstruction_error) < - tol * reconstruction_error), details + reconstruction_error) < + tol * reconstruction_error), details # Test the error raised when parameter passed to lle is invalid diff --git a/sklearn/manifold/tests/test_spectral_embedding.py b/sklearn/manifold/tests/test_spectral_embedding.py index b6e3879b8beab..a14b58eb243fa 100644 --- a/sklearn/manifold/tests/test_spectral_embedding.py +++ b/sklearn/manifold/tests/test_spectral_embedding.py @@ -220,17 +220,17 @@ def test_connectivity(seed=36): [0, 1, 1, 1, 0], [0, 0, 1, 1, 1], [0, 0, 0, 1, 1]]) - assert _graph_is_connected(graph) == False - assert _graph_is_connected(sparse.csr_matrix(graph)) == False - assert _graph_is_connected(sparse.csc_matrix(graph)) == False + assert not _graph_is_connected(graph) + assert not _graph_is_connected(sparse.csr_matrix(graph)) + assert not _graph_is_connected(sparse.csc_matrix(graph)) graph = np.array([[1, 1, 0, 0, 0], [1, 1, 1, 0, 0], [0, 1, 1, 1, 0], [0, 0, 1, 1, 1], [0, 0, 0, 1, 1]]) - assert 
_graph_is_connected(graph) == True - assert _graph_is_connected(sparse.csr_matrix(graph)) == True - assert _graph_is_connected(sparse.csc_matrix(graph)) == True + assert _graph_is_connected(graph) + assert _graph_is_connected(sparse.csr_matrix(graph)) + assert _graph_is_connected(sparse.csc_matrix(graph)) def test_spectral_embedding_deterministic(): diff --git a/sklearn/manifold/tests/test_t_sne.py b/sklearn/manifold/tests/test_t_sne.py index ceb569026489d..5f49cc3e3e507 100644 --- a/sklearn/manifold/tests/test_t_sne.py +++ b/sklearn/manifold/tests/test_t_sne.py @@ -672,7 +672,7 @@ def test_n_iter_without_progress(): # The output needs to contain the value of n_iter_without_progress assert ("did not make any progress during the " - "last -1 episodes. Finished." in out) + "last -1 episodes. Finished." in out) def test_min_grad_norm(): diff --git a/sklearn/metrics/cluster/tests/test_unsupervised.py b/sklearn/metrics/cluster/tests/test_unsupervised.py index 29dfd930b0702..3d225321d211b 100644 --- a/sklearn/metrics/cluster/tests/test_unsupervised.py +++ b/sklearn/metrics/cluster/tests/test_unsupervised.py @@ -193,11 +193,11 @@ def test_calinski_harabasz_score(): # Assert the value is 1. when all samples are equals assert 1. == calinski_harabasz_score(np.ones((10, 2)), - [0] * 5 + [1] * 5) + [0] * 5 + [1] * 5) # Assert the value is 0. when all the mean cluster are equal assert 0. == calinski_harabasz_score([[-1, -1], [1, 1]] * 10, - [0] * 10 + [1] * 10) + [0] * 10 + [1] * 10) # General case (with non numpy arrays) X = ([[0, 0], [1, 1]] * 5 + [[3, 3], [4, 4]] * 5 + diff --git a/sklearn/metrics/tests/test_classification.py b/sklearn/metrics/tests/test_classification.py index d51631a5ff6f9..c65fb969626df 100644 --- a/sklearn/metrics/tests/test_classification.py +++ b/sklearn/metrics/tests/test_classification.py @@ -1335,7 +1335,7 @@ def test_precision_recall_f1_score_multilabel_1(): assert_almost_equal(p, 1.5 / 4) assert_almost_equal(r, 0.5) assert_almost_equal(f, 2.5 / 1.5 * 0.25) - assert s == None + assert s is None assert_almost_equal(fbeta_score(y_true, y_pred, beta=2, average="macro"), np.mean(f2)) @@ -1345,7 +1345,7 @@ def test_precision_recall_f1_score_multilabel_1(): assert_almost_equal(p, 0.5) assert_almost_equal(r, 0.5) assert_almost_equal(f, 0.5) - assert s == None + assert s is None assert_almost_equal(fbeta_score(y_true, y_pred, beta=2, average="micro"), (1 + 4) * p * r / (4 * p + r)) @@ -1356,7 +1356,7 @@ def test_precision_recall_f1_score_multilabel_1(): assert_almost_equal(p, 1.5 / 4) assert_almost_equal(r, 0.5) assert_almost_equal(f, 2.5 / 1.5 * 0.25) - assert s == None + assert s is None assert_almost_equal(fbeta_score(y_true, y_pred, beta=2, average="weighted"), np.average(f2, weights=support)) @@ -1369,7 +1369,7 @@ def test_precision_recall_f1_score_multilabel_1(): assert_almost_equal(p, 0.5) assert_almost_equal(r, 0.5) assert_almost_equal(f, 0.5) - assert s == None + assert s is None assert_almost_equal(fbeta_score(y_true, y_pred, beta=2, average="samples"), 0.5) @@ -1401,7 +1401,7 @@ def test_precision_recall_f1_score_multilabel_2(): assert_almost_equal(p, 0.25) assert_almost_equal(r, 0.25) assert_almost_equal(f, 2 * 0.25 * 0.25 / 0.5) - assert s == None + assert s is None assert_almost_equal(fbeta_score(y_true, y_pred, beta=2, average="micro"), (1 + 4) * p * r / (4 * p + r)) @@ -1411,7 +1411,7 @@ def test_precision_recall_f1_score_multilabel_2(): assert_almost_equal(p, 0.25) assert_almost_equal(r, 0.125) assert_almost_equal(f, 2 / 12) - assert s == None + assert s is None 
assert_almost_equal(fbeta_score(y_true, y_pred, beta=2, average="macro"), np.mean(f2)) @@ -1421,7 +1421,7 @@ def test_precision_recall_f1_score_multilabel_2(): assert_almost_equal(p, 2 / 4) assert_almost_equal(r, 1 / 4) assert_almost_equal(f, 2 / 3 * 2 / 4) - assert s == None + assert s is None assert_almost_equal(fbeta_score(y_true, y_pred, beta=2, average="weighted"), np.average(f2, weights=support)) @@ -1436,7 +1436,7 @@ def test_precision_recall_f1_score_multilabel_2(): assert_almost_equal(p, 1 / 6) assert_almost_equal(r, 1 / 6) assert_almost_equal(f, 2 / 4 * 1 / 3) - assert s == None + assert s is None assert_almost_equal(fbeta_score(y_true, y_pred, beta=2, average="samples"), 0.1666, 2) @@ -1466,7 +1466,7 @@ def test_precision_recall_f1_score_with_an_empty_prediction(): assert_almost_equal(p, 0.5) assert_almost_equal(r, 1.5 / 4) assert_almost_equal(f, 2.5 / (4 * 1.5)) - assert s == None + assert s is None assert_almost_equal(fbeta_score(y_true, y_pred, beta=2, average="macro"), np.mean(f2)) @@ -1476,7 +1476,7 @@ def test_precision_recall_f1_score_with_an_empty_prediction(): assert_almost_equal(p, 2 / 3) assert_almost_equal(r, 0.5) assert_almost_equal(f, 2 / 3 / (2 / 3 + 0.5)) - assert s == None + assert s is None assert_almost_equal(fbeta_score(y_true, y_pred, beta=2, average="micro"), (1 + 4) * p * r / (4 * p + r)) @@ -1486,7 +1486,7 @@ def test_precision_recall_f1_score_with_an_empty_prediction(): assert_almost_equal(p, 3 / 4) assert_almost_equal(r, 0.5) assert_almost_equal(f, (2 / 1.5 + 1) / 4) - assert s == None + assert s is None assert_almost_equal(fbeta_score(y_true, y_pred, beta=2, average="weighted"), np.average(f2, weights=support)) @@ -1499,7 +1499,7 @@ def test_precision_recall_f1_score_with_an_empty_prediction(): assert_almost_equal(p, 1 / 3) assert_almost_equal(r, 1 / 3) assert_almost_equal(f, 1 / 3) - assert s == None + assert s is None assert_almost_equal(fbeta_score(y_true, y_pred, beta=2, average="samples"), 0.333, 2) @@ -1518,7 +1518,7 @@ def test_precision_recall_f1_no_labels(beta, average): assert_almost_equal(p, 0) assert_almost_equal(r, 0) assert_almost_equal(f, 0) - assert s == None + assert s is None fbeta = assert_warns(UndefinedMetricWarning, fbeta_score, y_true, y_pred, diff --git a/sklearn/metrics/tests/test_regression.py b/sklearn/metrics/tests/test_regression.py index a40ec3856f201..7903de36260f5 100644 --- a/sklearn/metrics/tests/test_regression.py +++ b/sklearn/metrics/tests/test_regression.py @@ -144,7 +144,7 @@ def test_regression_multioutput_array(): r = r2_score([[0, -1], [0, 1]], [[2, 2], [1, 1]], multioutput='raw_values') assert_array_almost_equal(r, [0, -3.5], decimal=2) assert np.mean(r) == r2_score([[0, -1], [0, 1]], [[2, 2], [1, 1]], - multioutput='uniform_average') + multioutput='uniform_average') evs = explained_variance_score([[0, -1], [0, 1]], [[2, 2], [1, 1]], multioutput='raw_values') assert_array_almost_equal(evs, [0, -1.25], decimal=2) @@ -156,7 +156,7 @@ def test_regression_multioutput_array(): r2 = r2_score(y_true, y_pred, multioutput='raw_values') assert_array_almost_equal(r2, [1., -3.], decimal=2) assert np.mean(r2) == r2_score(y_true, y_pred, - multioutput='uniform_average') + multioutput='uniform_average') evs = explained_variance_score(y_true, y_pred, multioutput='raw_values') assert_array_almost_equal(evs, [1., -3.], decimal=2) assert np.mean(evs) == explained_variance_score(y_true, y_pred) diff --git a/sklearn/model_selection/tests/test_split.py b/sklearn/model_selection/tests/test_split.py index 583000e2000bc..f0c0f6f453c5c 
100644 --- a/sklearn/model_selection/tests/test_split.py +++ b/sklearn/model_selection/tests/test_split.py @@ -915,7 +915,7 @@ def test_leave_group_out_changing_groups(): groups=groups)) # n_splits = no of unique groups (C(uniq_lbls, 1) = n_unique_groups) assert 3 == LeaveOneGroupOut().get_n_splits(X, y=X, - groups=groups) + groups=groups) def test_leave_one_p_group_out_error_on_fewer_number_of_groups(): diff --git a/sklearn/model_selection/tests/test_validation.py b/sklearn/model_selection/tests/test_validation.py index 1d0f1cb1be8d0..2c84439a7c29d 100644 --- a/sklearn/model_selection/tests/test_validation.py +++ b/sklearn/model_selection/tests/test_validation.py @@ -471,8 +471,8 @@ def check_cross_validate_multi_metric(clf, X, y, scores): return_train_score=False) assert isinstance(cv_results, dict) assert (set(cv_results.keys()) == - (keys_with_train if return_train_score - else keys_sans_train)) + (keys_with_train if return_train_score + else keys_sans_train)) assert_array_almost_equal(cv_results['test_r2'], test_r2_scores) assert_array_almost_equal( cv_results['test_neg_mean_squared_error'], test_mse_scores) diff --git a/sklearn/preprocessing/tests/test_data.py b/sklearn/preprocessing/tests/test_data.py index ef3e4c4768c84..6d21ba340b1ae 100644 --- a/sklearn/preprocessing/tests/test_data.py +++ b/sklearn/preprocessing/tests/test_data.py @@ -129,7 +129,7 @@ def test_polynomial_features(): assert_array_almost_equal(X_poly, P2[:, [0, 1, 2, 4]]) assert interact.powers_.shape == (interact.n_output_features_, - interact.n_input_features_) + interact.n_input_features_) def test_polynomial_feature_names(): diff --git a/sklearn/tests/test_base.py b/sklearn/tests/test_base.py index 190af5a8f6800..257753c23584f 100644 --- a/sklearn/tests/test_base.py +++ b/sklearn/tests/test_base.py @@ -424,7 +424,7 @@ def test_pickling_when_getstate_is_overwritten_by_mixin(): serialized = pickle.dumps(estimator) estimator_restored = pickle.loads(serialized) assert estimator_restored.attribute_pickled == 5 - assert estimator_restored._attribute_not_pickled == None + assert estimator_restored._attribute_not_pickled is None assert estimator_restored._restored @@ -438,7 +438,7 @@ def test_pickling_when_getstate_is_overwritten_by_mixin_outside_of_sklearn(): serialized = estimator.__getstate__() assert serialized == {'_attribute_not_pickled': None, - 'attribute_pickled': 5} + 'attribute_pickled': 5} serialized['attribute_pickled'] = 4 estimator.__setstate__(serialized) @@ -467,7 +467,7 @@ def test_pickling_works_when_getstate_is_overwritten_in_the_child_class(): serialized = pickle.dumps(estimator) estimator_restored = pickle.loads(serialized) assert estimator_restored.attribute_pickled == 5 - assert estimator_restored._attribute_not_pickled == None + assert estimator_restored._attribute_not_pickled is None def test_tag_inheritance(): diff --git a/sklearn/tests/test_calibration.py b/sklearn/tests/test_calibration.py index db7ed1920c43e..a7ca57c0c18c6 100644 --- a/sklearn/tests/test_calibration.py +++ b/sklearn/tests/test_calibration.py @@ -318,7 +318,7 @@ def test_calibration_less_classes(): proba = calibrated_classifier.predict_proba(X) assert_array_equal(proba[:, i], np.zeros(len(y))) assert np.all(np.hstack([proba[:, :i], - proba[:, i + 1:]])) == True + proba[:, i + 1:]])) @ignore_warnings(category=(DeprecationWarning, FutureWarning)) diff --git a/sklearn/tests/test_discriminant_analysis.py b/sklearn/tests/test_discriminant_analysis.py index c85227b37a7eb..e6527cc4330f7 100644 --- 
a/sklearn/tests/test_discriminant_analysis.py +++ b/sklearn/tests/test_discriminant_analysis.py @@ -236,12 +236,14 @@ def test_lda_explained_variance_ratio(): clf_lda_eigen = LinearDiscriminantAnalysis(solver="eigen") clf_lda_eigen.fit(X, y) assert_almost_equal(clf_lda_eigen.explained_variance_ratio_.sum(), 1.0, 3) - assert clf_lda_eigen.explained_variance_ratio_.shape == (2,), "Unexpected length for explained_variance_ratio_" + assert clf_lda_eigen.explained_variance_ratio_.shape == (2,), ( + "Unexpected length for explained_variance_ratio_") clf_lda_svd = LinearDiscriminantAnalysis(solver="svd") clf_lda_svd.fit(X, y) assert_almost_equal(clf_lda_svd.explained_variance_ratio_.sum(), 1.0, 3) - assert clf_lda_svd.explained_variance_ratio_.shape == (2,), "Unexpected length for explained_variance_ratio_" + assert clf_lda_svd.explained_variance_ratio_.shape == (2,), ( + "Unexpected length for explained_variance_ratio_") assert_array_almost_equal(clf_lda_svd.explained_variance_ratio_, clf_lda_eigen.explained_variance_ratio_) @@ -294,7 +296,8 @@ def test_lda_scaling(): for solver in ('svd', 'lsqr', 'eigen'): clf = LinearDiscriminantAnalysis(solver=solver) # should be able to separate the data perfectly - assert clf.fit(x, y).score(x, y) == 1.0, 'using covariance: %s' % solver + assert clf.fit(x, y).score(x, y) == 1.0, ( + 'using covariance: %s' % solver) def test_lda_store_covariance(): diff --git a/sklearn/tests/test_init.py b/sklearn/tests/test_init.py index d936ee4e6d2b7..c2a216dddb937 100644 --- a/sklearn/tests/test_init.py +++ b/sklearn/tests/test_init.py @@ -17,4 +17,4 @@ def test_import_skl(): # Test either above import has failed for some reason # "import *" is discouraged outside of the module level, hence we # rely on setting up the variable above - assert _top_import_error == None + assert _top_import_error is None diff --git a/sklearn/tests/test_kernel_approximation.py b/sklearn/tests/test_kernel_approximation.py index af589010d6769..79848f5561799 100644 --- a/sklearn/tests/test_kernel_approximation.py +++ b/sklearn/tests/test_kernel_approximation.py @@ -65,11 +65,11 @@ def test_additive_chi2_sampler(): # test that the sample_interval is initialized correctly transform = AdditiveChi2Sampler(sample_steps=sample_steps) - assert transform.sample_interval == None + assert transform.sample_interval is None # test that the sample_interval is changed in the fit method transform.fit(X) - assert transform.sample_interval_ != None + assert transform.sample_interval_ is not None # test that the sample_interval is set correctly sample_interval = 0.3 diff --git a/sklearn/tests/test_pipeline.py b/sklearn/tests/test_pipeline.py index e064f0ba39572..0a2e67d599d85 100644 --- a/sklearn/tests/test_pipeline.py +++ b/sklearn/tests/test_pipeline.py @@ -177,7 +177,7 @@ def test_pipeline_init(): # Check that params are set pipe.set_params(svc__a=0.1) assert clf.a == 0.1 - assert clf.b == None + assert clf.b is None # Smoke test the repr: repr(pipe) diff --git a/sklearn/tree/tests/test_tree.py b/sklearn/tree/tests/test_tree.py index 89a1816fd852e..dbce4a5d0d560 100644 --- a/sklearn/tree/tests/test_tree.py +++ b/sklearn/tree/tests/test_tree.py @@ -827,8 +827,8 @@ def test_min_impurity_split(): est.min_impurity_split)) assert est.tree_.impurity[node] <= min_impurity_split, ( "Failed with {0}, min_impurity_split={1}".format( - est.tree_.impurity[node], - est.min_impurity_split)) + est.tree_.impurity[node], + est.min_impurity_split)) def test_min_impurity_decrease(): diff --git a/sklearn/utils/estimator_checks.py 
b/sklearn/utils/estimator_checks.py index 42220406b477d..18cba0cbc56f1 100644 --- a/sklearn/utils/estimator_checks.py +++ b/sklearn/utils/estimator_checks.py @@ -721,7 +721,8 @@ def check_dict_unchanged(name, estimator_orig): if hasattr(estimator, method): dict_before = estimator.__dict__.copy() getattr(estimator, method)(X) - assert estimator.__dict__ == dict_before, 'Estimator changes __dict__ during %s' % method + assert estimator.__dict__ == dict_before, ( + 'Estimator changes __dict__ during %s' % method) def is_public_parameter(attr): @@ -2372,7 +2373,7 @@ def check_set_params(name, estimator_orig): else: curr_params = estimator.get_params(deep=False) assert (set(test_params.keys()) == - set(curr_params.keys())), msg + set(curr_params.keys())), msg for k, v in curr_params.items(): assert test_params[k] is v, msg test_params[param_name] = default_value diff --git a/sklearn/utils/tests/test_multiclass.py b/sklearn/utils/tests/test_multiclass.py index 962d927f43ba1..4dc44e797e211 100644 --- a/sklearn/utils/tests/test_multiclass.py +++ b/sklearn/utils/tests/test_multiclass.py @@ -280,8 +280,9 @@ def test_check_classification_targets(): def test_type_of_target(): for group, group_examples in EXAMPLES.items(): for example in group_examples: - assert type_of_target(example) == group, ('type_of_target(%r) should be %r, got %r' - % (example, group, type_of_target(example))) + assert type_of_target(example) == group, ( + 'type_of_target(%r) should be %r, got %r' + % (example, group, type_of_target(example))) for example in NON_ARRAY_LIKE_EXAMPLES: msg_regex = r'Expected array-like \(array or non-string sequence\).*' diff --git a/sklearn/utils/tests/test_validation.py b/sklearn/utils/tests/test_validation.py index 6bdd8d9047376..7cd6929892170 100644 --- a/sklearn/utils/tests/test_validation.py +++ b/sklearn/utils/tests/test_validation.py @@ -658,8 +658,8 @@ def test_check_is_fitted(): ard.fit(*make_blobs()) svr.fit(*make_blobs()) - assert None == check_is_fitted(ard, "coef_") - assert None == check_is_fitted(svr, "support_") + assert check_is_fitted(ard, "coef_") is None + assert check_is_fitted(svr, "support_") is None def test_check_consistent_length(): @@ -765,7 +765,7 @@ def test_check_memory(): memory = check_memory("cache_directory") assert memory.cachedir == os.path.join('cache_directory', 'joblib') memory = check_memory(None) - assert memory.cachedir == None + assert memory.cachedir is None dummy = DummyMemory() memory = check_memory(dummy) assert memory is dummy From d439ad72c84a10387748c5ee1f9331a01cf24a92 Mon Sep 17 00:00:00 2001 From: adrinjalali Date: Sun, 30 Jun 2019 17:22:03 +0200 Subject: [PATCH 21/22] remove some unused imports --- sklearn/cluster/tests/test_affinity_propagation.py | 2 +- sklearn/cluster/tests/test_bicluster.py | 1 - sklearn/cluster/tests/test_birch.py | 3 --- sklearn/cluster/tests/test_dbscan.py | 3 --- sklearn/cluster/tests/test_hierarchical.py | 1 - sklearn/cluster/tests/test_k_means.py | 3 --- sklearn/cluster/tests/test_optics.py | 1 - sklearn/cluster/tests/test_spectral.py | 1 - sklearn/compose/tests/test_column_transformer.py | 2 -- sklearn/cross_decomposition/tests/test_pls.py | 2 +- sklearn/datasets/tests/test_20news.py | 1 - sklearn/datasets/tests/test_base.py | 1 - sklearn/datasets/tests/test_covtype.py | 2 +- sklearn/datasets/tests/test_kddcup99.py | 2 +- sklearn/datasets/tests/test_lfw.py | 1 - sklearn/datasets/tests/test_samples_generator.py | 2 -- sklearn/datasets/tests/test_svmlight_format.py | 2 -- 
sklearn/decomposition/tests/test_dict_learning.py | 2 -- sklearn/decomposition/tests/test_factor_analysis.py | 3 --- sklearn/decomposition/tests/test_fastica.py | 2 -- sklearn/decomposition/tests/test_kernel_pca.py | 5 ++--- sklearn/decomposition/tests/test_online_lda.py | 2 -- sklearn/decomposition/tests/test_sparse_pca.py | 1 - sklearn/ensemble/tests/test_bagging.py | 3 --- sklearn/ensemble/tests/test_base.py | 2 -- sklearn/ensemble/tests/test_forest.py | 3 --- sklearn/ensemble/tests/test_gradient_boosting.py | 3 --- .../ensemble/tests/test_gradient_boosting_loss_functions.py | 1 - sklearn/ensemble/tests/test_iforest.py | 2 -- 29 files changed, 6 insertions(+), 53 deletions(-) diff --git a/sklearn/cluster/tests/test_affinity_propagation.py b/sklearn/cluster/tests/test_affinity_propagation.py index 57ab89cfd6e54..3b938ecddc0d2 100644 --- a/sklearn/cluster/tests/test_affinity_propagation.py +++ b/sklearn/cluster/tests/test_affinity_propagation.py @@ -9,7 +9,7 @@ from sklearn.exceptions import ConvergenceWarning from sklearn.utils.testing import ( - assert_equal, assert_array_equal, assert_raises, + assert_array_equal, assert_raises, assert_warns, assert_warns_message, assert_no_warnings) from sklearn.cluster.affinity_propagation_ import AffinityPropagation diff --git a/sklearn/cluster/tests/test_bicluster.py b/sklearn/cluster/tests/test_bicluster.py index 7c9179938305b..4c230f4c2adc0 100644 --- a/sklearn/cluster/tests/test_bicluster.py +++ b/sklearn/cluster/tests/test_bicluster.py @@ -5,7 +5,6 @@ from sklearn.model_selection import ParameterGrid -from sklearn.utils.testing import assert_equal from sklearn.utils.testing import assert_almost_equal from sklearn.utils.testing import assert_array_equal from sklearn.utils.testing import assert_array_almost_equal diff --git a/sklearn/cluster/tests/test_birch.py b/sklearn/cluster/tests/test_birch.py index 522b0304a0111..e4d87f46cc70e 100644 --- a/sklearn/cluster/tests/test_birch.py +++ b/sklearn/cluster/tests/test_birch.py @@ -13,9 +13,6 @@ from sklearn.linear_model import ElasticNet from sklearn.metrics import pairwise_distances_argmin, v_measure_score -from sklearn.utils.testing import assert_greater_equal -from sklearn.utils.testing import assert_equal -from sklearn.utils.testing import assert_greater from sklearn.utils.testing import assert_almost_equal from sklearn.utils.testing import assert_array_equal from sklearn.utils.testing import assert_array_almost_equal diff --git a/sklearn/cluster/tests/test_dbscan.py b/sklearn/cluster/tests/test_dbscan.py index 02f110e663ec4..919f4bc4aae74 100644 --- a/sklearn/cluster/tests/test_dbscan.py +++ b/sklearn/cluster/tests/test_dbscan.py @@ -11,11 +11,8 @@ import pytest -from sklearn.utils.testing import assert_equal from sklearn.utils.testing import assert_array_equal from sklearn.utils.testing import assert_raises -from sklearn.utils.testing import assert_in -from sklearn.utils.testing import assert_not_in from sklearn.neighbors import NearestNeighbors from sklearn.cluster.dbscan_ import DBSCAN from sklearn.cluster.dbscan_ import dbscan diff --git a/sklearn/cluster/tests/test_hierarchical.py b/sklearn/cluster/tests/test_hierarchical.py index 7cd78a3e0ac40..c630ea5d2e8c0 100644 --- a/sklearn/cluster/tests/test_hierarchical.py +++ b/sklearn/cluster/tests/test_hierarchical.py @@ -16,7 +16,6 @@ from sklearn.metrics.cluster.supervised import adjusted_rand_score from sklearn.utils.testing import assert_raises -from sklearn.utils.testing import assert_equal from sklearn.utils.testing import 
assert_almost_equal from sklearn.utils.testing import assert_array_almost_equal from sklearn.utils.testing import assert_raise_message diff --git a/sklearn/cluster/tests/test_k_means.py b/sklearn/cluster/tests/test_k_means.py index 03e44db0390b7..4fca8f621e141 100644 --- a/sklearn/cluster/tests/test_k_means.py +++ b/sklearn/cluster/tests/test_k_means.py @@ -6,15 +6,12 @@ import pytest -from sklearn.utils.testing import assert_equal from sklearn.utils.testing import assert_array_equal from sklearn.utils.testing import assert_array_almost_equal from sklearn.utils.testing import assert_allclose from sklearn.utils.testing import assert_almost_equal from sklearn.utils.testing import assert_raises from sklearn.utils.testing import assert_raises_regex -from sklearn.utils.testing import assert_greater -from sklearn.utils.testing import assert_less from sklearn.utils.testing import assert_warns from sklearn.utils.testing import assert_warns_message from sklearn.utils.testing import if_safe_multiprocessing_with_blas diff --git a/sklearn/cluster/tests/test_optics.py b/sklearn/cluster/tests/test_optics.py index b90d8ee7a1e3d..f71be6bc627c1 100644 --- a/sklearn/cluster/tests/test_optics.py +++ b/sklearn/cluster/tests/test_optics.py @@ -13,7 +13,6 @@ from sklearn.metrics.pairwise import pairwise_distances from sklearn.cluster.dbscan_ import DBSCAN from sklearn.utils import shuffle -from sklearn.utils.testing import assert_equal from sklearn.utils.testing import assert_array_equal from sklearn.utils.testing import assert_raise_message from sklearn.utils.testing import assert_allclose diff --git a/sklearn/cluster/tests/test_spectral.py b/sklearn/cluster/tests/test_spectral.py index 58cc9f4f1036f..df47b089c8d7e 100644 --- a/sklearn/cluster/tests/test_spectral.py +++ b/sklearn/cluster/tests/test_spectral.py @@ -8,7 +8,6 @@ import pickle from sklearn.utils import check_random_state -from sklearn.utils.testing import assert_equal from sklearn.utils.testing import assert_array_equal from sklearn.utils.testing import assert_raises from sklearn.utils.testing import assert_warns_message diff --git a/sklearn/compose/tests/test_column_transformer.py b/sklearn/compose/tests/test_column_transformer.py index bcbbcc1c3902e..f1abbdccbdb42 100644 --- a/sklearn/compose/tests/test_column_transformer.py +++ b/sklearn/compose/tests/test_column_transformer.py @@ -9,8 +9,6 @@ from sklearn.utils.testing import assert_raises from sklearn.utils.testing import assert_raise_message -from sklearn.utils.testing import assert_equal -from sklearn.utils.testing import assert_dict_equal from sklearn.utils.testing import assert_array_equal from sklearn.utils.testing import assert_allclose_dense_sparse from sklearn.utils.testing import assert_almost_equal diff --git a/sklearn/cross_decomposition/tests/test_pls.py b/sklearn/cross_decomposition/tests/test_pls.py index abb305aefdb37..687f28b6104c5 100644 --- a/sklearn/cross_decomposition/tests/test_pls.py +++ b/sklearn/cross_decomposition/tests/test_pls.py @@ -2,7 +2,7 @@ import numpy as np from numpy.testing import assert_approx_equal -from sklearn.utils.testing import (assert_equal, assert_array_almost_equal, +from sklearn.utils.testing import (assert_array_almost_equal, assert_array_equal, assert_raise_message, assert_warns) from sklearn.datasets import load_linnerud diff --git a/sklearn/datasets/tests/test_20news.py b/sklearn/datasets/tests/test_20news.py index 5b171999433db..04fc994598fe1 100644 --- a/sklearn/datasets/tests/test_20news.py +++ b/sklearn/datasets/tests/test_20news.py @@ 
-2,7 +2,6 @@ import numpy as np import scipy.sparse as sp -from sklearn.utils.testing import assert_equal from sklearn.utils.testing import SkipTest from sklearn.datasets.tests.test_common import check_return_X_y from functools import partial diff --git a/sklearn/datasets/tests/test_base.py b/sklearn/datasets/tests/test_base.py index 34dfce66377e1..1b58115d337e7 100644 --- a/sklearn/datasets/tests/test_base.py +++ b/sklearn/datasets/tests/test_base.py @@ -27,7 +27,6 @@ from sklearn.externals._pilutil import pillow_installed -from sklearn.utils.testing import assert_equal from sklearn.utils.testing import assert_raises from sklearn.utils import IS_PYPY diff --git a/sklearn/datasets/tests/test_covtype.py b/sklearn/datasets/tests/test_covtype.py index 0c30a0c7d5b18..3d349f457761f 100644 --- a/sklearn/datasets/tests/test_covtype.py +++ b/sklearn/datasets/tests/test_covtype.py @@ -4,7 +4,7 @@ """ from sklearn.datasets import fetch_covtype -from sklearn.utils.testing import assert_equal, SkipTest +from sklearn.utils.testing import SkipTest from sklearn.datasets.tests.test_common import check_return_X_y from functools import partial diff --git a/sklearn/datasets/tests/test_kddcup99.py b/sklearn/datasets/tests/test_kddcup99.py index 6efb23c6dfd26..f7a24e7d26f86 100644 --- a/sklearn/datasets/tests/test_kddcup99.py +++ b/sklearn/datasets/tests/test_kddcup99.py @@ -7,7 +7,7 @@ from sklearn.datasets import fetch_kddcup99 from sklearn.datasets.tests.test_common import check_return_X_y -from sklearn.utils.testing import assert_equal, SkipTest +from sklearn.utils.testing import SkipTest from functools import partial diff --git a/sklearn/datasets/tests/test_lfw.py b/sklearn/datasets/tests/test_lfw.py index 11211e803f93d..081caed328760 100644 --- a/sklearn/datasets/tests/test_lfw.py +++ b/sklearn/datasets/tests/test_lfw.py @@ -19,7 +19,6 @@ from sklearn.datasets import fetch_lfw_people from sklearn.utils.testing import assert_array_equal -from sklearn.utils.testing import assert_equal from sklearn.utils.testing import SkipTest from sklearn.utils.testing import assert_raises from sklearn.datasets.tests.test_common import check_return_X_y diff --git a/sklearn/datasets/tests/test_samples_generator.py b/sklearn/datasets/tests/test_samples_generator.py index 6cf35d91afc45..90af621f8bb87 100644 --- a/sklearn/datasets/tests/test_samples_generator.py +++ b/sklearn/datasets/tests/test_samples_generator.py @@ -6,11 +6,9 @@ import pytest import scipy.sparse as sp -from sklearn.utils.testing import assert_equal from sklearn.utils.testing import assert_array_equal from sklearn.utils.testing import assert_almost_equal from sklearn.utils.testing import assert_array_almost_equal -from sklearn.utils.testing import assert_less from sklearn.utils.testing import assert_raises from sklearn.utils.testing import assert_raise_message diff --git a/sklearn/datasets/tests/test_svmlight_format.py b/sklearn/datasets/tests/test_svmlight_format.py index bec67a7aa3819..4ff4e8422817c 100644 --- a/sklearn/datasets/tests/test_svmlight_format.py +++ b/sklearn/datasets/tests/test_svmlight_format.py @@ -9,12 +9,10 @@ import pytest -from sklearn.utils.testing import assert_equal from sklearn.utils.testing import assert_array_equal from sklearn.utils.testing import assert_array_almost_equal from sklearn.utils.testing import assert_raises from sklearn.utils.testing import assert_raises_regex -from sklearn.utils.testing import assert_in from sklearn.utils.testing import fails_if_pypy import sklearn diff --git 
a/sklearn/decomposition/tests/test_dict_learning.py b/sklearn/decomposition/tests/test_dict_learning.py index cbe4c822cb5ab..0e3e2f7e80f1d 100644 --- a/sklearn/decomposition/tests/test_dict_learning.py +++ b/sklearn/decomposition/tests/test_dict_learning.py @@ -9,8 +9,6 @@ from sklearn.utils.testing import assert_array_almost_equal from sklearn.utils.testing import assert_array_equal -from sklearn.utils.testing import assert_equal -from sklearn.utils.testing import assert_less from sklearn.utils.testing import assert_raises from sklearn.utils.testing import ignore_warnings from sklearn.utils.testing import TempMemmap diff --git a/sklearn/decomposition/tests/test_factor_analysis.py b/sklearn/decomposition/tests/test_factor_analysis.py index 8547a3c0f6bff..43a8f4b78e13d 100644 --- a/sklearn/decomposition/tests/test_factor_analysis.py +++ b/sklearn/decomposition/tests/test_factor_analysis.py @@ -5,9 +5,6 @@ import numpy as np from sklearn.utils.testing import assert_warns -from sklearn.utils.testing import assert_equal -from sklearn.utils.testing import assert_greater -from sklearn.utils.testing import assert_less from sklearn.utils.testing import assert_raises from sklearn.utils.testing import assert_almost_equal from sklearn.utils.testing import assert_array_almost_equal diff --git a/sklearn/decomposition/tests/test_fastica.py b/sklearn/decomposition/tests/test_fastica.py index 04ef5d6f86fba..6e3b830418291 100644 --- a/sklearn/decomposition/tests/test_fastica.py +++ b/sklearn/decomposition/tests/test_fastica.py @@ -10,8 +10,6 @@ from sklearn.utils.testing import assert_almost_equal from sklearn.utils.testing import assert_array_almost_equal -from sklearn.utils.testing import assert_less -from sklearn.utils.testing import assert_equal from sklearn.utils.testing import assert_warns from sklearn.utils.testing import assert_raises from sklearn.utils.testing import assert_raises_regex diff --git a/sklearn/decomposition/tests/test_kernel_pca.py b/sklearn/decomposition/tests/test_kernel_pca.py index c5ac24b3423f7..a61406007d5d1 100644 --- a/sklearn/decomposition/tests/test_kernel_pca.py +++ b/sklearn/decomposition/tests/test_kernel_pca.py @@ -2,8 +2,7 @@ import scipy.sparse as sp import pytest -from sklearn.utils.testing import (assert_array_almost_equal, assert_less, - assert_equal, assert_not_equal, +from sklearn.utils.testing import (assert_array_almost_equal, + assert_raises, assert_allclose) from sklearn.decomposition import PCA, KernelPCA @@ -108,7 +107,7 @@ def test_kernel_pca_sparse(): # inverse transform # X_pred2 = kpca.inverse_transform(X_pred_transformed) - # assert_equal(X_pred2.shape, X_pred.shape) + # assert X_pred2.shape == X_pred.shape def test_kernel_pca_linear_kernel(): diff --git a/sklearn/decomposition/tests/test_online_lda.py b/sklearn/decomposition/tests/test_online_lda.py index 1c13c890c2ea6..dc050221e5661 100644 --- a/sklearn/decomposition/tests/test_online_lda.py +++ b/sklearn/decomposition/tests/test_online_lda.py @@ -12,10 +12,8 @@ _dirichlet_expectation_2d) from sklearn.utils.testing import assert_allclose -from sklearn.utils.testing import assert_equal from sklearn.utils.testing import assert_array_almost_equal from sklearn.utils.testing import assert_almost_equal -from sklearn.utils.testing import assert_greater_equal from sklearn.utils.testing import assert_raises_regexp from sklearn.utils.testing import if_safe_multiprocessing_with_blas diff --git a/sklearn/decomposition/tests/test_sparse_pca.py b/sklearn/decomposition/tests/test_sparse_pca.py index 
8440dd17717bc..5d3f265cb9418 100644 --- a/sklearn/decomposition/tests/test_sparse_pca.py +++ b/sklearn/decomposition/tests/test_sparse_pca.py @@ -7,7 +7,6 @@ import numpy as np from sklearn.utils.testing import assert_array_almost_equal -from sklearn.utils.testing import assert_equal from sklearn.utils.testing import assert_allclose from sklearn.utils.testing import if_safe_multiprocessing_with_blas diff --git a/sklearn/ensemble/tests/test_bagging.py b/sklearn/ensemble/tests/test_bagging.py index f4bda051816ee..345ee90f1fe49 100644 --- a/sklearn/ensemble/tests/test_bagging.py +++ b/sklearn/ensemble/tests/test_bagging.py @@ -12,10 +12,7 @@ from sklearn.utils.testing import assert_array_equal from sklearn.utils.testing import assert_array_almost_equal -from sklearn.utils.testing import assert_equal from sklearn.utils.testing import assert_raises -from sklearn.utils.testing import assert_greater -from sklearn.utils.testing import assert_less from sklearn.utils.testing import assert_warns from sklearn.utils.testing import assert_warns_message from sklearn.utils.testing import assert_raise_message diff --git a/sklearn/ensemble/tests/test_base.py b/sklearn/ensemble/tests/test_base.py index 16b4df44a5ea8..73b7c1e5fba42 100644 --- a/sklearn/ensemble/tests/test_base.py +++ b/sklearn/ensemble/tests/test_base.py @@ -6,10 +6,8 @@ # License: BSD 3 clause import numpy as np -from numpy.testing import assert_equal from sklearn.utils.testing import assert_raise_message -from sklearn.utils.testing import assert_not_equal from sklearn.datasets import load_iris from sklearn.ensemble import BaggingClassifier diff --git a/sklearn/ensemble/tests/test_forest.py b/sklearn/ensemble/tests/test_forest.py index 228ebdb830e44..01102c9679053 100644 --- a/sklearn/ensemble/tests/test_forest.py +++ b/sklearn/ensemble/tests/test_forest.py @@ -28,9 +28,6 @@ from sklearn.utils.testing import assert_almost_equal from sklearn.utils.testing import assert_array_almost_equal from sklearn.utils.testing import assert_array_equal -from sklearn.utils.testing import assert_equal -from sklearn.utils.testing import assert_less, assert_greater -from sklearn.utils.testing import assert_greater_equal from sklearn.utils.testing import assert_raises from sklearn.utils.testing import assert_warns from sklearn.utils.testing import assert_warns_message diff --git a/sklearn/ensemble/tests/test_gradient_boosting.py b/sklearn/ensemble/tests/test_gradient_boosting.py index aa041073157a4..17e09f7f07156 100644 --- a/sklearn/ensemble/tests/test_gradient_boosting.py +++ b/sklearn/ensemble/tests/test_gradient_boosting.py @@ -27,9 +27,6 @@ from sklearn.utils.testing import assert_almost_equal from sklearn.utils.testing import assert_array_almost_equal from sklearn.utils.testing import assert_array_equal -from sklearn.utils.testing import assert_equal -from sklearn.utils.testing import assert_greater -from sklearn.utils.testing import assert_less from sklearn.utils.testing import assert_raises from sklearn.utils.testing import assert_raise_message from sklearn.utils.testing import assert_warns diff --git a/sklearn/ensemble/tests/test_gradient_boosting_loss_functions.py b/sklearn/ensemble/tests/test_gradient_boosting_loss_functions.py index d7fbc4a986469..6b24f90d0239d 100644 --- a/sklearn/ensemble/tests/test_gradient_boosting_loss_functions.py +++ b/sklearn/ensemble/tests/test_gradient_boosting_loss_functions.py @@ -5,7 +5,6 @@ import numpy as np from numpy.testing import assert_almost_equal from numpy.testing import assert_allclose -from numpy.testing 
import assert_equal import pytest from sklearn.utils import check_random_state diff --git a/sklearn/ensemble/tests/test_iforest.py b/sklearn/ensemble/tests/test_iforest.py index 298e0e422cce5..e3ce3c2100793 100644 --- a/sklearn/ensemble/tests/test_iforest.py +++ b/sklearn/ensemble/tests/test_iforest.py @@ -14,8 +14,6 @@ from sklearn.utils.testing import assert_array_almost_equal from sklearn.utils.testing import assert_raises from sklearn.utils.testing import assert_warns_message -from sklearn.utils.testing import assert_equal -from sklearn.utils.testing import assert_greater from sklearn.utils.testing import ignore_warnings from sklearn.utils.testing import assert_allclose From dc5017f445ec89ddbdbca9544074eeb6e533fe92 Mon Sep 17 00:00:00 2001 From: adrinjalali Date: Sun, 30 Jun 2019 18:21:45 +0200 Subject: [PATCH 22/22] automatic removal of unused imports --- sklearn/covariance/tests/test_covariance.py | 1 - sklearn/datasets/tests/test_rcv1.py | 1 - sklearn/decomposition/tests/test_nmf.py | 2 -- sklearn/ensemble/tests/test_voting.py | 1 - sklearn/ensemble/tests/test_weight_boosting.py | 1 - sklearn/feature_selection/tests/test_base.py | 2 +- sklearn/feature_selection/tests/test_feature_select.py | 3 --- sklearn/feature_selection/tests/test_from_model.py | 3 --- sklearn/feature_selection/tests/test_rfe.py | 1 - sklearn/linear_model/tests/test_base.py | 2 -- sklearn/linear_model/tests/test_bayes.py | 1 - sklearn/linear_model/tests/test_coordinate_descent.py | 2 -- sklearn/linear_model/tests/test_huber.py | 1 - sklearn/linear_model/tests/test_least_angle.py | 3 --- sklearn/linear_model/tests/test_logistic.py | 2 -- sklearn/linear_model/tests/test_omp.py | 1 - sklearn/linear_model/tests/test_passive_aggressive.py | 2 -- sklearn/linear_model/tests/test_perceptron.py | 1 - sklearn/linear_model/tests/test_ransac.py | 1 - sklearn/linear_model/tests/test_ridge.py | 2 -- sklearn/linear_model/tests/test_sag.py | 1 - sklearn/linear_model/tests/test_sgd.py | 3 --- .../linear_model/tests/test_sparse_coordinate_descent.py | 3 --- sklearn/manifold/tests/test_isomap.py | 1 - sklearn/manifold/tests/test_locally_linear.py | 1 - sklearn/manifold/tests/test_spectral_embedding.py | 2 +- sklearn/manifold/tests/test_t_sne.py | 5 ----- sklearn/metrics/cluster/tests/test_bicluster.py | 2 +- sklearn/metrics/cluster/tests/test_unsupervised.py | 2 -- sklearn/metrics/tests/test_classification.py | 2 -- sklearn/metrics/tests/test_common.py | 1 - sklearn/metrics/tests/test_pairwise.py | 2 -- sklearn/metrics/tests/test_ranking.py | 3 +-- sklearn/metrics/tests/test_regression.py | 1 - sklearn/metrics/tests/test_score_objects.py | 2 -- sklearn/mixture/tests/test_bayesian_mixture.py | 2 +- sklearn/mixture/tests/test_gaussian_mixture.py | 3 --- sklearn/model_selection/tests/test_search.py | 3 --- sklearn/model_selection/tests/test_split.py | 4 ---- sklearn/model_selection/tests/test_validation.py | 3 --- sklearn/neighbors/tests/test_lof.py | 2 -- sklearn/neighbors/tests/test_neighbors.py | 3 --- sklearn/preprocessing/tests/test_data.py | 3 --- sklearn/preprocessing/tests/test_function_transformer.py | 1 - sklearn/preprocessing/tests/test_label.py | 1 - sklearn/semi_supervised/tests/test_label_propagation.py | 1 - sklearn/svm/tests/test_svm.py | 2 -- sklearn/tests/test_base.py | 3 --- sklearn/tests/test_common.py | 2 -- sklearn/tests/test_discriminant_analysis.py | 2 -- sklearn/tests/test_docstring_parameters.py | 2 +- sklearn/tests/test_dummy.py | 1 - sklearn/tests/test_init.py | 1 - 
sklearn/tests/test_kernel_approximation.py | 4 +--- sklearn/tests/test_multiclass.py | 2 -- sklearn/tests/test_multioutput.py | 3 --- sklearn/tests/test_naive_bayes.py | 2 -- sklearn/tests/test_pipeline.py | 2 -- sklearn/tests/test_random_projection.py | 3 --- sklearn/tree/tests/test_tree.py | 6 ------ sklearn/utils/tests/test_class_weight.py | 1 - sklearn/utils/tests/test_extmath.py | 2 -- sklearn/utils/tests/test_fast_dict.py | 1 - sklearn/utils/tests/test_multiclass.py | 1 - sklearn/utils/tests/test_murmurhash.py | 1 - sklearn/utils/tests/test_validation.py | 1 - 66 files changed, 7 insertions(+), 125 deletions(-) diff --git a/sklearn/covariance/tests/test_covariance.py b/sklearn/covariance/tests/test_covariance.py index d7e6428ee27fb..a98e05c7ceaba 100644 --- a/sklearn/covariance/tests/test_covariance.py +++ b/sklearn/covariance/tests/test_covariance.py @@ -11,7 +11,6 @@ from sklearn.utils.testing import assert_array_equal from sklearn.utils.testing import assert_raises from sklearn.utils.testing import assert_warns -from sklearn.utils.testing import assert_greater from sklearn import datasets from sklearn.covariance import empirical_covariance, EmpiricalCovariance, \ diff --git a/sklearn/datasets/tests/test_rcv1.py b/sklearn/datasets/tests/test_rcv1.py index aa747bd5d74fe..2e9f42fa3634b 100644 --- a/sklearn/datasets/tests/test_rcv1.py +++ b/sklearn/datasets/tests/test_rcv1.py @@ -11,7 +11,6 @@ from sklearn.datasets.tests.test_common import check_return_X_y from sklearn.utils.testing import assert_almost_equal from sklearn.utils.testing import assert_array_equal -from sklearn.utils.testing import assert_equal from sklearn.utils.testing import SkipTest diff --git a/sklearn/decomposition/tests/test_nmf.py b/sklearn/decomposition/tests/test_nmf.py index b6703f0c24c0c..35681d7e65736 100644 --- a/sklearn/decomposition/tests/test_nmf.py +++ b/sklearn/decomposition/tests/test_nmf.py @@ -14,8 +14,6 @@ from sklearn.utils.testing import assert_array_equal from sklearn.utils.testing import assert_array_almost_equal from sklearn.utils.testing import assert_almost_equal -from sklearn.utils.testing import assert_less -from sklearn.utils.testing import assert_greater from sklearn.utils.testing import ignore_warnings from sklearn.utils.extmath import squared_norm from sklearn.base import clone diff --git a/sklearn/ensemble/tests/test_voting.py b/sklearn/ensemble/tests/test_voting.py index be29d1fbcff64..767755c23f460 100644 --- a/sklearn/ensemble/tests/test_voting.py +++ b/sklearn/ensemble/tests/test_voting.py @@ -5,7 +5,6 @@ from sklearn.utils.testing import assert_almost_equal, assert_array_equal from sklearn.utils.testing import assert_array_almost_equal -from sklearn.utils.testing import assert_equal from sklearn.utils.testing import assert_raise_message from sklearn.exceptions import NotFittedError from sklearn.linear_model import LinearRegression diff --git a/sklearn/ensemble/tests/test_weight_boosting.py b/sklearn/ensemble/tests/test_weight_boosting.py index fb3cce1cef0ae..1cb1e9d1431cf 100755 --- a/sklearn/ensemble/tests/test_weight_boosting.py +++ b/sklearn/ensemble/tests/test_weight_boosting.py @@ -4,7 +4,6 @@ from sklearn.utils.testing import assert_array_equal, assert_array_less from sklearn.utils.testing import assert_array_almost_equal -from sklearn.utils.testing import assert_equal, assert_greater from sklearn.utils.testing import assert_raises, assert_raises_regexp from sklearn.base import BaseEstimator diff --git a/sklearn/feature_selection/tests/test_base.py 
b/sklearn/feature_selection/tests/test_base.py index f2e3b36d456b5..f75f1789243fc 100644 --- a/sklearn/feature_selection/tests/test_base.py +++ b/sklearn/feature_selection/tests/test_base.py @@ -6,7 +6,7 @@ from sklearn.base import BaseEstimator from sklearn.feature_selection.base import SelectorMixin from sklearn.utils import check_array -from sklearn.utils.testing import assert_raises, assert_equal +from sklearn.utils.testing import assert_raises class StepSelector(SelectorMixin, BaseEstimator): diff --git a/sklearn/feature_selection/tests/test_feature_select.py b/sklearn/feature_selection/tests/test_feature_select.py index ac1822e1a6063..0283c3f6aba89 100644 --- a/sklearn/feature_selection/tests/test_feature_select.py +++ b/sklearn/feature_selection/tests/test_feature_select.py @@ -8,13 +8,10 @@ import pytest -from sklearn.utils.testing import assert_equal from sklearn.utils.testing import assert_almost_equal from sklearn.utils.testing import assert_raises from sklearn.utils.testing import assert_array_equal from sklearn.utils.testing import assert_array_almost_equal -from sklearn.utils.testing import assert_not_in -from sklearn.utils.testing import assert_less from sklearn.utils.testing import assert_warns from sklearn.utils.testing import ignore_warnings from sklearn.utils.testing import assert_warns_message diff --git a/sklearn/feature_selection/tests/test_from_model.py b/sklearn/feature_selection/tests/test_from_model.py index a5f61bfa0f061..3c281c552c7d5 100644 --- a/sklearn/feature_selection/tests/test_from_model.py +++ b/sklearn/feature_selection/tests/test_from_model.py @@ -1,9 +1,6 @@ import pytest import numpy as np -from sklearn.utils.testing import assert_equal -from sklearn.utils.testing import assert_less -from sklearn.utils.testing import assert_greater from sklearn.utils.testing import assert_array_almost_equal from sklearn.utils.testing import assert_array_equal from sklearn.utils.testing import assert_allclose diff --git a/sklearn/feature_selection/tests/test_rfe.py b/sklearn/feature_selection/tests/test_rfe.py index 1ad5375edb6f3..0ef1cb12efdba 100644 --- a/sklearn/feature_selection/tests/test_rfe.py +++ b/sklearn/feature_selection/tests/test_rfe.py @@ -16,7 +16,6 @@ from sklearn.utils import check_random_state from sklearn.utils.testing import ignore_warnings -from sklearn.utils.testing import assert_greater, assert_equal from sklearn.metrics import make_scorer from sklearn.metrics import get_scorer diff --git a/sklearn/linear_model/tests/test_base.py b/sklearn/linear_model/tests/test_base.py index 84676151d2a74..c9c240125997c 100644 --- a/sklearn/linear_model/tests/test_base.py +++ b/sklearn/linear_model/tests/test_base.py @@ -12,7 +12,6 @@ from sklearn.utils.testing import assert_array_almost_equal from sklearn.utils.testing import assert_array_equal from sklearn.utils.testing import assert_almost_equal -from sklearn.utils.testing import assert_equal from sklearn.utils.testing import assert_allclose from sklearn.linear_model.base import LinearRegression @@ -20,7 +19,6 @@ from sklearn.linear_model.base import _rescale_data from sklearn.linear_model.base import make_dataset from sklearn.utils import check_random_state -from sklearn.utils.testing import assert_greater from sklearn.datasets.samples_generator import make_sparse_uncorrelated from sklearn.datasets.samples_generator import make_regression from sklearn.datasets import load_iris diff --git a/sklearn/linear_model/tests/test_bayes.py b/sklearn/linear_model/tests/test_bayes.py index 355cd042347af..f04d7d9569c49 
100644 --- a/sklearn/linear_model/tests/test_bayes.py +++ b/sklearn/linear_model/tests/test_bayes.py @@ -11,7 +11,6 @@ from sklearn.utils.testing import assert_array_almost_equal from sklearn.utils.testing import assert_almost_equal from sklearn.utils.testing import assert_array_less -from sklearn.utils.testing import assert_equal from sklearn.utils.testing import assert_raise_message from sklearn.utils import check_random_state from sklearn.linear_model.bayes import BayesianRidge, ARDRegression diff --git a/sklearn/linear_model/tests/test_coordinate_descent.py b/sklearn/linear_model/tests/test_coordinate_descent.py index aa6773fce415b..005c0bff343b0 100644 --- a/sklearn/linear_model/tests/test_coordinate_descent.py +++ b/sklearn/linear_model/tests/test_coordinate_descent.py @@ -11,8 +11,6 @@ from sklearn.exceptions import ConvergenceWarning from sklearn.utils.testing import assert_array_almost_equal from sklearn.utils.testing import assert_almost_equal -from sklearn.utils.testing import assert_equal -from sklearn.utils.testing import assert_greater from sklearn.utils.testing import assert_raises from sklearn.utils.testing import assert_raises_regex from sklearn.utils.testing import assert_raise_message diff --git a/sklearn/linear_model/tests/test_huber.py b/sklearn/linear_model/tests/test_huber.py index 4145888c3d996..be2b0106a8ef3 100644 --- a/sklearn/linear_model/tests/test_huber.py +++ b/sklearn/linear_model/tests/test_huber.py @@ -8,7 +8,6 @@ from sklearn.utils.testing import assert_almost_equal from sklearn.utils.testing import assert_array_equal from sklearn.utils.testing import assert_array_almost_equal -from sklearn.utils.testing import assert_greater from sklearn.datasets import make_regression from sklearn.linear_model import ( diff --git a/sklearn/linear_model/tests/test_least_angle.py b/sklearn/linear_model/tests/test_least_angle.py index 9633989693a83..dbef55b973c7b 100644 --- a/sklearn/linear_model/tests/test_least_angle.py +++ b/sklearn/linear_model/tests/test_least_angle.py @@ -9,9 +9,6 @@ from sklearn.model_selection import train_test_split from sklearn.utils.testing import assert_allclose from sklearn.utils.testing import assert_array_almost_equal -from sklearn.utils.testing import assert_equal -from sklearn.utils.testing import assert_less -from sklearn.utils.testing import assert_greater from sklearn.utils.testing import assert_raises from sklearn.utils.testing import ignore_warnings from sklearn.utils.testing import assert_warns diff --git a/sklearn/linear_model/tests/test_logistic.py b/sklearn/linear_model/tests/test_logistic.py index cdbe2f9b3ba37..6fe862db591b4 100644 --- a/sklearn/linear_model/tests/test_logistic.py +++ b/sklearn/linear_model/tests/test_logistic.py @@ -19,8 +19,6 @@ from sklearn.utils.testing import assert_allclose from sklearn.utils.testing import assert_array_almost_equal from sklearn.utils.testing import assert_array_equal -from sklearn.utils.testing import assert_equal -from sklearn.utils.testing import assert_greater from sklearn.utils.testing import assert_raise_message from sklearn.utils.testing import assert_raises from sklearn.utils.testing import assert_warns diff --git a/sklearn/linear_model/tests/test_omp.py b/sklearn/linear_model/tests/test_omp.py index c2ba9d02c296a..a2f89b30935bd 100644 --- a/sklearn/linear_model/tests/test_omp.py +++ b/sklearn/linear_model/tests/test_omp.py @@ -4,7 +4,6 @@ import numpy as np from sklearn.utils.testing import assert_raises -from sklearn.utils.testing import assert_equal from sklearn.utils.testing 
import assert_array_equal from sklearn.utils.testing import assert_array_almost_equal from sklearn.utils.testing import assert_warns diff --git a/sklearn/linear_model/tests/test_passive_aggressive.py b/sklearn/linear_model/tests/test_passive_aggressive.py index d0253bbce64f6..8e8bfdc8b9800 100644 --- a/sklearn/linear_model/tests/test_passive_aggressive.py +++ b/sklearn/linear_model/tests/test_passive_aggressive.py @@ -3,8 +3,6 @@ import pytest -from sklearn.utils.testing import assert_less -from sklearn.utils.testing import assert_greater from sklearn.utils.testing import assert_array_almost_equal, assert_array_equal from sklearn.utils.testing import assert_almost_equal from sklearn.utils.testing import assert_raises diff --git a/sklearn/linear_model/tests/test_perceptron.py b/sklearn/linear_model/tests/test_perceptron.py index 75b91e7b50ba9..bce518b5f2e37 100644 --- a/sklearn/linear_model/tests/test_perceptron.py +++ b/sklearn/linear_model/tests/test_perceptron.py @@ -3,7 +3,6 @@ import pytest from sklearn.utils.testing import assert_array_almost_equal -from sklearn.utils.testing import assert_greater from sklearn.utils.testing import assert_raises from sklearn.utils import check_random_state diff --git a/sklearn/linear_model/tests/test_ransac.py b/sklearn/linear_model/tests/test_ransac.py index af8153590d2ff..5020c2ceb4d32 100644 --- a/sklearn/linear_model/tests/test_ransac.py +++ b/sklearn/linear_model/tests/test_ransac.py @@ -7,7 +7,6 @@ from numpy.testing import assert_array_equal from sklearn.utils import check_random_state -from sklearn.utils.testing import assert_less from sklearn.utils.testing import assert_warns from sklearn.utils.testing import assert_almost_equal from sklearn.utils.testing import assert_raises_regexp diff --git a/sklearn/linear_model/tests/test_ridge.py b/sklearn/linear_model/tests/test_ridge.py index 469d0e03d9173..2743414b7e60c 100644 --- a/sklearn/linear_model/tests/test_ridge.py +++ b/sklearn/linear_model/tests/test_ridge.py @@ -8,9 +8,7 @@ from sklearn.utils.testing import assert_almost_equal from sklearn.utils.testing import assert_allclose from sklearn.utils.testing import assert_array_almost_equal -from sklearn.utils.testing import assert_equal from sklearn.utils.testing import assert_array_equal -from sklearn.utils.testing import assert_greater from sklearn.utils.testing import assert_raises from sklearn.utils.testing import assert_raise_message from sklearn.utils.testing import assert_raises_regex diff --git a/sklearn/linear_model/tests/test_sag.py b/sklearn/linear_model/tests/test_sag.py index 3407d00fb1cc4..99c9c0009435b 100644 --- a/sklearn/linear_model/tests/test_sag.py +++ b/sklearn/linear_model/tests/test_sag.py @@ -19,7 +19,6 @@ from sklearn.utils.testing import assert_almost_equal from sklearn.utils.testing import assert_array_almost_equal from sklearn.utils.testing import assert_allclose -from sklearn.utils.testing import assert_greater from sklearn.utils.testing import assert_raise_message from sklearn.utils import compute_class_weight from sklearn.utils import check_random_state diff --git a/sklearn/linear_model/tests/test_sgd.py b/sklearn/linear_model/tests/test_sgd.py index abdb2ecefd10b..1dd2f48895649 100644 --- a/sklearn/linear_model/tests/test_sgd.py +++ b/sklearn/linear_model/tests/test_sgd.py @@ -9,10 +9,7 @@ from sklearn.utils.testing import assert_array_equal from sklearn.utils.testing import assert_almost_equal from sklearn.utils.testing import assert_array_almost_equal -from sklearn.utils.testing import assert_greater -from 
sklearn.utils.testing import assert_less from sklearn.utils.testing import assert_raises -from sklearn.utils.testing import assert_equal from sklearn.utils.testing import assert_raises_regexp from sklearn.utils.testing import assert_warns from sklearn.utils.testing import ignore_warnings diff --git a/sklearn/linear_model/tests/test_sparse_coordinate_descent.py b/sklearn/linear_model/tests/test_sparse_coordinate_descent.py index 326bcc94433bc..6f20df5caaa6a 100644 --- a/sklearn/linear_model/tests/test_sparse_coordinate_descent.py +++ b/sklearn/linear_model/tests/test_sparse_coordinate_descent.py @@ -3,10 +3,7 @@ from sklearn.utils.testing import assert_array_almost_equal from sklearn.utils.testing import assert_almost_equal -from sklearn.utils.testing import assert_equal -from sklearn.utils.testing import assert_less -from sklearn.utils.testing import assert_greater from sklearn.utils.testing import ignore_warnings from sklearn.utils.testing import assert_warns from sklearn.exceptions import ConvergenceWarning diff --git a/sklearn/manifold/tests/test_isomap.py b/sklearn/manifold/tests/test_isomap.py index da8607a31b916..28505e311abc4 100644 --- a/sklearn/manifold/tests/test_isomap.py +++ b/sklearn/manifold/tests/test_isomap.py @@ -8,7 +8,6 @@ from sklearn import neighbors from sklearn import pipeline from sklearn import preprocessing -from sklearn.utils.testing import assert_less from scipy.sparse import rand as sparse_rand diff --git a/sklearn/manifold/tests/test_locally_linear.py b/sklearn/manifold/tests/test_locally_linear.py index adaddb03d2af3..09a748dab90c3 100644 --- a/sklearn/manifold/tests/test_locally_linear.py +++ b/sklearn/manifold/tests/test_locally_linear.py @@ -6,7 +6,6 @@ from sklearn import neighbors, manifold from sklearn.manifold.locally_linear import barycenter_kneighbors_graph -from sklearn.utils.testing import assert_less from sklearn.utils.testing import ignore_warnings from sklearn.utils.testing import assert_raise_message from sklearn.utils.testing import assert_raises diff --git a/sklearn/manifold/tests/test_spectral_embedding.py b/sklearn/manifold/tests/test_spectral_embedding.py index a14b58eb243fa..9209f5762d221 100644 --- a/sklearn/manifold/tests/test_spectral_embedding.py +++ b/sklearn/manifold/tests/test_spectral_embedding.py @@ -17,7 +17,7 @@ from sklearn.utils.extmath import _deterministic_vector_sign_flip from sklearn.utils.testing import assert_array_almost_equal from sklearn.utils.testing import assert_array_equal -from sklearn.utils.testing import assert_equal, assert_raises +from sklearn.utils.testing import assert_raises from sklearn.utils.testing import SkipTest diff --git a/sklearn/manifold/tests/test_t_sne.py b/sklearn/manifold/tests/test_t_sne.py index 5f49cc3e3e507..2e38169a3de6a 100644 --- a/sklearn/manifold/tests/test_t_sne.py +++ b/sklearn/manifold/tests/test_t_sne.py @@ -8,15 +8,10 @@ from sklearn.neighbors import BallTree from sklearn.neighbors import NearestNeighbors -from sklearn.utils.testing import assert_less_equal -from sklearn.utils.testing import assert_equal from sklearn.utils.testing import assert_almost_equal from sklearn.utils.testing import assert_array_equal from sklearn.utils.testing import assert_array_almost_equal -from sklearn.utils.testing import assert_less -from sklearn.utils.testing import assert_greater from sklearn.utils.testing import assert_raises_regexp -from sklearn.utils.testing import assert_in from sklearn.utils.testing import skip_if_32bit from sklearn.utils import check_random_state from 
sklearn.manifold.t_sne import _joint_probabilities diff --git a/sklearn/metrics/cluster/tests/test_bicluster.py b/sklearn/metrics/cluster/tests/test_bicluster.py index d98ee1fc86b20..d56e5b088df02 100644 --- a/sklearn/metrics/cluster/tests/test_bicluster.py +++ b/sklearn/metrics/cluster/tests/test_bicluster.py @@ -2,7 +2,7 @@ import numpy as np -from sklearn.utils.testing import assert_equal, assert_almost_equal +from sklearn.utils.testing import assert_almost_equal from sklearn.metrics.cluster.bicluster import _jaccard from sklearn.metrics import consensus_score diff --git a/sklearn/metrics/cluster/tests/test_unsupervised.py b/sklearn/metrics/cluster/tests/test_unsupervised.py index 3d225321d211b..02a4e85501e77 100644 --- a/sklearn/metrics/cluster/tests/test_unsupervised.py +++ b/sklearn/metrics/cluster/tests/test_unsupervised.py @@ -5,10 +5,8 @@ from sklearn import datasets from sklearn.utils.testing import assert_array_equal -from sklearn.utils.testing import assert_equal from sklearn.utils.testing import assert_raises_regexp from sklearn.utils.testing import assert_raise_message -from sklearn.utils.testing import assert_greater from sklearn.utils.testing import assert_warns_message from sklearn.metrics.cluster import silhouette_score from sklearn.metrics.cluster import silhouette_samples diff --git a/sklearn/metrics/tests/test_classification.py b/sklearn/metrics/tests/test_classification.py index c65fb969626df..d9a5749980179 100644 --- a/sklearn/metrics/tests/test_classification.py +++ b/sklearn/metrics/tests/test_classification.py @@ -15,7 +15,6 @@ from sklearn.utils.validation import check_random_state from sklearn.utils.testing import assert_raises from sklearn.utils.testing import assert_raise_message -from sklearn.utils.testing import assert_equal from sklearn.utils.testing import assert_almost_equal from sklearn.utils.testing import assert_array_equal from sklearn.utils.testing import assert_array_almost_equal @@ -23,7 +22,6 @@ from sklearn.utils.testing import assert_warns_div0 from sklearn.utils.testing import assert_no_warnings from sklearn.utils.testing import assert_warns_message -from sklearn.utils.testing import assert_not_equal from sklearn.utils.testing import ignore_warnings from sklearn.utils.mocking import MockDataFrame diff --git a/sklearn/metrics/tests/test_common.py b/sklearn/metrics/tests/test_common.py index 13ab6ecd3d804..67e9b66a4b695 100644 --- a/sklearn/metrics/tests/test_common.py +++ b/sklearn/metrics/tests/test_common.py @@ -20,7 +20,6 @@ from sklearn.utils.testing import assert_almost_equal from sklearn.utils.testing import assert_array_equal from sklearn.utils.testing import assert_array_less -from sklearn.utils.testing import assert_equal from sklearn.utils.testing import assert_raise_message from sklearn.utils.testing import assert_raises from sklearn.utils.testing import ignore_warnings diff --git a/sklearn/metrics/tests/test_pairwise.py b/sklearn/metrics/tests/test_pairwise.py index ecf943a4c0bcc..89d343d092fdc 100644 --- a/sklearn/metrics/tests/test_pairwise.py +++ b/sklearn/metrics/tests/test_pairwise.py @@ -11,11 +11,9 @@ from sklearn import config_context -from sklearn.utils.testing import assert_greater from sklearn.utils.testing import assert_array_almost_equal from sklearn.utils.testing import assert_allclose from sklearn.utils.testing import assert_almost_equal -from sklearn.utils.testing import assert_equal from sklearn.utils.testing import assert_array_equal from sklearn.utils.testing import assert_raises from sklearn.utils.testing import 
assert_raises_regexp diff --git a/sklearn/metrics/tests/test_ranking.py b/sklearn/metrics/tests/test_ranking.py index 78e2d2a69a48c..140c1c7abad9c 100644 --- a/sklearn/metrics/tests/test_ranking.py +++ b/sklearn/metrics/tests/test_ranking.py @@ -12,9 +12,8 @@ from sklearn.utils.validation import check_array, check_consistent_length from sklearn.utils.validation import check_random_state -from sklearn.utils.testing import assert_raises, clean_warning_registry +from sklearn.utils.testing import assert_raises from sklearn.utils.testing import assert_raise_message -from sklearn.utils.testing import assert_equal from sklearn.utils.testing import assert_almost_equal from sklearn.utils.testing import assert_array_equal from sklearn.utils.testing import assert_array_almost_equal diff --git a/sklearn/metrics/tests/test_regression.py b/sklearn/metrics/tests/test_regression.py index 7903de36260f5..bc4cacb62e8d7 100644 --- a/sklearn/metrics/tests/test_regression.py +++ b/sklearn/metrics/tests/test_regression.py @@ -4,7 +4,6 @@ import pytest from sklearn.utils.testing import assert_raises, assert_raises_regex -from sklearn.utils.testing import assert_equal from sklearn.utils.testing import assert_almost_equal from sklearn.utils.testing import assert_array_equal from sklearn.utils.testing import assert_array_almost_equal diff --git a/sklearn/metrics/tests/test_score_objects.py b/sklearn/metrics/tests/test_score_objects.py index 8ce7fd6389271..f1b9120b06442 100644 --- a/sklearn/metrics/tests/test_score_objects.py +++ b/sklearn/metrics/tests/test_score_objects.py @@ -10,11 +10,9 @@ from sklearn.utils.testing import assert_almost_equal from sklearn.utils.testing import assert_array_equal -from sklearn.utils.testing import assert_equal from sklearn.utils.testing import assert_raises from sklearn.utils.testing import assert_raises_regexp from sklearn.utils.testing import ignore_warnings -from sklearn.utils.testing import assert_not_equal from sklearn.base import BaseEstimator from sklearn.metrics import (f1_score, r2_score, roc_auc_score, fbeta_score, diff --git a/sklearn/mixture/tests/test_bayesian_mixture.py b/sklearn/mixture/tests/test_bayesian_mixture.py index d62920af454e5..74426c81ef803 100644 --- a/sklearn/mixture/tests/test_bayesian_mixture.py +++ b/sklearn/mixture/tests/test_bayesian_mixture.py @@ -20,7 +20,7 @@ from sklearn.mixture.tests.test_gaussian_mixture import RandomData from sklearn.exceptions import ConvergenceWarning, NotFittedError -from sklearn.utils.testing import assert_greater_equal, ignore_warnings +from sklearn.utils.testing import ignore_warnings COVARIANCE_TYPE = ['full', 'tied', 'diag', 'spherical'] diff --git a/sklearn/mixture/tests/test_gaussian_mixture.py b/sklearn/mixture/tests/test_gaussian_mixture.py index c1f451c7d8495..66a42bd843283 100644 --- a/sklearn/mixture/tests/test_gaussian_mixture.py +++ b/sklearn/mixture/tests/test_gaussian_mixture.py @@ -28,9 +28,6 @@ from sklearn.utils.testing import assert_almost_equal from sklearn.utils.testing import assert_array_almost_equal from sklearn.utils.testing import assert_array_equal -from sklearn.utils.testing import assert_equal -from sklearn.utils.testing import assert_greater -from sklearn.utils.testing import assert_greater_equal from sklearn.utils.testing import assert_raise_message from sklearn.utils.testing import assert_warns_message from sklearn.utils.testing import ignore_warnings diff --git a/sklearn/model_selection/tests/test_search.py b/sklearn/model_selection/tests/test_search.py index 695e68aae7dfe..90a837e7f49f1 
100644 --- a/sklearn/model_selection/tests/test_search.py +++ b/sklearn/model_selection/tests/test_search.py @@ -13,8 +13,6 @@ import pytest from sklearn.utils.fixes import sp_version -from sklearn.utils.testing import assert_equal -from sklearn.utils.testing import assert_not_equal from sklearn.utils.testing import assert_raises from sklearn.utils.testing import assert_warns from sklearn.utils.testing import assert_warns_message @@ -23,7 +21,6 @@ from sklearn.utils.testing import assert_array_almost_equal from sklearn.utils.testing import assert_allclose from sklearn.utils.testing import assert_almost_equal -from sklearn.utils.testing import assert_greater_equal from sklearn.utils.testing import ignore_warnings from sklearn.utils.mocking import CheckingClassifier, MockDataFrame diff --git a/sklearn/model_selection/tests/test_split.py b/sklearn/model_selection/tests/test_split.py index f0c0f6f453c5c..a6f1fbee7cf90 100644 --- a/sklearn/model_selection/tests/test_split.py +++ b/sklearn/model_selection/tests/test_split.py @@ -7,13 +7,9 @@ from itertools import combinations from itertools import combinations_with_replacement -from sklearn.utils.testing import assert_equal from sklearn.utils.testing import assert_almost_equal from sklearn.utils.testing import assert_raises from sklearn.utils.testing import assert_raises_regexp -from sklearn.utils.testing import assert_greater -from sklearn.utils.testing import assert_greater_equal -from sklearn.utils.testing import assert_not_equal from sklearn.utils.testing import assert_array_almost_equal from sklearn.utils.testing import assert_array_equal from sklearn.utils.testing import assert_warns_message diff --git a/sklearn/model_selection/tests/test_validation.py b/sklearn/model_selection/tests/test_validation.py index 2c84439a7c29d..6fa2e4fee5ed7 100644 --- a/sklearn/model_selection/tests/test_validation.py +++ b/sklearn/model_selection/tests/test_validation.py @@ -13,15 +13,12 @@ from sklearn.model_selection.tests.test_search import FailingClassifier -from sklearn.utils.testing import assert_equal from sklearn.utils.testing import assert_almost_equal from sklearn.utils.testing import assert_raises from sklearn.utils.testing import assert_raise_message from sklearn.utils.testing import assert_warns from sklearn.utils.testing import assert_warns_message from sklearn.utils.testing import assert_raises_regex -from sklearn.utils.testing import assert_greater -from sklearn.utils.testing import assert_less from sklearn.utils.testing import assert_array_almost_equal from sklearn.utils.testing import assert_array_equal from sklearn.utils.testing import assert_allclose diff --git a/sklearn/neighbors/tests/test_lof.py b/sklearn/neighbors/tests/test_lof.py index a00017494e328..1dc13f4ac759a 100644 --- a/sklearn/neighbors/tests/test_lof.py +++ b/sklearn/neighbors/tests/test_lof.py @@ -13,9 +13,7 @@ from sklearn.metrics import roc_auc_score from sklearn.utils import check_random_state -from sklearn.utils.testing import assert_greater from sklearn.utils.testing import assert_array_almost_equal -from sklearn.utils.testing import assert_equal from sklearn.utils.testing import assert_warns_message from sklearn.utils.testing import assert_raises from sklearn.utils.testing import assert_raises_regex diff --git a/sklearn/neighbors/tests/test_neighbors.py b/sklearn/neighbors/tests/test_neighbors.py index afa7159d3d61c..6e440aebfc427 100644 --- a/sklearn/neighbors/tests/test_neighbors.py +++ b/sklearn/neighbors/tests/test_neighbors.py @@ -16,9 +16,6 @@ from 
sklearn.neighbors.base import VALID_METRICS_SPARSE, VALID_METRICS from sklearn.utils.testing import assert_array_almost_equal from sklearn.utils.testing import assert_array_equal -from sklearn.utils.testing import assert_equal -from sklearn.utils.testing import assert_greater -from sklearn.utils.testing import assert_in from sklearn.utils.testing import assert_raises from sklearn.utils.testing import assert_raises_regex from sklearn.utils.testing import assert_warns diff --git a/sklearn/preprocessing/tests/test_data.py b/sklearn/preprocessing/tests/test_data.py index 6d21ba340b1ae..46769cad40edf 100644 --- a/sklearn/preprocessing/tests/test_data.py +++ b/sklearn/preprocessing/tests/test_data.py @@ -21,9 +21,6 @@ from sklearn.utils.testing import assert_array_almost_equal from sklearn.utils.testing import assert_array_equal from sklearn.utils.testing import assert_array_less -from sklearn.utils.testing import assert_equal -from sklearn.utils.testing import assert_greater_equal -from sklearn.utils.testing import assert_less_equal from sklearn.utils.testing import assert_raises from sklearn.utils.testing import assert_raises_regex from sklearn.utils.testing import assert_warns_message diff --git a/sklearn/preprocessing/tests/test_function_transformer.py b/sklearn/preprocessing/tests/test_function_transformer.py index 1735b0021c545..73d7f40927b5e 100644 --- a/sklearn/preprocessing/tests/test_function_transformer.py +++ b/sklearn/preprocessing/tests/test_function_transformer.py @@ -6,7 +6,6 @@ from sklearn.utils.testing import (assert_equal, assert_array_equal, assert_allclose_dense_sparse) from sklearn.utils.testing import assert_warns_message, assert_no_warnings -from sklearn.utils.testing import ignore_warnings def _make_func(args_store, kwargs_store, func=lambda X, *a, **k: X): diff --git a/sklearn/preprocessing/tests/test_label.py b/sklearn/preprocessing/tests/test_label.py index ebb7d6b0138f8..a095f4ec64cab 100644 --- a/sklearn/preprocessing/tests/test_label.py +++ b/sklearn/preprocessing/tests/test_label.py @@ -12,7 +12,6 @@ from sklearn.utils.multiclass import type_of_target from sklearn.utils.testing import assert_array_equal -from sklearn.utils.testing import assert_equal from sklearn.utils.testing import assert_raises from sklearn.utils.testing import assert_raise_message from sklearn.utils.testing import assert_warns_message diff --git a/sklearn/semi_supervised/tests/test_label_propagation.py b/sklearn/semi_supervised/tests/test_label_propagation.py index d5a4449ee9cc5..aff9bba67d298 100644 --- a/sklearn/semi_supervised/tests/test_label_propagation.py +++ b/sklearn/semi_supervised/tests/test_label_propagation.py @@ -2,7 +2,6 @@ import numpy as np -from sklearn.utils.testing import assert_equal from sklearn.utils.testing import assert_warns from sklearn.utils.testing import assert_raises from sklearn.utils.testing import assert_no_warnings diff --git a/sklearn/svm/tests/test_svm.py b/sklearn/svm/tests/test_svm.py index d7f7a1534f728..55d60b5351a4b 100644 --- a/sklearn/svm/tests/test_svm.py +++ b/sklearn/svm/tests/test_svm.py @@ -17,8 +17,6 @@ from sklearn.metrics import f1_score from sklearn.metrics.pairwise import rbf_kernel from sklearn.utils import check_random_state -from sklearn.utils.testing import assert_equal -from sklearn.utils.testing import assert_greater, assert_in, assert_less from sklearn.utils.testing import assert_raises_regexp, assert_warns from sklearn.utils.testing import assert_warns_message, assert_raise_message from sklearn.utils.testing import ignore_warnings, 
assert_raises diff --git a/sklearn/tests/test_base.py b/sklearn/tests/test_base.py index 257753c23584f..032d9b232523f 100644 --- a/sklearn/tests/test_base.py +++ b/sklearn/tests/test_base.py @@ -7,12 +7,9 @@ import sklearn from sklearn.utils.testing import assert_array_equal -from sklearn.utils.testing import assert_equal -from sklearn.utils.testing import assert_not_equal from sklearn.utils.testing import assert_raises from sklearn.utils.testing import assert_no_warnings from sklearn.utils.testing import assert_warns_message -from sklearn.utils.testing import assert_dict_equal from sklearn.utils.testing import ignore_warnings from sklearn.base import BaseEstimator, clone, is_classifier diff --git a/sklearn/tests/test_common.py b/sklearn/tests/test_common.py index af5ea34e1d604..51f71f2f7919b 100644 --- a/sklearn/tests/test_common.py +++ b/sklearn/tests/test_common.py @@ -16,8 +16,6 @@ import pytest from sklearn.utils.testing import all_estimators -from sklearn.utils.testing import assert_equal -from sklearn.utils.testing import assert_in from sklearn.utils.testing import ignore_warnings from sklearn.exceptions import ConvergenceWarning, SkipTestWarning diff --git a/sklearn/tests/test_discriminant_analysis.py b/sklearn/tests/test_discriminant_analysis.py index e6527cc4330f7..6a32c4ec15058 100644 --- a/sklearn/tests/test_discriminant_analysis.py +++ b/sklearn/tests/test_discriminant_analysis.py @@ -11,12 +11,10 @@ assert_warns_message) from sklearn.utils.testing import assert_array_almost_equal from sklearn.utils.testing import assert_allclose -from sklearn.utils.testing import assert_equal from sklearn.utils.testing import assert_almost_equal from sklearn.utils.testing import assert_raises from sklearn.utils.testing import assert_raise_message from sklearn.utils.testing import assert_warns -from sklearn.utils.testing import assert_greater from sklearn.utils.testing import ignore_warnings from sklearn.datasets import make_blobs diff --git a/sklearn/tests/test_docstring_parameters.py b/sklearn/tests/test_docstring_parameters.py index 936b691a12485..a59ab5c7442dd 100644 --- a/sklearn/tests/test_docstring_parameters.py +++ b/sklearn/tests/test_docstring_parameters.py @@ -7,7 +7,7 @@ import importlib from pkgutil import walk_packages -from inspect import getsource, isabstract, signature +from inspect import signature import sklearn from sklearn.utils import IS_PYPY diff --git a/sklearn/tests/test_dummy.py b/sklearn/tests/test_dummy.py index dc80b2d073d81..4301a4c07654f 100644 --- a/sklearn/tests/test_dummy.py +++ b/sklearn/tests/test_dummy.py @@ -7,7 +7,6 @@ from sklearn.base import clone from sklearn.utils.testing import assert_array_equal from sklearn.utils.testing import assert_array_almost_equal -from sklearn.utils.testing import assert_equal from sklearn.utils.testing import assert_almost_equal from sklearn.utils.testing import assert_raises from sklearn.utils.testing import assert_warns_message diff --git a/sklearn/tests/test_init.py b/sklearn/tests/test_init.py index c2a216dddb937..06aeeacd1c9a0 100644 --- a/sklearn/tests/test_init.py +++ b/sklearn/tests/test_init.py @@ -1,6 +1,5 @@ # Basic unittests to test functioning of module's top-level -from sklearn.utils.testing import assert_equal __author__ = 'Yaroslav Halchenko' __license__ = 'BSD' diff --git a/sklearn/tests/test_kernel_approximation.py b/sklearn/tests/test_kernel_approximation.py index 79848f5561799..ed0d95d568b6a 100644 --- a/sklearn/tests/test_kernel_approximation.py +++ b/sklearn/tests/test_kernel_approximation.py @@ -2,10 
+2,8 @@ from scipy.sparse import csr_matrix import pytest -from sklearn.utils.testing import assert_array_equal, assert_equal -from sklearn.utils.testing import assert_not_equal +from sklearn.utils.testing import assert_array_equal from sklearn.utils.testing import assert_array_almost_equal, assert_raises -from sklearn.utils.testing import assert_less_equal from sklearn.metrics.pairwise import kernel_metrics from sklearn.kernel_approximation import RBFSampler diff --git a/sklearn/tests/test_multiclass.py b/sklearn/tests/test_multiclass.py index 61b34a7509200..aef4080e85e1d 100644 --- a/sklearn/tests/test_multiclass.py +++ b/sklearn/tests/test_multiclass.py @@ -6,11 +6,9 @@ from re import escape from sklearn.utils.testing import assert_array_equal -from sklearn.utils.testing import assert_equal from sklearn.utils.testing import assert_almost_equal from sklearn.utils.testing import assert_raises from sklearn.utils.testing import assert_warns -from sklearn.utils.testing import assert_greater from sklearn.utils.testing import assert_raise_message from sklearn.utils.testing import assert_raises_regexp from sklearn.multiclass import OneVsRestClassifier diff --git a/sklearn/tests/test_multioutput.py b/sklearn/tests/test_multioutput.py index 35a739fee122b..2f4369c8e8085 100644 --- a/sklearn/tests/test_multioutput.py +++ b/sklearn/tests/test_multioutput.py @@ -9,9 +9,6 @@ from sklearn.utils.testing import assert_raises_regex from sklearn.utils.testing import assert_raise_message from sklearn.utils.testing import assert_array_equal -from sklearn.utils.testing import assert_equal -from sklearn.utils.testing import assert_greater -from sklearn.utils.testing import assert_not_equal from sklearn.utils.testing import assert_array_almost_equal from sklearn import datasets from sklearn.base import clone diff --git a/sklearn/tests/test_naive_bayes.py b/sklearn/tests/test_naive_bayes.py index 018860d96fa84..d8bfcc12993c2 100644 --- a/sklearn/tests/test_naive_bayes.py +++ b/sklearn/tests/test_naive_bayes.py @@ -13,10 +13,8 @@ from sklearn.utils.testing import assert_almost_equal from sklearn.utils.testing import assert_array_equal from sklearn.utils.testing import assert_array_almost_equal -from sklearn.utils.testing import assert_equal from sklearn.utils.testing import assert_raises from sklearn.utils.testing import assert_raise_message -from sklearn.utils.testing import assert_greater from sklearn.utils.testing import assert_warns from sklearn.utils.testing import assert_no_warnings diff --git a/sklearn/tests/test_pipeline.py b/sklearn/tests/test_pipeline.py index 0a2e67d599d85..e02b5ef96b7b0 100644 --- a/sklearn/tests/test_pipeline.py +++ b/sklearn/tests/test_pipeline.py @@ -16,11 +16,9 @@ from sklearn.utils.testing import assert_raises from sklearn.utils.testing import assert_raises_regex from sklearn.utils.testing import assert_raise_message -from sklearn.utils.testing import assert_equal from sklearn.utils.testing import assert_allclose from sklearn.utils.testing import assert_array_equal from sklearn.utils.testing import assert_array_almost_equal -from sklearn.utils.testing import assert_dict_equal from sklearn.utils.testing import assert_no_warnings from sklearn.base import clone, BaseEstimator diff --git a/sklearn/tests/test_random_projection.py b/sklearn/tests/test_random_projection.py index 93d22ba0e88ef..000a0488f9bed 100644 --- a/sklearn/tests/test_random_projection.py +++ b/sklearn/tests/test_random_projection.py @@ -13,13 +13,10 @@ from sklearn.random_projection import SparseRandomProjection 
from sklearn.random_projection import GaussianRandomProjection -from sklearn.utils.testing import assert_less from sklearn.utils.testing import assert_raises from sklearn.utils.testing import assert_raise_message from sklearn.utils.testing import assert_array_equal -from sklearn.utils.testing import assert_equal from sklearn.utils.testing import assert_almost_equal -from sklearn.utils.testing import assert_in from sklearn.utils.testing import assert_array_almost_equal from sklearn.utils.testing import assert_warns from sklearn.exceptions import DataDimensionalityWarning diff --git a/sklearn/tree/tests/test_tree.py b/sklearn/tree/tests/test_tree.py index dbce4a5d0d560..12b424b9bf3b7 100644 --- a/sklearn/tree/tests/test_tree.py +++ b/sklearn/tree/tests/test_tree.py @@ -22,13 +22,7 @@ from sklearn.utils.testing import assert_array_equal from sklearn.utils.testing import assert_array_almost_equal from sklearn.utils.testing import assert_almost_equal -from sklearn.utils.testing import assert_equal -from sklearn.utils.testing import assert_in from sklearn.utils.testing import assert_raises -from sklearn.utils.testing import assert_greater -from sklearn.utils.testing import assert_greater_equal -from sklearn.utils.testing import assert_less -from sklearn.utils.testing import assert_less_equal from sklearn.utils.testing import assert_warns from sklearn.utils.testing import assert_warns_message from sklearn.utils.testing import ignore_warnings diff --git a/sklearn/utils/tests/test_class_weight.py b/sklearn/utils/tests/test_class_weight.py index e67fa6eb898ec..59db6fe5f27a7 100644 --- a/sklearn/utils/tests/test_class_weight.py +++ b/sklearn/utils/tests/test_class_weight.py @@ -10,7 +10,6 @@ from sklearn.utils.testing import assert_almost_equal from sklearn.utils.testing import assert_raises from sklearn.utils.testing import assert_raise_message -from sklearn.utils.testing import assert_equal def test_compute_class_weight(): diff --git a/sklearn/utils/tests/test_extmath.py b/sklearn/utils/tests/test_extmath.py index bc8b598764b1a..2da6e5f5e9943 100644 --- a/sklearn/utils/tests/test_extmath.py +++ b/sklearn/utils/tests/test_extmath.py @@ -12,12 +12,10 @@ import pytest -from sklearn.utils.testing import assert_equal from sklearn.utils.testing import assert_almost_equal from sklearn.utils.testing import assert_allclose from sklearn.utils.testing import assert_array_equal from sklearn.utils.testing import assert_array_almost_equal -from sklearn.utils.testing import assert_greater from sklearn.utils.testing import assert_warns from sklearn.utils.testing import assert_warns_message from sklearn.utils.testing import skip_if_32bit diff --git a/sklearn/utils/tests/test_fast_dict.py b/sklearn/utils/tests/test_fast_dict.py index b060c5f599e9e..4afbf9e1cbbab 100644 --- a/sklearn/utils/tests/test_fast_dict.py +++ b/sklearn/utils/tests/test_fast_dict.py @@ -3,7 +3,6 @@ import numpy as np from sklearn.utils.fast_dict import IntFloatDict, argmin -from sklearn.utils.testing import assert_equal def test_int_float_dict(): diff --git a/sklearn/utils/tests/test_multiclass.py b/sklearn/utils/tests/test_multiclass.py index 4dc44e797e211..e28adc249f04d 100644 --- a/sklearn/utils/tests/test_multiclass.py +++ b/sklearn/utils/tests/test_multiclass.py @@ -13,7 +13,6 @@ from sklearn.utils.testing import assert_array_equal from sklearn.utils.testing import assert_array_almost_equal -from sklearn.utils.testing import assert_equal from sklearn.utils.testing import assert_raises from sklearn.utils.testing import assert_raises_regex 
 from sklearn.utils.testing import assert_allclose
diff --git a/sklearn/utils/tests/test_murmurhash.py b/sklearn/utils/tests/test_murmurhash.py
index abd03bff61d29..838c8c8000b9e 100644
--- a/sklearn/utils/tests/test_murmurhash.py
+++ b/sklearn/utils/tests/test_murmurhash.py
@@ -6,7 +6,6 @@
 from sklearn.utils.murmurhash import murmurhash3_32
 from numpy.testing import assert_array_almost_equal
 from numpy.testing import assert_array_equal
-from sklearn.utils.testing import assert_equal
 def test_mmhash3_int():
diff --git a/sklearn/utils/tests/test_validation.py b/sklearn/utils/tests/test_validation.py
index 7cd6929892170..0aa8eae22b1e2 100644
--- a/sklearn/utils/tests/test_validation.py
+++ b/sklearn/utils/tests/test_validation.py
@@ -11,7 +11,6 @@
 import numpy as np
 import scipy.sparse as sp
-from sklearn.utils.testing import assert_equal
 from sklearn.utils.testing import assert_raises
 from sklearn.utils.testing import assert_raises_regex
 from sklearn.utils.testing import assert_no_warnings