From d0f81bbfa644e58e4059c341f1f2048cff4d1a58 Mon Sep 17 00:00:00 2001 From: adrinjalali Date: Sun, 30 Jun 2019 14:53:33 +0200 Subject: [PATCH 01/22] fix cluster and compose --- .../tests/test_affinity_propagation.py | 6 +- sklearn/cluster/tests/test_bicluster.py | 30 ++++---- sklearn/cluster/tests/test_birch.py | 12 ++-- sklearn/cluster/tests/test_dbscan.py | 38 +++++------ sklearn/cluster/tests/test_hierarchical.py | 10 +-- sklearn/cluster/tests/test_k_means.py | 68 +++++++++---------- sklearn/cluster/tests/test_optics.py | 4 +- sklearn/cluster/tests/test_spectral.py | 2 +- .../compose/tests/test_column_transformer.py | 28 ++++---- 9 files changed, 99 insertions(+), 99 deletions(-) diff --git a/sklearn/cluster/tests/test_affinity_propagation.py b/sklearn/cluster/tests/test_affinity_propagation.py index d62df35b2cc55..57ab89cfd6e54 100644 --- a/sklearn/cluster/tests/test_affinity_propagation.py +++ b/sklearn/cluster/tests/test_affinity_propagation.py @@ -37,7 +37,7 @@ def test_affinity_propagation(): n_clusters_ = len(cluster_centers_indices) - assert_equal(n_clusters, n_clusters_) + assert n_clusters == n_clusters_ af = AffinityPropagation(preference=preference, affinity="precomputed") labels_precomputed = af.fit(S).labels_ @@ -50,8 +50,8 @@ def test_affinity_propagation(): cluster_centers_indices = af.cluster_centers_indices_ n_clusters_ = len(cluster_centers_indices) - assert_equal(np.unique(labels).size, n_clusters_) - assert_equal(n_clusters, n_clusters_) + assert np.unique(labels).size == n_clusters_ + assert n_clusters == n_clusters_ # Test also with no copy _, labels_no_copy = affinity_propagation(S, preference=preference, diff --git a/sklearn/cluster/tests/test_bicluster.py b/sklearn/cluster/tests/test_bicluster.py index dd5e91c18c27e..a5b486246c821 100644 --- a/sklearn/cluster/tests/test_bicluster.py +++ b/sklearn/cluster/tests/test_bicluster.py @@ -58,8 +58,8 @@ def _test_shape_indices(model): for i in range(model.n_clusters): m, n = model.get_shape(i) i_ind, j_ind = model.get_indices(i) - assert_equal(len(i_ind), m) - assert_equal(len(j_ind), n) + assert len(i_ind) == m + assert len(j_ind) == n def test_spectral_coclustering(): @@ -82,11 +82,11 @@ def test_spectral_coclustering(): **kwargs) model.fit(mat) - assert_equal(model.rows_.shape, (3, 30)) + assert model.rows_.shape == (3, 30) assert_array_equal(model.rows_.sum(axis=0), np.ones(30)) assert_array_equal(model.columns_.sum(axis=0), np.ones(30)) - assert_equal(consensus_score(model.biclusters_, - (rows, cols)), 1) + assert consensus_score(model.biclusters_, + (rows, cols)) == 1 _test_shape_indices(model) @@ -120,14 +120,14 @@ def test_spectral_biclustering(): else: model.fit(mat) - assert_equal(model.rows_.shape, (9, 30)) - assert_equal(model.columns_.shape, (9, 30)) + assert model.rows_.shape == (9, 30) + assert model.columns_.shape == (9, 30) assert_array_equal(model.rows_.sum(axis=0), np.repeat(3, 30)) assert_array_equal(model.columns_.sum(axis=0), np.repeat(3, 30)) - assert_equal(consensus_score(model.biclusters_, - (rows, cols)), 1) + assert consensus_score(model.biclusters_, + (rows, cols)) == 1 _test_shape_indices(model) @@ -216,20 +216,20 @@ def test_perfect_checkerboard(): S, rows, cols = make_checkerboard((30, 30), 3, noise=0, random_state=0) model.fit(S) - assert_equal(consensus_score(model.biclusters_, - (rows, cols)), 1) + assert consensus_score(model.biclusters_, + (rows, cols)) == 1 S, rows, cols = make_checkerboard((40, 30), 3, noise=0, random_state=0) model.fit(S) - 
assert_equal(consensus_score(model.biclusters_, - (rows, cols)), 1) + assert consensus_score(model.biclusters_, + (rows, cols)) == 1 S, rows, cols = make_checkerboard((30, 40), 3, noise=0, random_state=0) model.fit(S) - assert_equal(consensus_score(model.biclusters_, - (rows, cols)), 1) + assert consensus_score(model.biclusters_, + (rows, cols)) == 1 def test_errors(): diff --git a/sklearn/cluster/tests/test_birch.py b/sklearn/cluster/tests/test_birch.py index d9dfbbd0c3d2b..522b0304a0111 100644 --- a/sklearn/cluster/tests/test_birch.py +++ b/sklearn/cluster/tests/test_birch.py @@ -31,8 +31,8 @@ def test_n_samples_leaves_roots(): n_samples_root = sum([sc.n_samples_ for sc in brc.root_.subclusters_]) n_samples_leaves = sum([sc.n_samples_ for leaf in brc._get_leaves() for sc in leaf.subclusters_]) - assert_equal(n_samples_leaves, X.shape[0]) - assert_equal(n_samples_root, X.shape[0]) + assert n_samples_leaves == X.shape[0] + assert n_samples_root == X.shape[0] def test_partial_fit(): @@ -76,8 +76,8 @@ def test_n_clusters(): X, y = make_blobs(n_samples=100, centers=10) brc1 = Birch(n_clusters=10) brc1.fit(X) - assert_greater(len(brc1.subcluster_centers_), 10) - assert_equal(len(np.unique(brc1.labels_)), 10) + assert len(brc1.subcluster_centers_) > 10 + assert len(np.unique(brc1.labels_)) == 10 # Test that n_clusters = Agglomerative Clustering gives # the same results. @@ -114,7 +114,7 @@ def test_sparse_X(): def check_branching_factor(node, branching_factor): subclusters = node.subclusters_ - assert_greater_equal(branching_factor, len(subclusters)) + assert branching_factor >= len(subclusters) for cluster in subclusters: if cluster.child_: check_branching_factor(cluster.child_, branching_factor) @@ -146,7 +146,7 @@ def check_threshold(birch_instance, threshold): while current_leaf: subclusters = current_leaf.subclusters_ for sc in subclusters: - assert_greater_equal(threshold, sc.radius) + assert threshold >= sc.radius current_leaf = current_leaf.next_leaf_ diff --git a/sklearn/cluster/tests/test_dbscan.py b/sklearn/cluster/tests/test_dbscan.py index 0c4ec6c78179c..02f110e663ec4 100644 --- a/sklearn/cluster/tests/test_dbscan.py +++ b/sklearn/cluster/tests/test_dbscan.py @@ -41,13 +41,13 @@ def test_dbscan_similarity(): # number of clusters, ignoring noise if present n_clusters_1 = len(set(labels)) - (1 if -1 in labels else 0) - assert_equal(n_clusters_1, n_clusters) + assert n_clusters_1 == n_clusters db = DBSCAN(metric="precomputed", eps=eps, min_samples=min_samples) labels = db.fit(D).labels_ n_clusters_2 = len(set(labels)) - int(-1 in labels) - assert_equal(n_clusters_2, n_clusters) + assert n_clusters_2 == n_clusters def test_dbscan_feature(): @@ -64,13 +64,13 @@ def test_dbscan_feature(): # number of clusters, ignoring noise if present n_clusters_1 = len(set(labels)) - int(-1 in labels) - assert_equal(n_clusters_1, n_clusters) + assert n_clusters_1 == n_clusters db = DBSCAN(metric=metric, eps=eps, min_samples=min_samples) labels = db.fit(X).labels_ n_clusters_2 = len(set(labels)) - int(-1 in labels) - assert_equal(n_clusters_2, n_clusters) + assert n_clusters_2 == n_clusters def test_dbscan_sparse(): @@ -123,7 +123,7 @@ def test_dbscan_no_core_samples(): db = DBSCAN(min_samples=6).fit(X_) assert_array_equal(db.components_, np.empty((0, X_.shape[1]))) assert_array_equal(db.labels_, -1) - assert_equal(db.core_sample_indices_.shape, (0,)) + assert db.core_sample_indices_.shape == (0,) def test_dbscan_callable(): @@ -142,14 +142,14 @@ def test_dbscan_callable(): # number of clusters, 
ignoring noise if present n_clusters_1 = len(set(labels)) - int(-1 in labels) - assert_equal(n_clusters_1, n_clusters) + assert n_clusters_1 == n_clusters db = DBSCAN(metric=metric, eps=eps, min_samples=min_samples, algorithm='ball_tree') labels = db.fit(X).labels_ n_clusters_2 = len(set(labels)) - int(-1 in labels) - assert_equal(n_clusters_2, n_clusters) + assert n_clusters_2 == n_clusters def test_dbscan_metric_params(): @@ -191,32 +191,32 @@ def test_dbscan_balltree(): # number of clusters, ignoring noise if present n_clusters_1 = len(set(labels)) - int(-1 in labels) - assert_equal(n_clusters_1, n_clusters) + assert n_clusters_1 == n_clusters db = DBSCAN(p=2.0, eps=eps, min_samples=min_samples, algorithm='ball_tree') labels = db.fit(X).labels_ n_clusters_2 = len(set(labels)) - int(-1 in labels) - assert_equal(n_clusters_2, n_clusters) + assert n_clusters_2 == n_clusters db = DBSCAN(p=2.0, eps=eps, min_samples=min_samples, algorithm='kd_tree') labels = db.fit(X).labels_ n_clusters_3 = len(set(labels)) - int(-1 in labels) - assert_equal(n_clusters_3, n_clusters) + assert n_clusters_3 == n_clusters db = DBSCAN(p=1.0, eps=eps, min_samples=min_samples, algorithm='ball_tree') labels = db.fit(X).labels_ n_clusters_4 = len(set(labels)) - int(-1 in labels) - assert_equal(n_clusters_4, n_clusters) + assert n_clusters_4 == n_clusters db = DBSCAN(leaf_size=20, eps=eps, min_samples=min_samples, algorithm='ball_tree') labels = db.fit(X).labels_ n_clusters_5 = len(set(labels)) - int(-1 in labels) - assert_equal(n_clusters_5, n_clusters) + assert n_clusters_5 == n_clusters def test_input_validation(): @@ -247,18 +247,18 @@ def test_dbscan_badargs(): def test_pickle(): obj = DBSCAN() s = pickle.dumps(obj) - assert_equal(type(pickle.loads(s)), obj.__class__) + assert type(pickle.loads(s)) == obj.__class__ def test_boundaries(): # ensure min_samples is inclusive of core point core, _ = dbscan([[0], [1]], eps=2, min_samples=2) - assert_in(0, core) + assert 0 in core # ensure eps is inclusive of circumference core, _ = dbscan([[0], [1], [1]], eps=1, min_samples=2) - assert_in(0, core) + assert 0 in core core, _ = dbscan([[0], [1], [1]], eps=.99, min_samples=2) - assert_not_in(0, core) + assert 0 not in core def test_weighted_dbscan(): @@ -293,7 +293,7 @@ def test_weighted_dbscan(): rng = np.random.RandomState(42) sample_weight = rng.randint(0, 5, X.shape[0]) core1, label1 = dbscan(X, sample_weight=sample_weight) - assert_equal(len(label1), len(X)) + assert len(label1) == len(X) X_repeated = np.repeat(X, sample_weight, axis=0) core_repeated, label_repeated = dbscan(X_repeated) @@ -364,11 +364,11 @@ def test_dbscan_precomputed_metric_with_degenerate_input_arrays(): # more details X = np.eye(10) labels = DBSCAN(eps=0.5, metric='precomputed').fit(X).labels_ - assert_equal(len(set(labels)), 1) + assert len(set(labels)) == 1 X = np.zeros((10, 10)) labels = DBSCAN(eps=0.5, metric='precomputed').fit(X).labels_ - assert_equal(len(set(labels)), 1) + assert len(set(labels)) == 1 def test_dbscan_precomputed_metric_with_initial_rows_zero(): diff --git a/sklearn/cluster/tests/test_hierarchical.py b/sklearn/cluster/tests/test_hierarchical.py index 00c6813384322..7cd78a3e0ac40 100644 --- a/sklearn/cluster/tests/test_hierarchical.py +++ b/sklearn/cluster/tests/test_hierarchical.py @@ -93,7 +93,7 @@ def test_unstructured_linkage_tree(): children, n_nodes, n_leaves, parent = assert_warns( UserWarning, ward_tree, this_X.T, n_clusters=10) n_nodes = 2 * X.shape[1] - 1 - assert_equal(len(children) + n_leaves, n_nodes) + assert 
len(children) + n_leaves == n_nodes for tree_builder in _TREE_BUILDERS.values(): for this_X in (X, X[0]): @@ -102,7 +102,7 @@ def test_unstructured_linkage_tree(): UserWarning, tree_builder, this_X.T, n_clusters=10) n_nodes = 2 * X.shape[1] - 1 - assert_equal(len(children) + n_leaves, n_nodes) + assert len(children) + n_leaves == n_nodes def test_height_linkage_tree(): @@ -538,7 +538,7 @@ def test_compute_full_tree(): agc.fit(X) n_samples = X.shape[0] n_nodes = agc.children_.shape[0] - assert_equal(n_nodes, n_samples - 1) + assert n_nodes == n_samples - 1 # When n_clusters is large, greater than max of 100 and 0.02 * n_samples. # we should stop when there are n_clusters. @@ -550,7 +550,7 @@ def test_compute_full_tree(): agc.fit(X) n_samples = X.shape[0] n_nodes = agc.children_.shape[0] - assert_equal(n_nodes, n_samples - n_clusters) + assert n_nodes == n_samples - n_clusters def test_n_components(): @@ -562,7 +562,7 @@ def test_n_components(): connectivity = np.eye(5) for linkage_func in _TREE_BUILDERS.values(): - assert_equal(ignore_warnings(linkage_func)(X, connectivity)[1], 5) + assert ignore_warnings(linkage_func)(X, connectivity)[1] == 5 def test_agg_n_clusters(): diff --git a/sklearn/cluster/tests/test_k_means.py b/sklearn/cluster/tests/test_k_means.py index 3788039194520..212c2311a84fd 100644 --- a/sklearn/cluster/tests/test_k_means.py +++ b/sklearn/cluster/tests/test_k_means.py @@ -155,13 +155,13 @@ def test_minibatch_update_consistency(): old_inertia, incremental_diff = _mini_batch_step( X_mb, sample_weight_mb, x_mb_squared_norms, new_centers, weight_sums, buffer, 1, None, random_reassign=False) - assert_greater(old_inertia, 0.0) + assert old_inertia > 0.0 # compute the new inertia on the same batch to check that it decreased labels, new_inertia = _labels_inertia( X_mb, sample_weight_mb, x_mb_squared_norms, new_centers) - assert_greater(new_inertia, 0.0) - assert_less(new_inertia, old_inertia) + assert new_inertia > 0.0 + assert new_inertia < old_inertia # check that the incremental difference computation is matching the # final observed value @@ -172,13 +172,13 @@ def test_minibatch_update_consistency(): old_inertia_csr, incremental_diff_csr = _mini_batch_step( X_mb_csr, sample_weight_mb, x_mb_squared_norms_csr, new_centers_csr, weight_sums_csr, buffer_csr, 1, None, random_reassign=False) - assert_greater(old_inertia_csr, 0.0) + assert old_inertia_csr > 0.0 # compute the new inertia on the same batch to check that it decreased labels_csr, new_inertia_csr = _labels_inertia( X_mb_csr, sample_weight_mb, x_mb_squared_norms_csr, new_centers_csr) - assert_greater(new_inertia_csr, 0.0) - assert_less(new_inertia_csr, old_inertia_csr) + assert new_inertia_csr > 0.0 + assert new_inertia_csr < old_inertia_csr # check that the incremental difference computation is matching the # final observed value @@ -197,14 +197,14 @@ def _check_fitted_model(km): # check that the number of clusters centers and distinct labels match # the expectation centers = km.cluster_centers_ - assert_equal(centers.shape, (n_clusters, n_features)) + assert centers.shape == (n_clusters, n_features) labels = km.labels_ - assert_equal(np.unique(labels).shape[0], n_clusters) + assert np.unique(labels).shape[0] == n_clusters # check that the labels assignment are perfect (up to a permutation) - assert_equal(v_measure_score(true_labels, labels), 1.0) - assert_greater(km.inertia_, 0.0) + assert v_measure_score(true_labels, labels) == 1.0 + assert km.inertia_ > 0.0 # check error on dataset being too small 
assert_raise_message(ValueError, "n_samples=1 should be >= n_clusters=%d" @@ -400,14 +400,14 @@ def test_minibatch_sensible_reassign_fit(): init="random") mb_k_means.fit(zeroed_X) # there should not be too many exact zero cluster centers - assert_greater(mb_k_means.cluster_centers_.any(axis=1).sum(), 10) + assert mb_k_means.cluster_centers_.any(axis=1).sum() > 10 # do the same with batch-size > X.shape[0] (regression test) mb_k_means = MiniBatchKMeans(n_clusters=20, batch_size=201, random_state=42, init="random") mb_k_means.fit(zeroed_X) # there should not be too many exact zero cluster centers - assert_greater(mb_k_means.cluster_centers_.any(axis=1).sum(), 10) + assert mb_k_means.cluster_centers_.any(axis=1).sum() > 10 def test_minibatch_sensible_reassign_partial_fit(): @@ -418,7 +418,7 @@ def test_minibatch_sensible_reassign_partial_fit(): for i in range(100): mb_k_means.partial_fit(zeroed_X) # there should not be too many exact zero cluster centers - assert_greater(mb_k_means.cluster_centers_.any(axis=1).sum(), 10) + assert mb_k_means.cluster_centers_.any(axis=1).sum() > 10 def test_minibatch_reassign(): @@ -445,7 +445,7 @@ def test_minibatch_reassign(): reassignment_ratio=1, verbose=True) finally: sys.stdout = old_stdout - assert_greater(score_before, mb_k_means.score(this_X)) + assert score_before > mb_k_means.score(this_X) # Give a perfect initialization, with a small reassignment_ratio, # no center should be reassigned @@ -510,14 +510,14 @@ def test_mini_batch_k_means_random_init_partial_fit(): # compute the labeling on the complete dataset labels = km.predict(X) - assert_equal(v_measure_score(true_labels, labels), 1.0) + assert v_measure_score(true_labels, labels) == 1.0 def test_minibatch_default_init_size(): mb_k_means = MiniBatchKMeans(init=centers.copy(), n_clusters=n_clusters, batch_size=10, random_state=42, n_init=1).fit(X) - assert_equal(mb_k_means.init_size_, 3 * mb_k_means.batch_size) + assert mb_k_means.init_size_ == 3 * mb_k_means.batch_size _check_fitted_model(mb_k_means) @@ -531,8 +531,8 @@ def test_minibatch_set_init_size(): mb_k_means = MiniBatchKMeans(init=centers.copy(), n_clusters=n_clusters, init_size=666, random_state=42, n_init=1).fit(X) - assert_equal(mb_k_means.init_size, 666) - assert_equal(mb_k_means.init_size_, n_samples) + assert mb_k_means.init_size == 666 + assert mb_k_means.init_size_ == n_samples _check_fitted_model(mb_k_means) @@ -565,7 +565,7 @@ def test_k_means_non_collapsed(): km.fit(my_X) # centers must not been collapsed - assert_equal(len(np.unique(km.labels_)), 3) + assert len(np.unique(km.labels_)) == 3 centers = km.cluster_centers_ assert np.linalg.norm(centers[0] - centers[1]) >= 0.1 @@ -636,7 +636,7 @@ def test_int_input(): ] for km in fitted_models: - assert_equal(km.cluster_centers_.dtype, np.float64) + assert km.cluster_centers_.dtype == np.float64 expected_labels = [0, 1, 1, 0, 0, 1] scores = np.array([v_measure_score(expected_labels, km.labels_) @@ -650,10 +650,10 @@ def test_transform(): X_new = km.transform(km.cluster_centers_) for c in range(n_clusters): - assert_equal(X_new[c, c], 0) + assert X_new[c, c] == 0 for c2 in range(n_clusters): if c != c2: - assert_greater(X_new[c, c2], 0) + assert X_new[c, c2] > 0 def test_fit_transform(): @@ -707,14 +707,14 @@ def test_k_means_function(): finally: sys.stdout = old_stdout centers = cluster_centers - assert_equal(centers.shape, (n_clusters, n_features)) + assert centers.shape == (n_clusters, n_features) labels = labels - assert_equal(np.unique(labels).shape[0], n_clusters) + assert 
np.unique(labels).shape[0] == n_clusters # check that the labels assignment are perfect (up to a permutation) - assert_equal(v_measure_score(true_labels, labels), 1.0) - assert_greater(inertia, 0.0) + assert v_measure_score(true_labels, labels) == 1.0 + assert inertia > 0.0 # check warning when centers are passed assert_warns(RuntimeWarning, k_means, X, n_clusters=n_clusters, @@ -766,18 +766,18 @@ def test_float_precision(Estimator, is_sparse): estimator.fit(X_test) # dtype of cluster centers has to be the dtype of the input # data - assert_equal(estimator.cluster_centers_.dtype, dtype) + assert estimator.cluster_centers_.dtype == dtype inertia[dtype] = estimator.inertia_ X_new[dtype] = estimator.transform(X_test) centers[dtype] = estimator.cluster_centers_ # ensure the extracted row is a 2d array - assert_equal(estimator.predict(X_test[:1]), + assert (estimator.predict(X_test[:1]) == estimator.labels_[0]) if hasattr(estimator, 'partial_fit'): estimator.partial_fit(X_test[0:3]) # dtype of cluster centers has to stay the same after # partial_fit - assert_equal(estimator.cluster_centers_.dtype, dtype) + assert estimator.cluster_centers_.dtype == dtype # compare arrays with low precision since the difference between # 32 and 64 bit sometimes makes a difference up to the 4th decimal @@ -801,8 +801,8 @@ def test_k_means_init_centers(): assert_array_equal(init_centers, init_centers_test) km = KMeans(init=init_centers_test, n_clusters=3, n_init=1) km.fit(X_test) - assert_equal(False, np.may_share_memory(km.cluster_centers_, - init_centers)) + assert False == np.may_share_memory(km.cluster_centers_, + init_centers) @pytest.mark.parametrize("data", [X, X_csr], ids=["dense", "sparse"]) @@ -843,7 +843,7 @@ def test_less_centers_than_unique_points(): # only three distinct points, so only three clusters # can have points assigned to them - assert_equal(set(km.labels_), set(range(3))) + assert set(km.labels_) == set(range(3)) # k_means should warn that fewer labels than cluster # centers have been used @@ -920,9 +920,9 @@ def test_check_sample_weight(): from sklearn.cluster.k_means_ import _check_sample_weight sample_weight = None checked_sample_weight = _check_sample_weight(X, sample_weight) - assert_equal(_num_samples(X), _num_samples(checked_sample_weight)) + assert _num_samples(X) == _num_samples(checked_sample_weight) assert_almost_equal(checked_sample_weight.sum(), _num_samples(X)) - assert_equal(X.dtype, checked_sample_weight.dtype) + assert X.dtype == checked_sample_weight.dtype def test_iter_attribute(): diff --git a/sklearn/cluster/tests/test_optics.py b/sklearn/cluster/tests/test_optics.py index 1e3d99746c9e9..b90d8ee7a1e3d 100644 --- a/sklearn/cluster/tests/test_optics.py +++ b/sklearn/cluster/tests/test_optics.py @@ -151,7 +151,7 @@ def test_correct_number_of_clusters(): clust.fit(X) # number of clusters, ignoring noise if present n_clusters_1 = len(set(clust.labels_)) - int(-1 in clust.labels_) - assert_equal(n_clusters_1, n_clusters) + assert n_clusters_1 == n_clusters # check attribute types and sizes assert clust.labels_.shape == (len(X),) @@ -216,7 +216,7 @@ def test_close_extract(): clust = OPTICS(max_eps=1.0, cluster_method='dbscan', eps=0.3, min_samples=10).fit(X) # Cluster ordering starts at 0; max cluster label = 2 is 3 clusters - assert_equal(max(clust.labels_), 2) + assert max(clust.labels_) == 2 @pytest.mark.parametrize('eps', [0.1, .3, .5]) diff --git a/sklearn/cluster/tests/test_spectral.py b/sklearn/cluster/tests/test_spectral.py index 44d549d3ee743..58cc9f4f1036f 100644 --- 
a/sklearn/cluster/tests/test_spectral.py +++ b/sklearn/cluster/tests/test_spectral.py @@ -137,7 +137,7 @@ def test_affinities(): def histogram(x, y, **kwargs): # Histogram kernel implemented as a callable. - assert_equal(kwargs, {}) # no kernel_params that we didn't ask for + assert kwargs == {} # no kernel_params that we didn't ask for return np.minimum(x, y).sum() sp = SpectralClustering(n_clusters=2, affinity=histogram, random_state=0) diff --git a/sklearn/compose/tests/test_column_transformer.py b/sklearn/compose/tests/test_column_transformer.py index 69df675ac2c25..bcbbcc1c3902e 100644 --- a/sklearn/compose/tests/test_column_transformer.py +++ b/sklearn/compose/tests/test_column_transformer.py @@ -349,7 +349,7 @@ def test_column_transformer_sparse_stacking(): col_trans.fit(X_array) X_trans = col_trans.transform(X_array) assert sparse.issparse(X_trans) - assert_equal(X_trans.shape, (X_trans.shape[0], X_trans.shape[0] + 1)) + assert X_trans.shape == (X_trans.shape[0], X_trans.shape[0] + 1) assert_array_equal(X_trans.toarray()[:, 1:], np.eye(X_trans.shape[0])) assert len(col_trans.transformers_) == 2 assert col_trans.transformers_[-1][0] != 'remainder' @@ -515,9 +515,9 @@ def test_make_column_transformer(): norm = Normalizer() ct = make_column_transformer((scaler, 'first'), (norm, ['second'])) names, transformers, columns = zip(*ct.transformers) - assert_equal(names, ("standardscaler", "normalizer")) - assert_equal(transformers, (scaler, norm)) - assert_equal(columns, ('first', ['second'])) + assert names == ("standardscaler", "normalizer") + assert transformers == (scaler, norm) + assert columns == ('first', ['second']) def test_make_column_transformer_pandas(): @@ -537,11 +537,11 @@ def test_make_column_transformer_kwargs(): ct = make_column_transformer((scaler, 'first'), (norm, ['second']), n_jobs=3, remainder='drop', sparse_threshold=0.5) - assert_equal(ct.transformers, make_column_transformer( - (scaler, 'first'), (norm, ['second'])).transformers) - assert_equal(ct.n_jobs, 3) - assert_equal(ct.remainder, 'drop') - assert_equal(ct.sparse_threshold, 0.5) + assert ct.transformers == make_column_transformer( + (scaler, 'first'), (norm, ['second'])).transformers + assert ct.n_jobs == 3 + assert ct.remainder == 'drop' + assert ct.sparse_threshold == 0.5 # invalid keyword parameters should raise an error message assert_raise_message( TypeError, @@ -579,7 +579,7 @@ def test_column_transformer_get_set_params(): 'transformer_weights': None, 'verbose': False} - assert_dict_equal(ct.get_params(), exp) + assert ct.get_params() == exp ct.set_params(trans1__with_mean=False) assert not ct.get_params()['trans1__with_mean'] @@ -597,7 +597,7 @@ def test_column_transformer_get_set_params(): 'transformer_weights': None, 'verbose': False} - assert_dict_equal(ct.get_params(), exp) + assert ct.get_params() == exp def test_column_transformer_named_estimators(): @@ -613,7 +613,7 @@ def test_column_transformer_named_estimators(): assert isinstance(ct.named_transformers_.trans2, StandardScaler) assert not ct.named_transformers_.trans2.with_std # check it are fitted transformers - assert_equal(ct.named_transformers_.trans1.mean_, 1.) + assert ct.named_transformers_.trans1.mean_ == 1. 
def test_column_transformer_cloning(): @@ -647,7 +647,7 @@ def test_column_transformer_get_feature_names(): ct = ColumnTransformer( [('col' + str(i), DictVectorizer(), i) for i in range(2)]) ct.fit(X) - assert_equal(ct.get_feature_names(), ['col0__a', 'col0__b', 'col1__c']) + assert ct.get_feature_names() == ['col0__a', 'col0__b', 'col1__c'] # passthrough transformers not supported ct = ColumnTransformer([('trans', 'passthrough', [0, 1])]) @@ -667,7 +667,7 @@ def test_column_transformer_get_feature_names(): ct = ColumnTransformer( [('col0', DictVectorizer(), 0), ('col1', 'drop', 1)]) ct.fit(X) - assert_equal(ct.get_feature_names(), ['col0__a', 'col0__b']) + assert ct.get_feature_names() == ['col0__a', 'col0__b'] def test_column_transformer_special_strings(): From 014b864c301ea8295bf4559012faf038173a1150 Mon Sep 17 00:00:00 2001 From: adrinjalali Date: Sun, 30 Jun 2019 15:02:11 +0200 Subject: [PATCH 02/22] fix common tests --- sklearn/tests/test_base.py | 39 +++++---- sklearn/tests/test_calibration.py | 20 ++--- sklearn/tests/test_common.py | 11 +-- sklearn/tests/test_discriminant_analysis.py | 15 ++-- sklearn/tests/test_dummy.py | 24 +++--- sklearn/tests/test_init.py | 2 +- sklearn/tests/test_isotonic.py | 10 +-- sklearn/tests/test_kernel_approximation.py | 22 ++--- sklearn/tests/test_multiclass.py | 72 ++++++++--------- sklearn/tests/test_multioutput.py | 32 ++++---- sklearn/tests/test_naive_bayes.py | 32 ++++---- sklearn/tests/test_pipeline.py | 90 ++++++++++----------- sklearn/tests/test_random_projection.py | 40 ++++----- 13 files changed, 203 insertions(+), 206 deletions(-) diff --git a/sklearn/tests/test_base.py b/sklearn/tests/test_base.py index 316b01ff33415..1ed90ecca42ce 100644 --- a/sklearn/tests/test_base.py +++ b/sklearn/tests/test_base.py @@ -116,7 +116,7 @@ def test_clone(): selector = SelectFpr(f_classif, alpha=0.1) new_selector = clone(selector) assert selector is not new_selector - assert_equal(selector.get_params(), new_selector.get_params()) + assert selector.get_params() == new_selector.get_params() selector = SelectFpr(f_classif, alpha=np.zeros((10, 2))) new_selector = clone(selector) @@ -199,13 +199,12 @@ def test_repr(): my_estimator = MyEstimator() repr(my_estimator) test = T(K(), K()) - assert_equal( - repr(test), - "T(a=K(c=None, d=None), b=K(c=None, d=None))" - ) + assert ( + repr(test) == + "T(a=K(c=None, d=None), b=K(c=None, d=None))") some_est = T(a=["long_params"] * 1000) - assert_equal(len(repr(some_est)), 495) + assert len(repr(some_est)) == 495 def test_str(): @@ -288,11 +287,11 @@ def test_score_sample_weight(): # generate random sample weights sample_weight = rng.randint(1, 10, size=len(ds.target)) # check that the score with and without sample weights are different - assert_not_equal(est.score(ds.data, ds.target), - est.score(ds.data, ds.target, - sample_weight=sample_weight), - msg="Unweighted and weighted scores " - "are unexpectedly equal") + assert_message = ("Unweighted and weighted scores " + "are unexpectedly equal") + assert (est.score(ds.data, ds.target) != + est.score(ds.data, ds.target, + sample_weight=sample_weight)), assert_message def test_clone_pandas_dataframe(): @@ -330,7 +329,7 @@ def transform(self, X): # the test assert (e.df == cloned_e.df).values.all() - assert_equal(e.scalar_param, cloned_e.scalar_param) + assert e.scalar_param == cloned_e.scalar_param def test_pickle_version_warning_is_not_raised_with_matching_version(): @@ -343,7 +342,7 @@ def test_pickle_version_warning_is_not_raised_with_matching_version(): # test that we can 
predict with the restored decision tree classifier score_of_original = tree.score(iris.data, iris.target) score_of_restored = tree_restored.score(iris.data, iris.target) - assert_equal(score_of_original, score_of_restored) + assert score_of_original == score_of_restored class TreeBadVersion(DecisionTreeClassifier): @@ -424,8 +423,8 @@ def test_pickling_when_getstate_is_overwritten_by_mixin(): serialized = pickle.dumps(estimator) estimator_restored = pickle.loads(serialized) - assert_equal(estimator_restored.attribute_pickled, 5) - assert_equal(estimator_restored._attribute_not_pickled, None) + assert estimator_restored.attribute_pickled == 5 + assert estimator_restored._attribute_not_pickled == None assert estimator_restored._restored @@ -438,12 +437,12 @@ def test_pickling_when_getstate_is_overwritten_by_mixin_outside_of_sklearn(): type(estimator).__module__ = "notsklearn" serialized = estimator.__getstate__() - assert_dict_equal(serialized, {'_attribute_not_pickled': None, - 'attribute_pickled': 5}) + assert serialized == {'_attribute_not_pickled': None, + 'attribute_pickled': 5} serialized['attribute_pickled'] = 4 estimator.__setstate__(serialized) - assert_equal(estimator.attribute_pickled, 4) + assert estimator.attribute_pickled == 4 assert estimator._restored finally: type(estimator).__module__ = old_mod @@ -467,8 +466,8 @@ def test_pickling_works_when_getstate_is_overwritten_in_the_child_class(): serialized = pickle.dumps(estimator) estimator_restored = pickle.loads(serialized) - assert_equal(estimator_restored.attribute_pickled, 5) - assert_equal(estimator_restored._attribute_not_pickled, None) + assert estimator_restored.attribute_pickled == 5 + assert estimator_restored._attribute_not_pickled == None def test_tag_inheritance(): diff --git a/sklearn/tests/test_calibration.py b/sklearn/tests/test_calibration.py index a907444de2151..db7ed1920c43e 100644 --- a/sklearn/tests/test_calibration.py +++ b/sklearn/tests/test_calibration.py @@ -58,7 +58,7 @@ def test_calibration(): prob_pos_pc_clf = pc_clf.predict_proba(this_X_test)[:, 1] # Check that brier score has improved after calibration - assert_greater(brier_score_loss(y_test, prob_pos_clf), + assert (brier_score_loss(y_test, prob_pos_clf) > brier_score_loss(y_test, prob_pos_pc_clf)) # Check invariance against relabeling [0, 1] -> [1, 2] @@ -84,7 +84,7 @@ def test_calibration(): else: # Isotonic calibration is not invariant against relabeling # but should improve in both cases - assert_greater(brier_score_loss(y_test, prob_pos_clf), + assert (brier_score_loss(y_test, prob_pos_clf) > brier_score_loss((y_test + 1) % 2, prob_pos_pc_clf_relabeled)) @@ -122,7 +122,7 @@ def test_sample_weight(): probs_without_sw = calibrated_clf.predict_proba(X_test) diff = np.linalg.norm(probs_with_sw - probs_without_sw) - assert_greater(diff, 0.1) + assert diff > 0.1 def test_calibration_multiclass(): @@ -158,7 +158,7 @@ def softmax(y_pred): uncalibrated_log_loss = \ log_loss(y_test, softmax(clf.decision_function(X_test))) calibrated_log_loss = log_loss(y_test, probas) - assert_greater_equal(uncalibrated_log_loss, calibrated_log_loss) + assert uncalibrated_log_loss >= calibrated_log_loss # Test that calibration of a multiclass classifier decreases log-loss # for RandomForestClassifier @@ -177,7 +177,7 @@ def softmax(y_pred): cal_clf.fit(X_train, y_train) cal_clf_probs = cal_clf.predict_proba(X_test) cal_loss = log_loss(y_test, cal_clf_probs) - assert_greater(loss, cal_loss) + assert loss > cal_loss def test_calibration_prefit(): @@ -217,7 +217,7 @@ def 
test_calibration_prefit(): assert_array_equal(y_pred, np.array([0, 1])[np.argmax(y_prob, axis=1)]) - assert_greater(brier_score_loss(y_test, prob_pos_clf), + assert (brier_score_loss(y_test, prob_pos_clf) > brier_score_loss(y_test, prob_pos_pc_clf)) @@ -246,8 +246,8 @@ def test_calibration_curve(): prob_true, prob_pred = calibration_curve(y_true, y_pred, n_bins=2) prob_true_unnormalized, prob_pred_unnormalized = \ calibration_curve(y_true, y_pred * 2, n_bins=2, normalize=True) - assert_equal(len(prob_true), len(prob_pred)) - assert_equal(len(prob_true), 2) + assert len(prob_true) == len(prob_pred) + assert len(prob_true) == 2 assert_almost_equal(prob_true, [0, 1]) assert_almost_equal(prob_pred, [0.1, 0.9]) assert_almost_equal(prob_true, prob_true_unnormalized) @@ -317,8 +317,8 @@ def test_calibration_less_classes(): enumerate(cal_clf.calibrated_classifiers_): proba = calibrated_classifier.predict_proba(X) assert_array_equal(proba[:, i], np.zeros(len(y))) - assert_equal(np.all(np.hstack([proba[:, :i], - proba[:, i + 1:]])), True) + assert np.all(np.hstack([proba[:, :i], + proba[:, i + 1:]])) == True @ignore_warnings(category=(DeprecationWarning, FutureWarning)) diff --git a/sklearn/tests/test_common.py b/sklearn/tests/test_common.py index 02c50cf2c9503..af5ea34e1d604 100644 --- a/sklearn/tests/test_common.py +++ b/sklearn/tests/test_common.py @@ -217,7 +217,7 @@ def test_root_import_all_completeness(): onerror=lambda _: None): if '.' in modname or modname.startswith('_') or modname in EXCEPTIONS: continue - assert_in(modname, sklearn.__all__) + assert modname in sklearn.__all__ def test_all_tests_are_importable(): @@ -236,7 +236,8 @@ def test_all_tests_are_importable(): if ispkg and not HAS_TESTS_EXCEPTIONS.search(name) and name + '.tests' not in lookup] - assert_equal(missing_tests, [], - '{0} do not have `tests` subpackages. Perhaps they require ' - '__init__.py or an add_subpackage directive in the parent ' - 'setup.py'.format(missing_tests)) + assert missing_tests == [], ('{0} do not have `tests` subpackages. ' + 'Perhaps they require ' + '__init__.py or an add_subpackage directive ' + 'in the parent ' + 'setup.py'.format(missing_tests)) diff --git a/sklearn/tests/test_discriminant_analysis.py b/sklearn/tests/test_discriminant_analysis.py index 3428f12b03306..c85227b37a7eb 100644 --- a/sklearn/tests/test_discriminant_analysis.py +++ b/sklearn/tests/test_discriminant_analysis.py @@ -212,10 +212,10 @@ def test_lda_transform(): # Test LDA transform. 
clf = LinearDiscriminantAnalysis(solver="svd", n_components=1) X_transformed = clf.fit(X, y).transform(X) - assert_equal(X_transformed.shape[1], 1) + assert X_transformed.shape[1] == 1 clf = LinearDiscriminantAnalysis(solver="eigen", n_components=1) X_transformed = clf.fit(X, y).transform(X) - assert_equal(X_transformed.shape[1], 1) + assert X_transformed.shape[1] == 1 clf = LinearDiscriminantAnalysis(solver="lsqr", n_components=1) clf.fit(X, y) @@ -236,14 +236,12 @@ def test_lda_explained_variance_ratio(): clf_lda_eigen = LinearDiscriminantAnalysis(solver="eigen") clf_lda_eigen.fit(X, y) assert_almost_equal(clf_lda_eigen.explained_variance_ratio_.sum(), 1.0, 3) - assert_equal(clf_lda_eigen.explained_variance_ratio_.shape, (2,), - "Unexpected length for explained_variance_ratio_") + assert clf_lda_eigen.explained_variance_ratio_.shape == (2,), "Unexpected length for explained_variance_ratio_" clf_lda_svd = LinearDiscriminantAnalysis(solver="svd") clf_lda_svd.fit(X, y) assert_almost_equal(clf_lda_svd.explained_variance_ratio_.sum(), 1.0, 3) - assert_equal(clf_lda_svd.explained_variance_ratio_.shape, (2,), - "Unexpected length for explained_variance_ratio_") + assert clf_lda_svd.explained_variance_ratio_.shape == (2,), "Unexpected length for explained_variance_ratio_" assert_array_almost_equal(clf_lda_svd.explained_variance_ratio_, clf_lda_eigen.explained_variance_ratio_) @@ -296,8 +294,7 @@ def test_lda_scaling(): for solver in ('svd', 'lsqr', 'eigen'): clf = LinearDiscriminantAnalysis(solver=solver) # should be able to separate the data perfectly - assert_equal(clf.fit(x, y).score(x, y), 1.0, - 'using covariance: %s' % solver) + assert clf.fit(x, y).score(x, y) == 1.0, 'using covariance: %s' % solver def test_lda_store_covariance(): @@ -430,7 +427,7 @@ def test_qda_priors(): y_pred = clf.fit(X6, y6).predict(X6) n_pos2 = np.sum(y_pred == 2) - assert_greater(n_pos2, n_pos) + assert n_pos2 > n_pos def test_qda_store_covariance(): diff --git a/sklearn/tests/test_dummy.py b/sklearn/tests/test_dummy.py index 8b641448604fe..dc80b2d073d81 100644 --- a/sklearn/tests/test_dummy.py +++ b/sklearn/tests/test_dummy.py @@ -36,8 +36,8 @@ def _check_predict_proba(clf, X, y): log_proba = [log_proba] for k in range(n_outputs): - assert_equal(proba[k].shape[0], n_samples) - assert_equal(proba[k].shape[1], len(np.unique(y[:, k]))) + assert proba[k].shape[0] == n_samples + assert proba[k].shape[1] == len(np.unique(y[:, k])) assert_array_almost_equal(proba[k].sum(axis=1), np.ones(len(X))) # We know that we can have division by zero assert_array_almost_equal(np.log(proba[k]), log_proba[k]) @@ -50,7 +50,7 @@ def _check_behavior_2d(clf): est = clone(clf) est.fit(X, y) y_pred = est.predict(X) - assert_equal(y.shape, y_pred.shape) + assert y.shape == y_pred.shape # 2d case y = np.array([[1, 0], @@ -60,7 +60,7 @@ def _check_behavior_2d(clf): est = clone(clf) est.fit(X, y) y_pred = est.predict(X) - assert_equal(y.shape, y_pred.shape) + assert y.shape == y_pred.shape def _check_behavior_2d_for_constant(clf): @@ -73,7 +73,7 @@ def _check_behavior_2d_for_constant(clf): est = clone(clf) est.fit(X, y) y_pred = est.predict(X) - assert_equal(y.shape, y_pred.shape) + assert y.shape == y_pred.shape def _check_equality_regressor(statistic, y_learn, y_pred_learn, @@ -231,7 +231,7 @@ def test_string_labels(): def test_classifier_score_with_None(y, y_test): clf = DummyClassifier(strategy="most_frequent") clf.fit(None, y) - assert_equal(clf.score(None, y_test), 0.5) + assert clf.score(None, y_test) == 0.5 
@pytest.mark.parametrize("strategy", [ @@ -472,7 +472,7 @@ def test_y_mean_attribute_regressor(): est = DummyRegressor(strategy='mean') est.fit(X, y) - assert_equal(est.constant_, np.mean(y)) + assert est.constant_ == np.mean(y) def test_unknown_strategey_regressor(): @@ -645,14 +645,14 @@ def test_dummy_regressor_sample_weight(n_samples=10): sample_weight = random_state.rand(n_samples) est = DummyRegressor(strategy="mean").fit(X, y, sample_weight) - assert_equal(est.constant_, np.average(y, weights=sample_weight)) + assert est.constant_ == np.average(y, weights=sample_weight) est = DummyRegressor(strategy="median").fit(X, y, sample_weight) - assert_equal(est.constant_, _weighted_percentile(y, sample_weight, 50.)) + assert est.constant_ == _weighted_percentile(y, sample_weight, 50.) est = DummyRegressor(strategy="quantile", quantile=.95).fit(X, y, sample_weight) - assert_equal(est.constant_, _weighted_percentile(y, sample_weight, 95.)) + assert est.constant_ == _weighted_percentile(y, sample_weight, 95.) def test_dummy_regressor_on_3D_array(): @@ -686,7 +686,7 @@ def test_dummy_regressor_return_std(): cls.fit(X, y) y_pred_list = cls.predict(X, return_std=True) # there should be two elements when return_std is True - assert_equal(len(y_pred_list), 2) + assert len(y_pred_list) == 2 # the second element should be all zeros assert_array_equal(y_pred_list[1], y_std_expected) @@ -704,7 +704,7 @@ def test_dummy_regressor_return_std(): def test_regressor_score_with_None(y, y_test): reg = DummyRegressor() reg.fit(None, y) - assert_equal(reg.score(None, y_test), 1.0) + assert reg.score(None, y_test) == 1.0 @pytest.mark.parametrize("strategy", [ diff --git a/sklearn/tests/test_init.py b/sklearn/tests/test_init.py index 17f12e8da478e..d936ee4e6d2b7 100644 --- a/sklearn/tests/test_init.py +++ b/sklearn/tests/test_init.py @@ -17,4 +17,4 @@ def test_import_skl(): # Test either above import has failed for some reason # "import *" is discouraged outside of the module level, hence we # rely on setting up the variable above - assert_equal(_top_import_error, None) + assert _top_import_error == None diff --git a/sklearn/tests/test_isotonic.py b/sklearn/tests/test_isotonic.py index f35d75ae73c00..b1baa1b60c71c 100644 --- a/sklearn/tests/test_isotonic.py +++ b/sklearn/tests/test_isotonic.py @@ -311,8 +311,8 @@ def test_isotonic_regression_oob_clip(): # Predict from training and test x and check that min/max match. y1 = ir.predict([min(x) - 10, max(x) + 10]) y2 = ir.predict(x) - assert_equal(max(y1), max(y2)) - assert_equal(min(y1), min(y2)) + assert max(y1) == max(y2) + assert min(y1) == min(y2) def test_isotonic_regression_oob_nan(): @@ -326,7 +326,7 @@ def test_isotonic_regression_oob_nan(): # Predict from training and test x and check that we have two NaNs. 
y1 = ir.predict([min(x) - 10, max(x) + 10]) - assert_equal(sum(np.isnan(y1)), 2) + assert sum(np.isnan(y1)) == 2 def test_isotonic_regression_oob_bad(): @@ -478,12 +478,12 @@ def test_isotonic_dtype(): ensure_2d=False).dtype res = isotonic_regression(y_np, sample_weight=sample_weight) - assert_equal(res.dtype, expected_dtype) + assert res.dtype == expected_dtype X = np.arange(len(y)).astype(dtype) reg.fit(X, y_np, sample_weight=sample_weight) res = reg.predict(X) - assert_equal(res.dtype, expected_dtype) + assert res.dtype == expected_dtype def test_make_unique_dtype(): diff --git a/sklearn/tests/test_kernel_approximation.py b/sklearn/tests/test_kernel_approximation.py index 2780981d536c8..af589010d6769 100644 --- a/sklearn/tests/test_kernel_approximation.py +++ b/sklearn/tests/test_kernel_approximation.py @@ -65,19 +65,19 @@ def test_additive_chi2_sampler(): # test that the sample_interval is initialized correctly transform = AdditiveChi2Sampler(sample_steps=sample_steps) - assert_equal(transform.sample_interval, None) + assert transform.sample_interval == None # test that the sample_interval is changed in the fit method transform.fit(X) - assert_not_equal(transform.sample_interval_, None) + assert transform.sample_interval_ != None # test that the sample_interval is set correctly sample_interval = 0.3 transform = AdditiveChi2Sampler(sample_steps=4, sample_interval=sample_interval) - assert_equal(transform.sample_interval, sample_interval) + assert transform.sample_interval == sample_interval transform.fit(X) - assert_equal(transform.sample_interval_, sample_interval) + assert transform.sample_interval_ == sample_interval def test_skewed_chi2_sampler(): @@ -133,10 +133,10 @@ def test_rbf_sampler(): kernel_approx = np.dot(X_trans, Y_trans.T) error = kernel - kernel_approx - assert_less_equal(np.abs(np.mean(error)), 0.01) # close to unbiased + assert np.abs(np.mean(error)) <= 0.01 # close to unbiased np.abs(error, out=error) - assert_less_equal(np.max(error), 0.1) # nothing too far off - assert_less_equal(np.mean(error), 0.05) # mean is fairly close + assert np.max(error) <= 0.1 # nothing too far off + assert np.mean(error) <= 0.05 # mean is fairly close def test_input_validation(): @@ -163,21 +163,21 @@ def test_nystroem_approximation(): trans = Nystroem(n_components=2, random_state=rnd) X_transformed = trans.fit(X).transform(X) - assert_equal(X_transformed.shape, (X.shape[0], 2)) + assert X_transformed.shape == (X.shape[0], 2) # test callable kernel def linear_kernel(X, Y): return np.dot(X, Y.T) trans = Nystroem(n_components=2, kernel=linear_kernel, random_state=rnd) X_transformed = trans.fit(X).transform(X) - assert_equal(X_transformed.shape, (X.shape[0], 2)) + assert X_transformed.shape == (X.shape[0], 2) # test that available kernels fit and transform kernels_available = kernel_metrics() for kern in kernels_available: trans = Nystroem(n_components=2, kernel=kern, random_state=rnd) X_transformed = trans.fit(X).transform(X) - assert_equal(X_transformed.shape, (X.shape[0], 2)) + assert X_transformed.shape == (X.shape[0], 2) def test_nystroem_default_parameters(): @@ -244,7 +244,7 @@ def logging_histogram_kernel(x, y, log): Nystroem(kernel=logging_histogram_kernel, n_components=(n_samples - 1), kernel_params={'log': kernel_log}).fit(X) - assert_equal(len(kernel_log), n_samples * (n_samples - 1) / 2) + assert len(kernel_log) == n_samples * (n_samples - 1) / 2 def linear_kernel(X, Y): return np.dot(X, Y.T) diff --git a/sklearn/tests/test_multiclass.py b/sklearn/tests/test_multiclass.py index 
7816656400b61..61b34a7509200 100644 --- a/sklearn/tests/test_multiclass.py +++ b/sklearn/tests/test_multiclass.py @@ -66,16 +66,16 @@ def test_ovr_fit_predict(): # A classifier which implements decision_function. ovr = OneVsRestClassifier(LinearSVC(random_state=0)) pred = ovr.fit(iris.data, iris.target).predict(iris.data) - assert_equal(len(ovr.estimators_), n_classes) + assert len(ovr.estimators_) == n_classes clf = LinearSVC(random_state=0) pred2 = clf.fit(iris.data, iris.target).predict(iris.data) - assert_equal(np.mean(iris.target == pred), np.mean(iris.target == pred2)) + assert np.mean(iris.target == pred) == np.mean(iris.target == pred2) # A classifier which implements predict_proba. ovr = OneVsRestClassifier(MultinomialNB()) pred = ovr.fit(iris.data, iris.target).predict(iris.data) - assert_greater(np.mean(iris.target == pred), 0.65) + assert np.mean(iris.target == pred) > 0.65 # 0.23. warning about tol not having its correct default value. @@ -91,8 +91,8 @@ def test_ovr_partial_fit(): pred2 = ovr2.fit(X, y).predict(X) assert_almost_equal(pred, pred2) - assert_equal(len(ovr.estimators_), len(np.unique(y))) - assert_greater(np.mean(y == pred), 0.65) + assert len(ovr.estimators_) == len(np.unique(y)) + assert np.mean(y == pred) > 0.65 # Test when mini batches doesn't have all classes # with SGDClassifier @@ -107,7 +107,7 @@ def test_ovr_partial_fit(): ovr1 = OneVsRestClassifier(SGDClassifier(max_iter=1, tol=None, shuffle=False, random_state=0)) pred1 = ovr1.fit(X, y).predict(X) - assert_equal(np.mean(pred == y), np.mean(pred1 == y)) + assert np.mean(pred == y) == np.mean(pred1 == y) # test partial_fit only exists if estimator has it: ovr = OneVsRestClassifier(SVC()) @@ -132,17 +132,17 @@ def test_ovr_ovo_regressor(): # function ovr = OneVsRestClassifier(DecisionTreeRegressor()) pred = ovr.fit(iris.data, iris.target).predict(iris.data) - assert_equal(len(ovr.estimators_), n_classes) + assert len(ovr.estimators_) == n_classes assert_array_equal(np.unique(pred), [0, 1, 2]) # we are doing something sensible - assert_greater(np.mean(pred == iris.target), .9) + assert np.mean(pred == iris.target) > .9 ovr = OneVsOneClassifier(DecisionTreeRegressor()) pred = ovr.fit(iris.data, iris.target).predict(iris.data) - assert_equal(len(ovr.estimators_), n_classes * (n_classes - 1) / 2) + assert len(ovr.estimators_) == n_classes * (n_classes - 1) / 2 assert_array_equal(np.unique(pred), [0, 1, 2]) # we are doing something sensible - assert_greater(np.mean(pred == iris.target), .9) + assert np.mean(pred == iris.target) > .9 def test_ovr_fit_predict_sparse(): @@ -204,7 +204,7 @@ def test_ovr_always_present(): y_pred = ovr.predict(X) assert_array_equal(np.array(y_pred), np.array(y)) y_pred = ovr.decision_function(X) - assert_equal(np.unique(y_pred[:, -2:]), 1) + assert np.unique(y_pred[:, -2:]) == 1 y_pred = ovr.predict_proba(X) assert_array_equal(y_pred[:, -1], np.ones(X.shape[0])) @@ -233,7 +233,7 @@ def test_ovr_multiclass(): LinearRegression(), Ridge(), ElasticNet()): clf = OneVsRestClassifier(base_clf).fit(X, y) - assert_equal(set(clf.classes_), classes) + assert set(clf.classes_) == classes y_pred = clf.predict(np.array([[0, 0, 4]]))[0] assert_array_equal(y_pred, ["eggs"]) @@ -253,24 +253,24 @@ def test_ovr_binary(): def conduct_test(base_clf, test_predict_proba=False): clf = OneVsRestClassifier(base_clf).fit(X, y) - assert_equal(set(clf.classes_), classes) + assert set(clf.classes_) == classes y_pred = clf.predict(np.array([[0, 0, 4]]))[0] assert_array_equal(y_pred, ["eggs"]) if hasattr(base_clf, 
'decision_function'): dec = clf.decision_function(X) - assert_equal(dec.shape, (5,)) + assert dec.shape == (5,) if test_predict_proba: X_test = np.array([[0, 0, 4]]) probabilities = clf.predict_proba(X_test) - assert_equal(2, len(probabilities[0])) - assert_equal(clf.classes_[np.argmax(probabilities, axis=1)], + assert 2 == len(probabilities[0]) + assert (clf.classes_[np.argmax(probabilities, axis=1)] == clf.predict(X_test)) # test input as label indicator matrix clf = OneVsRestClassifier(base_clf).fit(X, Y) y_pred = clf.predict([[3, 0, 0]])[0] - assert_equal(y_pred, 1) + assert y_pred == 1 for base_clf in (LinearSVC(random_state=0), LinearRegression(), Ridge(), ElasticNet()): @@ -302,8 +302,8 @@ def test_ovr_multilabel(): def test_ovr_fit_predict_svc(): ovr = OneVsRestClassifier(svm.SVC()) ovr.fit(iris.data, iris.target) - assert_equal(len(ovr.estimators_), 3) - assert_greater(ovr.score(iris.data, iris.target), .9) + assert len(ovr.estimators_) == 3 + assert ovr.score(iris.data, iris.target) > .9 def test_ovr_multilabel_dataset(): @@ -450,10 +450,10 @@ def test_ovr_coef_(): # test with dense and sparse coef ovr.fit(X, iris.target) shape = ovr.coef_.shape - assert_equal(shape[0], n_classes) - assert_equal(shape[1], iris.data.shape[1]) + assert shape[0] == n_classes + assert shape[1] == iris.data.shape[1] # don't densify sparse coefficients - assert_equal(sp.issparse(ovr.estimators_[0].coef_), + assert (sp.issparse(ovr.estimators_[0].coef_) == sp.issparse(ovr.coef_)) @@ -489,12 +489,12 @@ def test_ovo_fit_predict(): # A classifier which implements decision_function. ovo = OneVsOneClassifier(LinearSVC(random_state=0)) ovo.fit(iris.data, iris.target).predict(iris.data) - assert_equal(len(ovo.estimators_), n_classes * (n_classes - 1) / 2) + assert len(ovo.estimators_) == n_classes * (n_classes - 1) / 2 # A classifier which implements predict_proba. 
ovo = OneVsOneClassifier(MultinomialNB()) ovo.fit(iris.data, iris.target).predict(iris.data) - assert_equal(len(ovo.estimators_), n_classes * (n_classes - 1) / 2) + assert len(ovo.estimators_) == n_classes * (n_classes - 1) / 2 def test_ovo_partial_fit_predict(): @@ -508,8 +508,8 @@ def test_ovo_partial_fit_predict(): ovo2 = OneVsOneClassifier(MultinomialNB()) ovo2.fit(X, y) pred2 = ovo2.predict(X) - assert_equal(len(ovo1.estimators_), n_classes * (n_classes - 1) / 2) - assert_greater(np.mean(y == pred1), 0.65) + assert len(ovo1.estimators_) == n_classes * (n_classes - 1) / 2 + assert np.mean(y == pred1) > 0.65 assert_almost_equal(pred1, pred2) # Test when mini-batches have binary target classes @@ -521,8 +521,8 @@ def test_ovo_partial_fit_predict(): pred2 = ovo2.fit(X, y).predict(X) assert_almost_equal(pred1, pred2) - assert_equal(len(ovo1.estimators_), len(np.unique(y))) - assert_greater(np.mean(y == pred1), 0.65) + assert len(ovo1.estimators_) == len(np.unique(y)) + assert np.mean(y == pred1) > 0.65 ovo = OneVsOneClassifier(MultinomialNB()) X = np.random.rand(14, 2) @@ -555,13 +555,13 @@ def test_ovo_decision_function(): # first binary ovo_clf.fit(iris.data, iris.target == 0) decisions = ovo_clf.decision_function(iris.data) - assert_equal(decisions.shape, (n_samples,)) + assert decisions.shape == (n_samples,) # then multi-class ovo_clf.fit(iris.data, iris.target) decisions = ovo_clf.decision_function(iris.data) - assert_equal(decisions.shape, (n_samples, n_classes)) + assert decisions.shape == (n_samples, n_classes) assert_array_equal(decisions.argmax(axis=1), ovo_clf.predict(iris.data)) # Compute the votes @@ -592,7 +592,7 @@ def test_ovo_decision_function(): # to compute the aggregate decision function. The iris dataset # has 150 samples with a couple of duplicates. The OvO decisions # can resolve most of the ties: - assert_greater(len(np.unique(decisions[:, class_idx])), 146) + assert len(np.unique(decisions[:, class_idx])) > 146 def test_ovo_gridsearch(): @@ -628,7 +628,7 @@ def test_ovo_ties(): # For the rest, there is no tie and the prediction is the argmax assert_array_equal(np.argmax(votes[1:], axis=1), ovo_prediction[1:]) # For the tie, the prediction is the class with the highest score - assert_equal(ovo_prediction[0], normalized_confidences[0].argmax()) + assert ovo_prediction[0] == normalized_confidences[0].argmax() # 0.23. warning about tol not having its correct default value. @@ -644,7 +644,7 @@ def test_ovo_ties2(): multi_clf = OneVsOneClassifier(Perceptron(shuffle=False, max_iter=4, tol=None)) ovo_prediction = multi_clf.fit(X, y).predict(X) - assert_equal(ovo_prediction[0], i % 3) + assert ovo_prediction[0] == i % 3 def test_ovo_string_y(): @@ -685,12 +685,12 @@ def test_ecoc_fit_predict(): ecoc = OutputCodeClassifier(LinearSVC(random_state=0), code_size=2, random_state=0) ecoc.fit(iris.data, iris.target).predict(iris.data) - assert_equal(len(ecoc.estimators_), n_classes * 2) + assert len(ecoc.estimators_) == n_classes * 2 # A classifier which implements predict_proba. 
ecoc = OutputCodeClassifier(MultinomialNB(), code_size=2, random_state=0) ecoc.fit(iris.data, iris.target).predict(iris.data) - assert_equal(len(ecoc.estimators_), n_classes * 2) + assert len(ecoc.estimators_) == n_classes * 2 def test_ecoc_gridsearch(): @@ -727,7 +727,7 @@ def test_pairwise_indices(): precomputed_indices = ovr_false.pairwise_indices_ for idx in precomputed_indices: - assert_equal(idx.shape[0] * n_estimators / (n_estimators - 1), + assert (idx.shape[0] * n_estimators / (n_estimators - 1) == linear_kernel.shape[0]) diff --git a/sklearn/tests/test_multioutput.py b/sklearn/tests/test_multioutput.py index 65bc2a97246f7..35a739fee122b 100644 --- a/sklearn/tests/test_multioutput.py +++ b/sklearn/tests/test_multioutput.py @@ -130,7 +130,7 @@ def test_multi_target_sample_weight_partial_fit(): rgr = MultiOutputRegressor(SGDRegressor(random_state=0, max_iter=5)) rgr.partial_fit(X, y, w) - assert_not_equal(rgr.predict(X)[0][0], rgr_w.predict(X)[0][0]) + assert rgr.predict(X)[0][0] != rgr_w.predict(X)[0][0] def test_multi_target_sample_weights(): @@ -220,11 +220,11 @@ def test_multi_output_classification_partial_fit(): X[:half_index], y[:half_index], classes=classes) first_predictions = multi_target_linear.predict(X) - assert_equal((n_samples, n_outputs), first_predictions.shape) + assert (n_samples, n_outputs) == first_predictions.shape multi_target_linear.partial_fit(X[half_index:], y[half_index:]) second_predictions = multi_target_linear.predict(X) - assert_equal((n_samples, n_outputs), second_predictions.shape) + assert (n_samples, n_outputs) == second_predictions.shape # train the linear classification with each column and assert that # predictions are equal after first partial_fit and second partial_fit @@ -259,13 +259,13 @@ def test_multi_output_classification(): multi_target_forest.fit(X, y) predictions = multi_target_forest.predict(X) - assert_equal((n_samples, n_outputs), predictions.shape) + assert (n_samples, n_outputs) == predictions.shape predict_proba = multi_target_forest.predict_proba(X) assert len(predict_proba) == n_outputs for class_probabilities in predict_proba: - assert_equal((n_samples, n_classes), class_probabilities.shape) + assert (n_samples, n_classes) == class_probabilities.shape assert_array_equal(np.argmax(np.dstack(predict_proba), axis=1), predictions) @@ -274,7 +274,7 @@ def test_multi_output_classification(): for i in range(3): forest_ = clone(forest) # create a clone with the same state forest_.fit(X, y[:, i]) - assert_equal(list(forest_.predict(X)), list(predictions[:, i])) + assert list(forest_.predict(X)) == list(predictions[:, i]) assert_array_equal(list(forest_.predict_proba(X)), list(predict_proba[i])) @@ -288,13 +288,13 @@ def test_multiclass_multioutput_estimator(): multi_target_svc.fit(X, y) predictions = multi_target_svc.predict(X) - assert_equal((n_samples, n_outputs), predictions.shape) + assert (n_samples, n_outputs) == predictions.shape # train the forest with each column and assert that predictions are equal for i in range(3): multi_class_svc_ = clone(multi_class_svc) # create a clone multi_class_svc_.fit(X, y[:, i]) - assert_equal(list(multi_class_svc_.predict(X)), + assert (list(multi_class_svc_.predict(X)) == list(predictions[:, i])) @@ -413,7 +413,7 @@ def test_classifier_chain_fit_and_predict_with_linear_svc(): classifier_chain.fit(X, Y) Y_pred = classifier_chain.predict(X) - assert_equal(Y_pred.shape, Y.shape) + assert Y_pred.shape == Y.shape Y_decision = classifier_chain.decision_function(X) @@ -456,7 +456,7 @@ def 
test_classifier_chain_vs_independent_models(): chain.fit(X_train, Y_train) Y_pred_chain = chain.predict(X_test) - assert_greater(jaccard_score(Y_test, Y_pred_chain, average='samples'), + assert (jaccard_score(Y_test, Y_pred_chain, average='samples') > jaccard_score(Y_test, Y_pred_ovr, average='samples')) @@ -468,8 +468,8 @@ def test_base_chain_fit_and_predict(): for chain in chains: chain.fit(X, Y) Y_pred = chain.predict(X) - assert_equal(Y_pred.shape, Y.shape) - assert_equal([c.coef_.size for c in chain.estimators_], + assert Y_pred.shape == Y.shape + assert ([c.coef_.size for c in chain.estimators_] == list(range(X.shape[1], X.shape[1] + Y.shape[1]))) Y_prob = chains[1].predict_proba(X) @@ -488,7 +488,7 @@ def test_base_chain_fit_and_predict_with_sparse_data_and_cv(): for chain in base_chains: chain.fit(X_sparse, Y) Y_pred = chain.predict(X_sparse) - assert_equal(Y_pred.shape, Y.shape) + assert Y_pred.shape == Y.shape def test_base_chain_random_order(): @@ -501,9 +501,9 @@ def test_base_chain_random_order(): chain_fixed = clone(chain).set_params(order=chain_random.order_) chain_fixed.fit(X, Y) assert_array_equal(chain_fixed.order_, chain_random.order_) - assert_not_equal(list(chain_random.order), list(range(4))) - assert_equal(len(chain_random.order_), 4) - assert_equal(len(set(chain_random.order_)), 4) + assert list(chain_random.order) != list(range(4)) + assert len(chain_random.order_) == 4 + assert len(set(chain_random.order_)) == 4 # Randomly ordered chain should behave identically to a fixed order # chain with the same order. for est1, est2 in zip(chain_random.estimators_, diff --git a/sklearn/tests/test_naive_bayes.py b/sklearn/tests/test_naive_bayes.py index 77ebb0125529f..018860d96fa84 100644 --- a/sklearn/tests/test_naive_bayes.py +++ b/sklearn/tests/test_naive_bayes.py @@ -143,7 +143,7 @@ def test_gnb_prior_large_bias(): """Test if good prediction when class prior favor largely one class""" clf = GaussianNB(priors=np.array([0.01, 0.99])) clf.fit(X, y) - assert_equal(clf.predict([[-0.1, -0.1]]), np.array([2])) + assert clf.predict([[-0.1, -0.1]]) == np.array([2]) def test_gnb_check_update_with_no_data(): @@ -155,8 +155,8 @@ def test_gnb_check_update_with_no_data(): x_empty = np.empty((0, X.shape[1])) tmean, tvar = GaussianNB._update_mean_variance(prev_points, mean, var, x_empty) - assert_equal(tmean, mean) - assert_equal(tvar, var) + assert tmean == mean + assert tvar == var def test_gnb_pfit_wrong_nb_features(): @@ -289,8 +289,8 @@ def test_discretenb_predict_proba(): for cls, X in zip([BernoulliNB, MultinomialNB], [X_bernoulli, X_multinomial]): clf = cls().fit(X, y) - assert_equal(clf.predict(X[-1:]), 2) - assert_equal(clf.predict_proba([X[0]]).shape, (1, 2)) + assert clf.predict(X[-1:]) == 2 + assert clf.predict_proba([X[0]]).shape == (1, 2) assert_array_almost_equal(clf.predict_proba(X[:2]).sum(axis=1), np.array([1., 1.]), 6) @@ -299,8 +299,8 @@ def test_discretenb_predict_proba(): for cls, X in zip([BernoulliNB, MultinomialNB], [X_bernoulli, X_multinomial]): clf = cls().fit(X, y) - assert_equal(clf.predict_proba(X[0:1]).shape, (1, 3)) - assert_equal(clf.predict_proba(X[:2]).shape, (2, 3)) + assert clf.predict_proba(X[0:1]).shape == (1, 3) + assert clf.predict_proba(X[:2]).shape == (2, 3) assert_almost_equal(np.sum(clf.predict_proba([X[1]])), 1) assert_almost_equal(np.sum(clf.predict_proba([X[-1]])), 1) assert_almost_equal(np.sum(np.exp(clf.class_log_prior_)), 1) @@ -387,8 +387,8 @@ def test_discretenb_coef_intercept_shape(cls): clf = cls() clf.fit(X, y) - 
assert_equal(clf.coef_.shape, (1, 3)) - assert_equal(clf.intercept_.shape, (1,)) + assert clf.coef_.shape == (1, 3) + assert clf.intercept_.shape == (1,) @pytest.mark.parametrize('kind', ('dense', 'sparse')) @@ -716,24 +716,24 @@ def test_check_accuracy_on_digits(): # Multinomial NB scores = cross_val_score(MultinomialNB(alpha=10), X, y, cv=10) - assert_greater(scores.mean(), 0.86) + assert scores.mean() > 0.86 scores = cross_val_score(MultinomialNB(alpha=10), X_3v8, y_3v8, cv=10) - assert_greater(scores.mean(), 0.94) + assert scores.mean() > 0.94 # Bernoulli NB scores = cross_val_score(BernoulliNB(alpha=10), X > 4, y, cv=10) - assert_greater(scores.mean(), 0.83) + assert scores.mean() > 0.83 scores = cross_val_score(BernoulliNB(alpha=10), X_3v8 > 4, y_3v8, cv=10) - assert_greater(scores.mean(), 0.92) + assert scores.mean() > 0.92 # Gaussian NB scores = cross_val_score(GaussianNB(), X, y, cv=10) - assert_greater(scores.mean(), 0.77) + assert scores.mean() > 0.77 scores = cross_val_score(GaussianNB(var_smoothing=0.1), X, y, cv=10) - assert_greater(scores.mean(), 0.89) + assert scores.mean() > 0.89 scores = cross_val_score(GaussianNB(), X_3v8, y_3v8, cv=10) - assert_greater(scores.mean(), 0.86) + assert scores.mean() > 0.86 diff --git a/sklearn/tests/test_pipeline.py b/sklearn/tests/test_pipeline.py index b40ca7778f2fa..e064f0ba39572 100644 --- a/sklearn/tests/test_pipeline.py +++ b/sklearn/tests/test_pipeline.py @@ -170,14 +170,14 @@ def test_pipeline_init(): # Smoke test with only an estimator clf = NoTrans() pipe = Pipeline([('svc', clf)]) - assert_equal(pipe.get_params(deep=True), + assert (pipe.get_params(deep=True) == dict(svc__a=None, svc__b=None, svc=clf, **pipe.get_params(deep=False))) # Check that params are set pipe.set_params(svc__a=0.1) - assert_equal(clf.a, 0.1) - assert_equal(clf.b, None) + assert clf.a == 0.1 + assert clf.b == None # Smoke test the repr: repr(pipe) @@ -199,7 +199,7 @@ def test_pipeline_init(): # Check that params are set pipe.set_params(svc__C=0.1) - assert_equal(clf.C, 0.1) + assert clf.C == 0.1 # Smoke test the repr: repr(pipe) @@ -225,7 +225,7 @@ def test_pipeline_init(): params.pop('anova') params2.pop('svc') params2.pop('anova') - assert_equal(params, params2) + assert params == params2 def test_pipeline_init_tuple(): @@ -278,10 +278,10 @@ def test_pipeline_sample_weight_supported(): X = np.array([[1, 2]]) pipe = Pipeline([('transf', Transf()), ('clf', FitParamT())]) pipe.fit(X, y=None) - assert_equal(pipe.score(X), 3) - assert_equal(pipe.score(X, y=None), 3) - assert_equal(pipe.score(X, y=None, sample_weight=None), 3) - assert_equal(pipe.score(X, sample_weight=np.array([2, 3])), 8) + assert pipe.score(X) == 3 + assert pipe.score(X, y=None) == 3 + assert pipe.score(X, y=None, sample_weight=None) == 3 + assert pipe.score(X, sample_weight=np.array([2, 3])) == 8 def test_pipeline_sample_weight_unsupported(): @@ -289,8 +289,8 @@ def test_pipeline_sample_weight_unsupported(): X = np.array([[1, 2]]) pipe = Pipeline([('transf', Transf()), ('clf', Mult())]) pipe.fit(X, y=None) - assert_equal(pipe.score(X), 3) - assert_equal(pipe.score(X, sample_weight=None), 3) + assert pipe.score(X) == 3 + assert pipe.score(X, sample_weight=None) == 3 assert_raise_message( TypeError, "score() got an unexpected keyword argument 'sample_weight'", @@ -382,16 +382,16 @@ def test_pipeline_methods_preprocessing_svm(): # check shapes of various prediction functions predict = pipe.predict(X) - assert_equal(predict.shape, (n_samples,)) + assert predict.shape == (n_samples,) proba = 
pipe.predict_proba(X) - assert_equal(proba.shape, (n_samples, n_classes)) + assert proba.shape == (n_samples, n_classes) log_proba = pipe.predict_log_proba(X) - assert_equal(log_proba.shape, (n_samples, n_classes)) + assert log_proba.shape == (n_samples, n_classes) decision_function = pipe.decision_function(X) - assert_equal(decision_function.shape, (n_samples, n_classes)) + assert decision_function.shape == (n_samples, n_classes) pipe.score(X, y) @@ -467,7 +467,7 @@ def test_feature_union(): fs = FeatureUnion([("svd", svd), ("select", select)]) fs.fit(X, y) X_transformed = fs.transform(X) - assert_equal(X_transformed.shape, (X.shape[0], 3)) + assert X_transformed.shape == (X.shape[0], 3) # check if it does the expected thing assert_array_almost_equal(X_transformed[:, :-1], svd.fit_transform(X)) @@ -487,12 +487,12 @@ def test_feature_union(): # test setting parameters fs.set_params(select__k=2) - assert_equal(fs.fit_transform(X, y).shape, (X.shape[0], 4)) + assert fs.fit_transform(X, y).shape == (X.shape[0], 4) # test it works with transformers missing fit_transform fs = FeatureUnion([("mock", Transf()), ("svd", svd), ("select", select)]) X_transformed = fs.fit_transform(X, y) - assert_equal(X_transformed.shape, (X.shape[0], 8)) + assert X_transformed.shape == (X.shape[0], 8) # test error if some elements do not support transform assert_raises_regex(TypeError, @@ -511,16 +511,16 @@ def test_make_union(): mock = Transf() fu = make_union(pca, mock) names, transformers = zip(*fu.transformer_list) - assert_equal(names, ("pca", "transf")) - assert_equal(transformers, (pca, mock)) + assert names == ("pca", "transf") + assert transformers == (pca, mock) def test_make_union_kwargs(): pca = PCA(svd_solver='full') mock = Transf() fu = make_union(pca, mock, n_jobs=3) - assert_equal(fu.transformer_list, make_union(pca, mock).transformer_list) - assert_equal(3, fu.n_jobs) + assert fu.transformer_list == make_union(pca, mock).transformer_list + assert 3 == fu.n_jobs # invalid keyword parameters should raise an error message assert_raise_message( TypeError, @@ -596,15 +596,15 @@ def test_set_pipeline_steps(): pipeline.steps = [('mock2', transf2)] assert 'mock' not in pipeline.named_steps assert pipeline.named_steps['mock2'] is transf2 - assert_equal([('mock2', transf2)], pipeline.steps) + assert [('mock2', transf2)] == pipeline.steps # Using set_params pipeline.set_params(steps=[('mock', transf1)]) - assert_equal([('mock', transf1)], pipeline.steps) + assert [('mock', transf1)] == pipeline.steps # Using set_params to replace single step pipeline.set_params(mock=transf2) - assert_equal([('mock', transf2)], pipeline.steps) + assert [('mock', transf2)] == pipeline.steps # With invalid data pipeline.set_params(steps=[('junk', ())]) @@ -673,7 +673,7 @@ def make(): assert_array_equal([[exp]], pipeline.fit_transform(X, y)) assert_array_equal([exp], pipeline.fit(X).predict(X)) assert_array_equal(X, pipeline.inverse_transform([[exp]])) - assert_dict_equal(pipeline.get_params(deep=True), + assert (pipeline.get_params(deep=True) == {'steps': pipeline.steps, 'm2': mult2, 'm3': passthrough, @@ -755,14 +755,14 @@ def test_make_pipeline(): t2 = Transf() pipe = make_pipeline(t1, t2) assert isinstance(pipe, Pipeline) - assert_equal(pipe.steps[0][0], "transf-1") - assert_equal(pipe.steps[1][0], "transf-2") + assert pipe.steps[0][0] == "transf-1" + assert pipe.steps[1][0] == "transf-2" pipe = make_pipeline(t1, t2, FitParamT()) assert isinstance(pipe, Pipeline) - assert_equal(pipe.steps[0][0], "transf-1") - 
assert_equal(pipe.steps[1][0], "transf-2") - assert_equal(pipe.steps[2][0], "fitparamt") + assert pipe.steps[0][0] == "transf-1" + assert pipe.steps[1][0] == "transf-2" + assert pipe.steps[2][0] == "fitparamt" assert_raise_message( TypeError, @@ -801,7 +801,7 @@ def test_feature_union_weights(): 10 * pca.fit_transform(X)) assert_array_equal(X_fit_transformed[:, -1], select.fit_transform(X, y).ravel()) - assert_equal(X_fit_transformed_wo_method.shape, (X.shape[0], 7)) + assert X_fit_transformed_wo_method.shape == (X.shape[0], 7) def test_feature_union_parallel(): @@ -825,11 +825,11 @@ def test_feature_union_parallel(): fs.fit(X) X_transformed = fs.transform(X) - assert_equal(X_transformed.shape[0], len(X)) + assert X_transformed.shape[0] == len(X) fs_parallel.fit(X) X_transformed_parallel = fs_parallel.transform(X) - assert_equal(X_transformed.shape, X_transformed_parallel.shape) + assert X_transformed.shape == X_transformed_parallel.shape assert_array_equal( X_transformed.toarray(), X_transformed_parallel.toarray() @@ -858,7 +858,7 @@ def test_feature_union_feature_names(): feature_names = ft.get_feature_names() for feat in feature_names: assert "chars__" in feat or "words__" in feat - assert_equal(len(feature_names), 35) + assert len(feature_names) == 35 ft = FeatureUnion([("tr1", Transf())]).fit([[1]]) assert_raise_message(AttributeError, @@ -891,22 +891,22 @@ def test_set_feature_union_steps(): ft = FeatureUnion([('m2', mult2), ('m3', mult3)]) assert_array_equal([[2, 3]], ft.transform(np.asarray([[1]]))) - assert_equal(['m2__x2', 'm3__x3'], ft.get_feature_names()) + assert ['m2__x2', 'm3__x3'] == ft.get_feature_names() # Directly setting attr ft.transformer_list = [('m5', mult5)] assert_array_equal([[5]], ft.transform(np.asarray([[1]]))) - assert_equal(['m5__x5'], ft.get_feature_names()) + assert ['m5__x5'] == ft.get_feature_names() # Using set_params ft.set_params(transformer_list=[('mock', mult3)]) assert_array_equal([[3]], ft.transform(np.asarray([[1]]))) - assert_equal(['mock__x3'], ft.get_feature_names()) + assert ['mock__x3'] == ft.get_feature_names() # Using set_params to replace single step ft.set_params(mock=mult5) assert_array_equal([[5]], ft.transform(np.asarray([[1]]))) - assert_equal(['mock__x5'], ft.get_feature_names()) + assert ['mock__x5'] == ft.get_feature_names() @pytest.mark.parametrize('drop', ['drop', None]) @@ -920,17 +920,17 @@ def test_set_feature_union_step_drop(drop): ft = FeatureUnion([('m2', mult2), ('m3', mult3)]) assert_array_equal([[2, 3]], ft.fit(X).transform(X)) assert_array_equal([[2, 3]], ft.fit_transform(X)) - assert_equal(['m2__x2', 'm3__x3'], ft.get_feature_names()) + assert ['m2__x2', 'm3__x3'] == ft.get_feature_names() ft.set_params(m2=drop) assert_array_equal([[3]], ft.fit(X).transform(X)) assert_array_equal([[3]], ft.fit_transform(X)) - assert_equal(['m3__x3'], ft.get_feature_names()) + assert ['m3__x3'] == ft.get_feature_names() ft.set_params(m3=drop) assert_array_equal([[]], ft.fit(X).transform(X)) assert_array_equal([[]], ft.fit_transform(X)) - assert_equal([], ft.get_feature_names()) + assert [] == ft.get_feature_names() # check we can change back ft.set_params(m3=mult3) @@ -940,7 +940,7 @@ def test_set_feature_union_step_drop(drop): ft = FeatureUnion([('m2', drop), ('m3', mult3)]) assert_array_equal([[3]], ft.fit(X).transform(X)) assert_array_equal([[3]], ft.fit_transform(X)) - assert_equal(['m3__x3'], ft.get_feature_names()) + assert ['m3__x3'] == ft.get_feature_names() def test_step_name_validation(): @@ -1066,7 +1066,7 @@ def 
test_pipeline_memory(): assert_array_equal(pipe.score(X, y), cached_pipe.score(X, y)) assert_array_equal(pipe.named_steps['transf'].means_, cached_pipe.named_steps['transf'].means_) - assert_equal(ts, cached_pipe.named_steps['transf'].timestamp_) + assert ts == cached_pipe.named_steps['transf'].timestamp_ # Create a new pipeline with cloned estimators # Check that even changing the name step does not affect the cache hit clf_2 = SVC(probability=True, random_state=0) @@ -1084,7 +1084,7 @@ def test_pipeline_memory(): assert_array_equal(pipe.score(X, y), cached_pipe_2.score(X, y)) assert_array_equal(pipe.named_steps['transf'].means_, cached_pipe_2.named_steps['transf_2'].means_) - assert_equal(ts, cached_pipe_2.named_steps['transf_2'].timestamp_) + assert ts == cached_pipe_2.named_steps['transf_2'].timestamp_ finally: shutil.rmtree(cachedir) diff --git a/sklearn/tests/test_random_projection.py b/sklearn/tests/test_random_projection.py index a1205610cdb9f..93d22ba0e88ef 100644 --- a/sklearn/tests/test_random_projection.py +++ b/sklearn/tests/test_random_projection.py @@ -91,10 +91,10 @@ def check_input_size_random_matrix(random_matrix): def check_size_generated(random_matrix): - assert_equal(random_matrix(1, 5).shape, (1, 5)) - assert_equal(random_matrix(5, 1).shape, (5, 1)) - assert_equal(random_matrix(5, 5).shape, (5, 5)) - assert_equal(random_matrix(1, 1).shape, (1, 1)) + assert random_matrix(1, 5).shape == (1, 5) + assert random_matrix(5, 1).shape == (5, 1) + assert random_matrix(5, 5).shape == (5, 5) + assert random_matrix(1, 1).shape == (1, 1) def check_zero_mean_and_unit_norm(random_matrix): @@ -162,14 +162,14 @@ def test_sparse_random_matrix(): # Check possible values values = np.unique(A) - assert_in(np.sqrt(s) / np.sqrt(n_components), values) - assert_in(- np.sqrt(s) / np.sqrt(n_components), values) + assert np.sqrt(s) / np.sqrt(n_components) in values + assert - np.sqrt(s) / np.sqrt(n_components) in values if density == 1.0: - assert_equal(np.size(values), 2) + assert np.size(values) == 2 else: - assert_in(0., values) - assert_equal(np.size(values), 3) + assert 0. in values + assert np.size(values) == 3 # Check that the random matrix follow the proper distribution. 
# Let's say that each element of a_{ij} of A is taken from @@ -263,8 +263,8 @@ def test_random_projection_embedding_quality(): # check that the automatically tuned values for the density respect the # contract for eps: pairwise distances are preserved according to the # Johnson-Lindenstrauss lemma - assert_less(distances_ratio.max(), 1 + eps) - assert_less(1 - eps, distances_ratio.min()) + assert distances_ratio.max() < 1 + eps + assert 1 - eps < distances_ratio.min() def test_SparseRandomProjection_output_representation(): @@ -298,17 +298,17 @@ def test_correct_RandomProjection_dimensions_embedding(): # the number of components is adjusted from the shape of the training # set - assert_equal(rp.n_components, 'auto') - assert_equal(rp.n_components_, 110) + assert rp.n_components == 'auto' + assert rp.n_components_ == 110 if RandomProjection in all_SparseRandomProjection: - assert_equal(rp.density, 'auto') + assert rp.density == 'auto' assert_almost_equal(rp.density_, 0.03, 2) - assert_equal(rp.components_.shape, (110, n_features)) + assert rp.components_.shape == (110, n_features) projected_1 = rp.transform(data) - assert_equal(projected_1.shape, (n_samples, 110)) + assert projected_1.shape == (n_samples, 110) # once the RP is 'fitted' the projection is always the same projected_2 = rp.transform(data) @@ -328,10 +328,10 @@ def test_correct_RandomProjection_dimensions_embedding(): rp = RandomProjection(n_components=100, density=0.001, random_state=0) projected = rp.fit_transform(data) - assert_equal(projected.shape, (n_samples, 100)) - assert_equal(rp.components_.shape, (100, n_features)) - assert_less(rp.components_.nnz, 115) # close to 1% density - assert_less(85, rp.components_.nnz) # close to 1% density + assert projected.shape == (n_samples, 100) + assert rp.components_.shape == (100, n_features) + assert rp.components_.nnz < 115 # close to 1% density + assert 85 < rp.components_.nnz # close to 1% density def test_warning_n_components_greater_than_n_features(): From e9ad46e91aca526060be30277be7680eeeed993a Mon Sep 17 00:00:00 2001 From: adrinjalali Date: Sun, 30 Jun 2019 15:28:15 +0200 Subject: [PATCH 03/22] fix covariance, cross_decomposition, datasets --- sklearn/covariance/tests/test_covariance.py | 2 +- sklearn/cross_decomposition/tests/test_pls.py | 2 +- sklearn/datasets/tests/test_20news.py | 34 ++--- sklearn/datasets/tests/test_base.py | 92 +++++------ sklearn/datasets/tests/test_covtype.py | 10 +- sklearn/datasets/tests/test_kddcup99.py | 24 +-- sklearn/datasets/tests/test_lfw.py | 10 +- sklearn/datasets/tests/test_rcv1.py | 14 +- .../datasets/tests/test_samples_generator.py | 143 +++++++++--------- .../datasets/tests/test_svmlight_format.py | 70 ++++----- 10 files changed, 199 insertions(+), 202 deletions(-) diff --git a/sklearn/covariance/tests/test_covariance.py b/sklearn/covariance/tests/test_covariance.py index bf4449004ae0d..d7e6428ee27fb 100644 --- a/sklearn/covariance/tests/test_covariance.py +++ b/sklearn/covariance/tests/test_covariance.py @@ -43,7 +43,7 @@ def test_covariance(): cov.error_norm, emp_cov, norm='foo') # Mahalanobis distances computation test mahal_dist = cov.mahalanobis(X) - assert_greater(np.amin(mahal_dist), 0) + assert np.amin(mahal_dist) > 0 # test with n_features = 1 X_1d = X[:, 0].reshape((-1, 1)) diff --git a/sklearn/cross_decomposition/tests/test_pls.py b/sklearn/cross_decomposition/tests/test_pls.py index 3d408443e1563..abb305aefdb37 100644 --- a/sklearn/cross_decomposition/tests/test_pls.py +++ b/sklearn/cross_decomposition/tests/test_pls.py @@ 
-281,7 +281,7 @@ def test_PLSSVD(): for clf in [pls_.PLSSVD, pls_.PLSRegression, pls_.PLSCanonical]: pls = clf(n_components=n_components) pls.fit(X, Y) - assert_equal(n_components, pls.y_scores_.shape[1]) + assert n_components == pls.y_scores_.shape[1] def test_univariate_pls_regression(): diff --git a/sklearn/datasets/tests/test_20news.py b/sklearn/datasets/tests/test_20news.py index 90b09614b7a3a..5b171999433db 100644 --- a/sklearn/datasets/tests/test_20news.py +++ b/sklearn/datasets/tests/test_20news.py @@ -22,14 +22,14 @@ def test_20news(): subset='all', categories=data.target_names[-1:-3:-1], shuffle=False) # Check that the ordering of the target_names is the same # as the ordering in the full dataset - assert_equal(data2cats.target_names, + assert (data2cats.target_names == data.target_names[-2:]) # Assert that we have only 0 and 1 as labels - assert_equal(np.unique(data2cats.target).tolist(), [0, 1]) + assert np.unique(data2cats.target).tolist() == [0, 1] # Check that the number of filenames is consistent with data/target - assert_equal(len(data2cats.filenames), len(data2cats.target)) - assert_equal(len(data2cats.filenames), len(data2cats.data)) + assert len(data2cats.filenames) == len(data2cats.target) + assert len(data2cats.filenames) == len(data2cats.data) # Check that the first entry of the reduced dataset corresponds to # the first entry of the corresponding category in the full dataset @@ -37,7 +37,7 @@ def test_20news(): category = data2cats.target_names[data2cats.target[0]] label = data.target_names.index(category) entry2 = data.data[np.where(data.target == label)[0][0]] - assert_equal(entry1, entry2) + assert entry1 == entry2 def test_20news_length_consistency(): @@ -52,9 +52,9 @@ def test_20news_length_consistency(): raise SkipTest("Download 20 newsgroups to run this test") # Extract the full dataset data = datasets.fetch_20newsgroups(subset='all') - assert_equal(len(data['data']), len(data.data)) - assert_equal(len(data['target']), len(data.target)) - assert_equal(len(data['filenames']), len(data.filenames)) + assert len(data['data']) == len(data.data) + assert len(data['target']) == len(data.target) + assert len(data['filenames']) == len(data.filenames) def test_20news_vectorized(): @@ -67,16 +67,16 @@ def test_20news_vectorized(): # test subset = train bunch = datasets.fetch_20newsgroups_vectorized(subset="train") assert sp.isspmatrix_csr(bunch.data) - assert_equal(bunch.data.shape, (11314, 130107)) - assert_equal(bunch.target.shape[0], 11314) - assert_equal(bunch.data.dtype, np.float64) + assert bunch.data.shape == (11314, 130107) + assert bunch.target.shape[0] == 11314 + assert bunch.data.dtype == np.float64 # test subset = test bunch = datasets.fetch_20newsgroups_vectorized(subset="test") assert sp.isspmatrix_csr(bunch.data) - assert_equal(bunch.data.shape, (7532, 130107)) - assert_equal(bunch.target.shape[0], 7532) - assert_equal(bunch.data.dtype, np.float64) + assert bunch.data.shape == (7532, 130107) + assert bunch.target.shape[0] == 7532 + assert bunch.data.dtype == np.float64 # test return_X_y option fetch_func = partial(datasets.fetch_20newsgroups_vectorized, subset='test') @@ -85,6 +85,6 @@ def test_20news_vectorized(): # test subset = all bunch = datasets.fetch_20newsgroups_vectorized(subset='all') assert sp.isspmatrix_csr(bunch.data) - assert_equal(bunch.data.shape, (11314 + 7532, 130107)) - assert_equal(bunch.target.shape[0], 11314 + 7532) - assert_equal(bunch.data.dtype, np.float64) + assert bunch.data.shape == (11314 + 7532, 130107) + assert 
bunch.target.shape[0] == 11314 + 7532 + assert bunch.data.dtype == np.float64 diff --git a/sklearn/datasets/tests/test_base.py b/sklearn/datasets/tests/test_base.py index 676cb00fd16f8..ef802d0c588a6 100644 --- a/sklearn/datasets/tests/test_base.py +++ b/sklearn/datasets/tests/test_base.py @@ -72,7 +72,7 @@ def test_category_dir_2(load_files_root): def test_data_home(data_home): # get_data_home will point to a pre-existing folder data_home = get_data_home(data_home=data_home) - assert_equal(data_home, data_home) + assert data_home == data_home assert os.path.exists(data_home) # clear_data_home will delete both the content and the folder it-self @@ -86,9 +86,9 @@ def test_data_home(data_home): def test_default_empty_load_files(load_files_root): res = load_files(load_files_root) - assert_equal(len(res.filenames), 0) - assert_equal(len(res.target_names), 0) - assert_equal(res.DESCR, None) + assert len(res.filenames) == 0 + assert len(res.target_names) == 0 + assert res.DESCR == None def test_default_load_files(test_category_dir_1, test_category_dir_2, @@ -96,10 +96,10 @@ def test_default_load_files(test_category_dir_1, test_category_dir_2, if IS_PYPY: pytest.xfail('[PyPy] fails due to string containing NUL characters') res = load_files(load_files_root) - assert_equal(len(res.filenames), 1) - assert_equal(len(res.target_names), 2) - assert_equal(res.DESCR, None) - assert_equal(res.data, [b"Hello World!\n"]) + assert len(res.filenames) == 1 + assert len(res.target_names) == 2 + assert res.DESCR == None + assert res.data == [b"Hello World!\n"] def test_load_files_w_categories_desc_and_encoding( @@ -109,26 +109,26 @@ def test_load_files_w_categories_desc_and_encoding( category = os.path.abspath(test_category_dir_1).split('/').pop() res = load_files(load_files_root, description="test", categories=category, encoding="utf-8") - assert_equal(len(res.filenames), 1) - assert_equal(len(res.target_names), 1) - assert_equal(res.DESCR, "test") - assert_equal(res.data, ["Hello World!\n"]) + assert len(res.filenames) == 1 + assert len(res.target_names) == 1 + assert res.DESCR == "test" + assert res.data == ["Hello World!\n"] def test_load_files_wo_load_content( test_category_dir_1, test_category_dir_2, load_files_root): res = load_files(load_files_root, load_content=False) - assert_equal(len(res.filenames), 1) - assert_equal(len(res.target_names), 2) - assert_equal(res.DESCR, None) - assert_equal(res.get('data'), None) + assert len(res.filenames) == 1 + assert len(res.target_names) == 2 + assert res.DESCR == None + assert res.get('data') == None def test_load_sample_images(): try: res = load_sample_images() - assert_equal(len(res.images), 2) - assert_equal(len(res.filenames), 2) + assert len(res.images) == 2 + assert len(res.filenames) == 2 images = res.images # assert is china image @@ -144,8 +144,8 @@ def test_load_sample_images(): def test_load_digits(): digits = load_digits() - assert_equal(digits.data.shape, (1797, 64)) - assert_equal(numpy.unique(digits.target).size, 10) + assert digits.data.shape == (1797, 64) + assert numpy.unique(digits.target).size == 10 # test return_X_y option check_return_X_y(digits, partial(load_digits)) @@ -153,15 +153,15 @@ def test_load_digits(): def test_load_digits_n_class_lt_10(): digits = load_digits(9) - assert_equal(digits.data.shape, (1617, 64)) - assert_equal(numpy.unique(digits.target).size, 9) + assert digits.data.shape == (1617, 64) + assert numpy.unique(digits.target).size == 9 def test_load_sample_image(): try: china = load_sample_image('china.jpg') - 
assert_equal(china.dtype, 'uint8') - assert_equal(china.shape, (427, 640, 3)) + assert china.dtype == 'uint8' + assert china.shape == (427, 640, 3) except ImportError: warnings.warn("Could not load sample images, PIL is not available.") @@ -176,9 +176,9 @@ def test_load_missing_sample_image_error(): def test_load_diabetes(): res = load_diabetes() - assert_equal(res.data.shape, (442, 10)) + assert res.data.shape == (442, 10) assert res.target.size, 442 - assert_equal(len(res.feature_names), 10) + assert len(res.feature_names) == 10 assert res.DESCR # test return_X_y option @@ -187,9 +187,9 @@ def test_load_diabetes(): def test_load_linnerud(): res = load_linnerud() - assert_equal(res.data.shape, (20, 3)) - assert_equal(res.target.shape, (20, 3)) - assert_equal(len(res.target_names), 3) + assert res.data.shape == (20, 3) + assert res.target.shape == (20, 3) + assert len(res.target_names) == 3 assert res.DESCR assert os.path.exists(res.data_filename) assert os.path.exists(res.target_filename) @@ -200,9 +200,9 @@ def test_load_linnerud(): def test_load_iris(): res = load_iris() - assert_equal(res.data.shape, (150, 4)) - assert_equal(res.target.size, 150) - assert_equal(res.target_names.size, 3) + assert res.data.shape == (150, 4) + assert res.target.size == 150 + assert res.target_names.size == 3 assert res.DESCR assert os.path.exists(res.filename) @@ -212,9 +212,9 @@ def test_load_iris(): def test_load_wine(): res = load_wine() - assert_equal(res.data.shape, (178, 13)) - assert_equal(res.target.size, 178) - assert_equal(res.target_names.size, 3) + assert res.data.shape == (178, 13) + assert res.target.size == 178 + assert res.target_names.size == 3 assert res.DESCR # test return_X_y option @@ -223,9 +223,9 @@ def test_load_wine(): def test_load_breast_cancer(): res = load_breast_cancer() - assert_equal(res.data.shape, (569, 30)) - assert_equal(res.target.size, 569) - assert_equal(res.target_names.size, 2) + assert res.data.shape == (569, 30) + assert res.target.size == 569 + assert res.target_names.size == 2 assert res.DESCR assert os.path.exists(res.filename) @@ -235,9 +235,9 @@ def test_load_breast_cancer(): def test_load_boston(): res = load_boston() - assert_equal(res.data.shape, (506, 13)) - assert_equal(res.target.size, 506) - assert_equal(res.feature_names.size, 13) + assert res.data.shape == (506, 13) + assert res.target.size == 506 + assert res.feature_names.size == 13 assert res.DESCR assert os.path.exists(res.filename) @@ -249,7 +249,7 @@ def test_loads_dumps_bunch(): bunch = Bunch(x="x") bunch_from_pkl = loads(dumps(bunch)) bunch_from_pkl.x = "y" - assert_equal(bunch_from_pkl['x'], bunch_from_pkl.x) + assert bunch_from_pkl['x'] == bunch_from_pkl.x def test_bunch_pickle_generated_with_0_16_and_read_with_0_17(): @@ -264,13 +264,13 @@ def test_bunch_pickle_generated_with_0_16_and_read_with_0_17(): bunch.__dict__['key'] = 'set from __dict__' bunch_from_pkl = loads(dumps(bunch)) # After loading from pickle the __dict__ should have been ignored - assert_equal(bunch_from_pkl.key, 'original') - assert_equal(bunch_from_pkl['key'], 'original') + assert bunch_from_pkl.key == 'original' + assert bunch_from_pkl['key'] == 'original' # Making sure that changing the attr does change the value # associated with __getitem__ as well bunch_from_pkl.key = 'changed' - assert_equal(bunch_from_pkl.key, 'changed') - assert_equal(bunch_from_pkl['key'], 'changed') + assert bunch_from_pkl.key == 'changed' + assert bunch_from_pkl['key'] == 'changed' def test_bunch_dir(): diff --git 
a/sklearn/datasets/tests/test_covtype.py b/sklearn/datasets/tests/test_covtype.py index 449382f824525..0c30a0c7d5b18 100644 --- a/sklearn/datasets/tests/test_covtype.py +++ b/sklearn/datasets/tests/test_covtype.py @@ -22,14 +22,14 @@ def test_fetch(): data2 = fetch(shuffle=True, random_state=37) X1, X2 = data1['data'], data2['data'] - assert_equal((581012, 54), X1.shape) - assert_equal(X1.shape, X2.shape) + assert (581012, 54) == X1.shape + assert X1.shape == X2.shape - assert_equal(X1.sum(), X2.sum()) + assert X1.sum() == X2.sum() y1, y2 = data1['target'], data2['target'] - assert_equal((X1.shape[0],), y1.shape) - assert_equal((X1.shape[0],), y2.shape) + assert (X1.shape[0],) == y1.shape + assert (X1.shape[0],) == y2.shape # test return_X_y option fetch_func = partial(fetch) diff --git a/sklearn/datasets/tests/test_kddcup99.py b/sklearn/datasets/tests/test_kddcup99.py index ce7096f3863b8..6efb23c6dfd26 100644 --- a/sklearn/datasets/tests/test_kddcup99.py +++ b/sklearn/datasets/tests/test_kddcup99.py @@ -18,28 +18,28 @@ def test_percent10(): except IOError: raise SkipTest("kddcup99 dataset can not be loaded.") - assert_equal(data.data.shape, (494021, 41)) - assert_equal(data.target.shape, (494021,)) + assert data.data.shape == (494021, 41) + assert data.target.shape == (494021,) data_shuffled = fetch_kddcup99(shuffle=True, random_state=0) - assert_equal(data.data.shape, data_shuffled.data.shape) - assert_equal(data.target.shape, data_shuffled.target.shape) + assert data.data.shape == data_shuffled.data.shape + assert data.target.shape == data_shuffled.target.shape data = fetch_kddcup99('SA') - assert_equal(data.data.shape, (100655, 41)) - assert_equal(data.target.shape, (100655,)) + assert data.data.shape == (100655, 41) + assert data.target.shape == (100655,) data = fetch_kddcup99('SF') - assert_equal(data.data.shape, (73237, 4)) - assert_equal(data.target.shape, (73237,)) + assert data.data.shape == (73237, 4) + assert data.target.shape == (73237,) data = fetch_kddcup99('http') - assert_equal(data.data.shape, (58725, 3)) - assert_equal(data.target.shape, (58725,)) + assert data.data.shape == (58725, 3) + assert data.target.shape == (58725,) data = fetch_kddcup99('smtp') - assert_equal(data.data.shape, (9571, 3)) - assert_equal(data.target.shape, (9571,)) + assert data.data.shape == (9571, 3) + assert data.target.shape == (9571,) fetch_func = partial(fetch_kddcup99, 'smtp') check_return_X_y(data, fetch_func) diff --git a/sklearn/datasets/tests/test_lfw.py b/sklearn/datasets/tests/test_lfw.py index 1afd09084371c..11211e803f93d 100644 --- a/sklearn/datasets/tests/test_lfw.py +++ b/sklearn/datasets/tests/test_lfw.py @@ -117,8 +117,8 @@ def test_load_fake_lfw_people(): # The data is croped around the center as a rectangular bounding box # around the face. 
Colors are converted to gray levels: - assert_equal(lfw_people.images.shape, (10, 62, 47)) - assert_equal(lfw_people.data.shape, (10, 2914)) + assert lfw_people.images.shape == (10, 62, 47) + assert lfw_people.data.shape == (10, 2914) # the target is array of person integer ids assert_array_equal(lfw_people.target, [2, 0, 1, 0, 2, 0, 2, 1, 1, 2]) @@ -132,7 +132,7 @@ def test_load_fake_lfw_people(): lfw_people = fetch_lfw_people(data_home=SCIKIT_LEARN_DATA, resize=None, slice_=None, color=True, download_if_missing=False) - assert_equal(lfw_people.images.shape, (17, 250, 250, 3)) + assert lfw_people.images.shape == (17, 250, 250, 3) # the ids and class names are the same as previously assert_array_equal(lfw_people.target, @@ -166,7 +166,7 @@ def test_load_fake_lfw_pairs(): # The data is croped around the center as a rectangular bounding box # around the face. Colors are converted to gray levels: - assert_equal(lfw_pairs_train.pairs.shape, (10, 2, 62, 47)) + assert lfw_pairs_train.pairs.shape == (10, 2, 62, 47) # the target is whether the person is the same or not assert_array_equal(lfw_pairs_train.target, [1, 1, 1, 1, 1, 0, 0, 0, 0, 0]) @@ -180,7 +180,7 @@ def test_load_fake_lfw_pairs(): lfw_pairs_train = fetch_lfw_pairs(data_home=SCIKIT_LEARN_DATA, resize=None, slice_=None, color=True, download_if_missing=False) - assert_equal(lfw_pairs_train.pairs.shape, (10, 2, 250, 250, 3)) + assert lfw_pairs_train.pairs.shape == (10, 2, 250, 250, 3) # the ids and class names are the same as previously assert_array_equal(lfw_pairs_train.target, [1, 1, 1, 1, 1, 0, 0, 0, 0, 0]) diff --git a/sklearn/datasets/tests/test_rcv1.py b/sklearn/datasets/tests/test_rcv1.py index ea12c9f8e3a12..aa747bd5d74fe 100644 --- a/sklearn/datasets/tests/test_rcv1.py +++ b/sklearn/datasets/tests/test_rcv1.py @@ -28,14 +28,14 @@ def test_fetch_rcv1(): # test sparsity assert sp.issparse(X1) assert sp.issparse(Y1) - assert_equal(60915113, X1.data.size) - assert_equal(2606875, Y1.data.size) + assert 60915113 == X1.data.size + assert 2606875 == Y1.data.size # test shapes - assert_equal((804414, 47236), X1.shape) - assert_equal((804414, 103), Y1.shape) - assert_equal((804414,), s1.shape) - assert_equal(103, len(cat_list)) + assert (804414, 47236) == X1.shape + assert (804414, 103) == Y1.shape + assert (804414,) == s1.shape + assert 103 == len(cat_list) # test ordering of categories first_categories = ['C11', 'C12', 'C13', 'C14', 'C15', 'C151'] @@ -46,7 +46,7 @@ def test_fetch_rcv1(): number_non_zero_in_cat = (5, 1206, 381327) for num, cat in zip(number_non_zero_in_cat, some_categories): j = cat_list.index(cat) - assert_equal(num, Y1[:, j].data.size) + assert num == Y1[:, j].data.size # test shuffling and subset data2 = fetch_rcv1(shuffle=True, subset='train', random_state=77, diff --git a/sklearn/datasets/tests/test_samples_generator.py b/sklearn/datasets/tests/test_samples_generator.py index c66a056a5a0aa..f3e0e20b7dea8 100644 --- a/sklearn/datasets/tests/test_samples_generator.py +++ b/sklearn/datasets/tests/test_samples_generator.py @@ -44,24 +44,23 @@ def test_make_classification(): shift=None, scale=None, weights=weights, random_state=0) - assert_equal(weights, [0.1, 0.25]) - assert_equal(X.shape, (100, 20), "X shape mismatch") - assert_equal(y.shape, (100,), "y shape mismatch") - assert_equal(np.unique(y).shape, (3,), "Unexpected number of classes") - assert_equal(sum(y == 0), 10, "Unexpected number of samples in class #0") - assert_equal(sum(y == 1), 25, "Unexpected number of samples in class #1") - assert_equal(sum(y == 2), 
65, "Unexpected number of samples in class #2") + assert weights == [0.1, 0.25] + assert X.shape == (100, 20), "X shape mismatch" + assert y.shape == (100,), "y shape mismatch" + assert np.unique(y).shape == (3,), "Unexpected number of classes" + assert sum(y == 0) == 10, "Unexpected number of samples in class #0" + assert sum(y == 1) == 25, "Unexpected number of samples in class #1" + assert sum(y == 2) == 65, "Unexpected number of samples in class #2" # Test for n_features > 30 X, y = make_classification(n_samples=2000, n_features=31, n_informative=31, n_redundant=0, n_repeated=0, hypercube=True, scale=0.5, random_state=0) - assert_equal(X.shape, (2000, 31), "X shape mismatch") - assert_equal(y.shape, (2000,), "y shape mismatch") - assert_equal(np.unique(X.view([('', X.dtype)]*X.shape[1])).view(X.dtype) - .reshape(-1, X.shape[1]).shape[0], 2000, - "Unexpected number of unique rows") + assert X.shape == (2000, 31), "X shape mismatch" + assert y.shape == (2000,), "y shape mismatch" + assert (np.unique(X.view([('', X.dtype)]*X.shape[1])).view(X.dtype) + .reshape(-1, X.shape[1]).shape[0] == 2000), "Unexpected number of unique rows" def test_make_classification_informative_features(): @@ -95,8 +94,8 @@ def test_make_classification_informative_features(): n_clusters_per_class=n_clusters_per_class, hypercube=hypercube, random_state=0) - assert_equal(X.shape, (n_samples, n_informative)) - assert_equal(y.shape, (n_samples,)) + assert X.shape == (n_samples, n_informative) + assert y.shape == (n_samples,) # Cluster by sign, viewed as strings to allow uniquing signs = np.sign(X) @@ -104,18 +103,18 @@ def test_make_classification_informative_features(): unique_signs, cluster_index = np.unique(signs, return_inverse=True) - assert_equal(len(unique_signs), n_clusters, - "Wrong number of clusters, or not in distinct " - "quadrants") + assert_message = ("Wrong number of clusters, or not in distinct " + "quadrants") + assert len(unique_signs) == n_clusters, assert_message clusters_by_class = defaultdict(set) for cluster, cls in zip(cluster_index, y): clusters_by_class[cls].add(cluster) for clusters in clusters_by_class.values(): - assert_equal(len(clusters), n_clusters_per_class, - "Wrong number of clusters per class") - assert_equal(len(clusters_by_class), n_classes, - "Wrong number of classes") + assert_message = "Wrong number of clusters per class" + assert len(clusters) == n_clusters_per_class, assert_message + assert (len(clusters_by_class) + == n_classes), "Wrong number of classes" assert_array_almost_equal(np.bincount(y) / len(y) // weights, [1] * n_classes, @@ -153,10 +152,10 @@ def test_make_multilabel_classification_return_sequences(): n_classes=3, random_state=0, return_indicator=False, allow_unlabeled=allow_unlabeled) - assert_equal(X.shape, (100, 20), "X shape mismatch") + assert X.shape == (100, 20), "X shape mismatch" if not allow_unlabeled: - assert_equal(max([max(y) for y in Y]), 2) - assert_equal(min([len(y) for y in Y]), min_length) + assert max([max(y) for y in Y]) == 2 + assert min([len(y) for y in Y]) == min_length assert max([len(y) for y in Y]) <= 3 @@ -165,8 +164,8 @@ def test_make_multilabel_classification_return_indicator(): X, Y = make_multilabel_classification(n_samples=25, n_features=20, n_classes=3, random_state=0, allow_unlabeled=allow_unlabeled) - assert_equal(X.shape, (25, 20), "X shape mismatch") - assert_equal(Y.shape, (25, 3), "Y shape mismatch") + assert X.shape == (25, 20), "X shape mismatch" + assert Y.shape == (25, 3), "Y shape mismatch" assert np.all(np.sum(Y, 
axis=0) > min_length) # Also test return_distributions and return_indicator with True @@ -176,9 +175,9 @@ def test_make_multilabel_classification_return_indicator(): assert_array_almost_equal(X, X2) assert_array_equal(Y, Y2) - assert_equal(p_c.shape, (3,)) + assert p_c.shape == (3,) assert_almost_equal(p_c.sum(), 1) - assert_equal(p_w_c.shape, (20, 3)) + assert p_w_c.shape == (20, 3) assert_almost_equal(p_w_c.sum(axis=0), [1] * 3) @@ -188,16 +187,16 @@ def test_make_multilabel_classification_return_indicator_sparse(): n_classes=3, random_state=0, return_indicator='sparse', allow_unlabeled=allow_unlabeled) - assert_equal(X.shape, (25, 20), "X shape mismatch") - assert_equal(Y.shape, (25, 3), "Y shape mismatch") + assert X.shape == (25, 20), "X shape mismatch" + assert Y.shape == (25, 3), "Y shape mismatch" assert sp.issparse(Y) def test_make_hastie_10_2(): X, y = make_hastie_10_2(n_samples=100, random_state=0) - assert_equal(X.shape, (100, 10), "X shape mismatch") - assert_equal(y.shape, (100,), "y shape mismatch") - assert_equal(np.unique(y).shape, (2,), "Unexpected number of classes") + assert X.shape == (100, 10), "X shape mismatch" + assert y.shape == (100,), "y shape mismatch" + assert np.unique(y).shape == (2,), "Unexpected number of classes" def test_make_regression(): @@ -205,26 +204,26 @@ def test_make_regression(): effective_rank=5, coef=True, bias=0.0, noise=1.0, random_state=0) - assert_equal(X.shape, (100, 10), "X shape mismatch") - assert_equal(y.shape, (100,), "y shape mismatch") - assert_equal(c.shape, (10,), "coef shape mismatch") - assert_equal(sum(c != 0.0), 3, "Unexpected number of informative features") + assert X.shape == (100, 10), "X shape mismatch" + assert y.shape == (100,), "y shape mismatch" + assert c.shape == (10,), "coef shape mismatch" + assert sum(c != 0.0) == 3, "Unexpected number of informative features" # Test that y ~= np.dot(X, c) + bias + N(0, 1.0). assert_almost_equal(np.std(y - np.dot(X, c)), 1.0, decimal=1) # Test with small number of features. 
X, y = make_regression(n_samples=100, n_features=1) # n_informative=3 - assert_equal(X.shape, (100, 1)) + assert X.shape == (100, 1) def test_make_regression_multitarget(): X, y, c = make_regression(n_samples=100, n_features=10, n_informative=3, n_targets=3, coef=True, noise=1., random_state=0) - assert_equal(X.shape, (100, 10), "X shape mismatch") - assert_equal(y.shape, (100, 3), "y shape mismatch") - assert_equal(c.shape, (10, 3), "coef shape mismatch") + assert X.shape == (100, 10), "X shape mismatch" + assert y.shape == (100, 3), "y shape mismatch" + assert c.shape == (10, 3), "coef shape mismatch" assert_array_equal(sum(c != 0.0), 3, "Unexpected number of informative features") @@ -240,7 +239,7 @@ def test_make_blobs(): assert X.shape == (50, 2), "X shape mismatch" assert y.shape == (50,), "y shape mismatch" - assert_equal(np.unique(y).shape, (3,), "Unexpected number of blobs") + assert np.unique(y).shape == (3,), "Unexpected number of blobs" for i, (ctr, std) in enumerate(zip(cluster_centers, cluster_stds)): assert_almost_equal((X[y == i] - ctr).std(), std, 1, "Unexpected std") @@ -308,8 +307,8 @@ def test_make_friedman1(): X, y = make_friedman1(n_samples=5, n_features=10, noise=0.0, random_state=0) - assert_equal(X.shape, (5, 10), "X shape mismatch") - assert_equal(y.shape, (5,), "y shape mismatch") + assert X.shape == (5, 10), "X shape mismatch" + assert y.shape == (5,), "y shape mismatch" assert_array_almost_equal(y, 10 * np.sin(np.pi * X[:, 0] * X[:, 1]) @@ -320,8 +319,8 @@ def test_make_friedman1(): def test_make_friedman2(): X, y = make_friedman2(n_samples=5, noise=0.0, random_state=0) - assert_equal(X.shape, (5, 4), "X shape mismatch") - assert_equal(y.shape, (5,), "y shape mismatch") + assert X.shape == (5, 4), "X shape mismatch" + assert y.shape == (5,), "y shape mismatch" assert_array_almost_equal(y, (X[:, 0] ** 2 @@ -332,8 +331,8 @@ def test_make_friedman2(): def test_make_friedman3(): X, y = make_friedman3(n_samples=5, noise=0.0, random_state=0) - assert_equal(X.shape, (5, 4), "X shape mismatch") - assert_equal(y.shape, (5,), "y shape mismatch") + assert X.shape == (5, 4), "X shape mismatch" + assert y.shape == (5,), "y shape mismatch" assert_array_almost_equal(y, np.arctan((X[:, 1] * X[:, 2] - 1 / (X[:, 1] * X[:, 3])) @@ -344,22 +343,22 @@ def test_make_low_rank_matrix(): X = make_low_rank_matrix(n_samples=50, n_features=25, effective_rank=5, tail_strength=0.01, random_state=0) - assert_equal(X.shape, (50, 25), "X shape mismatch") + assert X.shape == (50, 25), "X shape mismatch" from numpy.linalg import svd u, s, v = svd(X) - assert_less(sum(s) - 5, 0.1, "X rank is not approximately 5") + assert sum(s) - 5 < 0.1, "X rank is not approximately 5" def test_make_sparse_coded_signal(): Y, D, X = make_sparse_coded_signal(n_samples=5, n_components=8, n_features=10, n_nonzero_coefs=3, random_state=0) - assert_equal(Y.shape, (10, 5), "Y shape mismatch") - assert_equal(D.shape, (10, 8), "D shape mismatch") - assert_equal(X.shape, (8, 5), "X shape mismatch") + assert Y.shape == (10, 5), "Y shape mismatch" + assert D.shape == (10, 8), "D shape mismatch" + assert X.shape == (8, 5), "X shape mismatch" for col in X.T: - assert_equal(len(np.flatnonzero(col)), 3, 'Non-zero coefs mismatch') + assert len(np.flatnonzero(col)) == 3, 'Non-zero coefs mismatch' assert_array_almost_equal(np.dot(D, X), Y) assert_array_almost_equal(np.sqrt((D ** 2).sum(axis=0)), np.ones(D.shape[1])) @@ -368,14 +367,14 @@ def test_make_sparse_coded_signal(): def test_make_sparse_uncorrelated(): X, y = 
make_sparse_uncorrelated(n_samples=5, n_features=10, random_state=0) - assert_equal(X.shape, (5, 10), "X shape mismatch") - assert_equal(y.shape, (5,), "y shape mismatch") + assert X.shape == (5, 10), "X shape mismatch" + assert y.shape == (5,), "y shape mismatch" def test_make_spd_matrix(): X = make_spd_matrix(n_dim=5, random_state=0) - assert_equal(X.shape, (5, 5), "X shape mismatch") + assert X.shape == (5, 5), "X shape mismatch" assert_array_almost_equal(X, X.T) from numpy.linalg import eig @@ -387,8 +386,8 @@ def test_make_spd_matrix(): def test_make_swiss_roll(): X, t = make_swiss_roll(n_samples=5, noise=0.0, random_state=0) - assert_equal(X.shape, (5, 3), "X shape mismatch") - assert_equal(t.shape, (5,), "t shape mismatch") + assert X.shape == (5, 3), "X shape mismatch" + assert t.shape == (5,), "t shape mismatch" assert_array_almost_equal(X[:, 0], t * np.cos(t)) assert_array_almost_equal(X[:, 2], t * np.sin(t)) @@ -396,8 +395,8 @@ def test_make_swiss_roll(): def test_make_s_curve(): X, t = make_s_curve(n_samples=5, noise=0.0, random_state=0) - assert_equal(X.shape, (5, 3), "X shape mismatch") - assert_equal(t.shape, (5,), "t shape mismatch") + assert X.shape == (5, 3), "X shape mismatch" + assert t.shape == (5,), "t shape mismatch" assert_array_almost_equal(X[:, 0], np.sin(t)) assert_array_almost_equal(X[:, 2], np.sign(t) * (np.cos(t) - 1)) @@ -405,9 +404,9 @@ def test_make_s_curve(): def test_make_biclusters(): X, rows, cols = make_biclusters( shape=(100, 100), n_clusters=4, shuffle=True, random_state=0) - assert_equal(X.shape, (100, 100), "X shape mismatch") - assert_equal(rows.shape, (4, 100), "rows shape mismatch") - assert_equal(cols.shape, (4, 100,), "columns shape mismatch") + assert X.shape == (100, 100), "X shape mismatch" + assert rows.shape == (4, 100), "rows shape mismatch" + assert cols.shape == (4, 100,), "columns shape mismatch" assert_all_finite(X) assert_all_finite(rows) assert_all_finite(cols) @@ -421,9 +420,9 @@ def test_make_checkerboard(): X, rows, cols = make_checkerboard( shape=(100, 100), n_clusters=(20, 5), shuffle=True, random_state=0) - assert_equal(X.shape, (100, 100), "X shape mismatch") - assert_equal(rows.shape, (100, 100), "rows shape mismatch") - assert_equal(cols.shape, (100, 100,), "columns shape mismatch") + assert X.shape == (100, 100), "X shape mismatch" + assert rows.shape == (100, 100), "rows shape mismatch" + assert cols.shape == (100, 100,), "columns shape mismatch" X, rows, cols = make_checkerboard( shape=(100, 100), n_clusters=2, shuffle=True, random_state=0) @@ -455,8 +454,8 @@ def test_make_circles(): # created an even number of samples. X, y = make_circles(n_samples, shuffle=False, noise=None, factor=factor) - assert_equal(X.shape, (n_samples, 2), "X shape mismatch") - assert_equal(y.shape, (n_samples,), "y shape mismatch") + assert X.shape == (n_samples, 2), "X shape mismatch" + assert y.shape == (n_samples,), "y shape mismatch" center = [0.0, 0.0] for x, label in zip(X, y): dist_sqr = ((x - center) ** 2).sum() @@ -464,10 +463,8 @@ def test_make_circles(): assert_almost_equal(dist_sqr, dist_exp, err_msg="Point is not on expected circle") - assert_equal(X[y == 0].shape, (n_outer, 2), - "Samples not correctly distributed across circles.") - assert_equal(X[y == 1].shape, (n_inner, 2), - "Samples not correctly distributed across circles.") + assert X[y == 0].shape == (n_outer, 2), "Samples not correctly distributed across circles." + assert X[y == 1].shape == (n_inner, 2), "Samples not correctly distributed across circles." 
assert_raises(ValueError, make_circles, factor=-0.01) assert_raises(ValueError, make_circles, factor=1.) diff --git a/sklearn/datasets/tests/test_svmlight_format.py b/sklearn/datasets/tests/test_svmlight_format.py index c25344e4acfcf..bec67a7aa3819 100644 --- a/sklearn/datasets/tests/test_svmlight_format.py +++ b/sklearn/datasets/tests/test_svmlight_format.py @@ -34,28 +34,28 @@ def test_load_svmlight_file(): X, y = load_svmlight_file(datafile) # test X's shape - assert_equal(X.indptr.shape[0], 7) - assert_equal(X.shape[0], 6) - assert_equal(X.shape[1], 21) - assert_equal(y.shape[0], 6) + assert X.indptr.shape[0] == 7 + assert X.shape[0] == 6 + assert X.shape[1] == 21 + assert y.shape[0] == 6 # test X's non-zero values for i, j, val in ((0, 2, 2.5), (0, 10, -5.2), (0, 15, 1.5), (1, 5, 1.0), (1, 12, -3), (2, 20, 27)): - assert_equal(X[i, j], val) + assert X[i, j] == val # tests X's zero values - assert_equal(X[0, 3], 0) - assert_equal(X[0, 5], 0) - assert_equal(X[1, 8], 0) - assert_equal(X[1, 16], 0) - assert_equal(X[2, 18], 0) + assert X[0, 3] == 0 + assert X[0, 5] == 0 + assert X[1, 8] == 0 + assert X[1, 16] == 0 + assert X[2, 18] == 0 # test can change X's values X[0, 2] *= 2 - assert_equal(X[0, 2], 5) + assert X[0, 2] == 5 # test y assert_array_equal(y, [1, 2, 3, 4, 1, 2]) @@ -76,7 +76,7 @@ def test_load_svmlight_file_fd(): def test_load_svmlight_file_multilabel(): X, y = load_svmlight_file(multifile, multilabel=True) - assert_equal(y, [(0, 1), (2,), (), (1, 2)]) + assert y == [(0, 1), (2,), (), (1, 2)] def test_load_svmlight_files(): @@ -84,29 +84,29 @@ def test_load_svmlight_files(): dtype=np.float32) assert_array_equal(X_train.toarray(), X_test.toarray()) assert_array_almost_equal(y_train, y_test) - assert_equal(X_train.dtype, np.float32) - assert_equal(X_test.dtype, np.float32) + assert X_train.dtype == np.float32 + assert X_test.dtype == np.float32 X1, y1, X2, y2, X3, y3 = load_svmlight_files([datafile] * 3, dtype=np.float64) - assert_equal(X1.dtype, X2.dtype) - assert_equal(X2.dtype, X3.dtype) - assert_equal(X3.dtype, np.float64) + assert X1.dtype == X2.dtype + assert X2.dtype == X3.dtype + assert X3.dtype == np.float64 def test_load_svmlight_file_n_features(): X, y = load_svmlight_file(datafile, n_features=22) # test X'shape - assert_equal(X.indptr.shape[0], 7) - assert_equal(X.shape[0], 6) - assert_equal(X.shape[1], 22) + assert X.indptr.shape[0] == 7 + assert X.shape[0] == 6 + assert X.shape[1] == 22 # test X's non-zero values for i, j, val in ((0, 2, 2.5), (0, 10, -5.2), (1, 5, 1.0), (1, 12, -3)): - assert_equal(X[i, j], val) + assert X[i, j] == val # 21 features in file assert_raises(ValueError, load_svmlight_file, datafile, n_features=20) @@ -159,13 +159,13 @@ def test_load_zero_based_auto(): f1 = BytesIO(data1) X, y = load_svmlight_file(f1, zero_based="auto") - assert_equal(X.shape, (1, 3)) + assert X.shape == (1, 3) f1 = BytesIO(data1) f2 = BytesIO(data2) X1, y1, X2, y2 = load_svmlight_files([f1, f2], zero_based="auto") - assert_equal(X1.shape, (1, 4)) - assert_equal(X2.shape, (1, 4)) + assert X1.shape == (1, 4) + assert X2.shape == (1, 4) def test_load_with_qid(): @@ -250,16 +250,16 @@ def test_dump(): comment = f.readline() comment = str(comment, "utf-8") - assert_in("scikit-learn %s" % sklearn.__version__, comment) + assert "scikit-learn %s" % sklearn.__version__ in comment comment = f.readline() comment = str(comment, "utf-8") - assert_in(["one", "zero"][zero_based] + "-based", comment) + assert ["one", "zero"][zero_based] + "-based" in comment X2, y2 = 
load_svmlight_file(f, dtype=dtype, zero_based=zero_based) - assert_equal(X2.dtype, dtype) + assert X2.dtype == dtype assert_array_equal(X2.sorted_indices().indices, X2.indices) X2_dense = X2.toarray() @@ -293,9 +293,9 @@ def test_dump_multilabel(): dump_svmlight_file(X, y, f, multilabel=True) f.seek(0) # make sure it dumps multilabel correctly - assert_equal(f.readline(), b"1 0:1 2:3 4:5\n") - assert_equal(f.readline(), b"0,2 \n") - assert_equal(f.readline(), b"0,1 1:5 3:1\n") + assert f.readline() == b"1 0:1 2:3 4:5\n" + assert f.readline() == b"0,2 \n" + assert f.readline() == b"0,1 1:5 3:1\n" def test_dump_concise(): @@ -315,12 +315,12 @@ def test_dump_concise(): dump_svmlight_file(X, y, f) f.seek(0) # make sure it's using the most concise format possible - assert_equal(f.readline(), + assert (f.readline() == b"1 0:1 1:2.1 2:3.01 3:1.000000000000001 4:1\n") - assert_equal(f.readline(), b"2.1 0:1000000000 1:2e+18 2:3e+27\n") - assert_equal(f.readline(), b"3.01 \n") - assert_equal(f.readline(), b"1.000000000000001 \n") - assert_equal(f.readline(), b"1 \n") + assert f.readline() == b"2.1 0:1000000000 1:2e+18 2:3e+27\n" + assert f.readline() == b"3.01 \n" + assert f.readline() == b"1.000000000000001 \n" + assert f.readline() == b"1 \n" f.seek(0) # make sure it's correct too :) X2, y2 = load_svmlight_file(f) From 2065724da6f77da51c654705fa904ae3228adbab Mon Sep 17 00:00:00 2001 From: adrinjalali Date: Sun, 30 Jun 2019 15:31:04 +0200 Subject: [PATCH 04/22] fix decomposition --- .../decomposition/tests/test_dict_learning.py | 24 ++++++++--------- .../tests/test_factor_analysis.py | 6 ++--- sklearn/decomposition/tests/test_fastica.py | 20 +++++++------- .../decomposition/tests/test_kernel_pca.py | 26 +++++++++---------- sklearn/decomposition/tests/test_nmf.py | 12 ++++----- .../decomposition/tests/test_online_lda.py | 10 +++---- .../decomposition/tests/test_sparse_pca.py | 16 ++++++------ 7 files changed, 57 insertions(+), 57 deletions(-) diff --git a/sklearn/decomposition/tests/test_dict_learning.py b/sklearn/decomposition/tests/test_dict_learning.py index f0bd4bedfe5b1..cbe4c822cb5ab 100644 --- a/sklearn/decomposition/tests/test_dict_learning.py +++ b/sklearn/decomposition/tests/test_dict_learning.py @@ -37,18 +37,18 @@ def test_sparse_encode_shapes_omp(): for algorithm, n_jobs in itertools.product(algorithms, [1, 3]): code = sparse_encode(X_, dictionary, algorithm=algorithm, n_jobs=n_jobs) - assert_equal(code.shape, (n_samples, n_components)) + assert code.shape == (n_samples, n_components) def test_dict_learning_shapes(): n_components = 5 dico = DictionaryLearning(n_components, random_state=0).fit(X) - assert_equal(dico.components_.shape, (n_components, n_features)) + assert dico.components_.shape == (n_components, n_features) n_components = 1 dico = DictionaryLearning(n_components, random_state=0).fit(X) - assert_equal(dico.components_.shape, (n_components, n_features)) - assert_equal(dico.transform(X).shape, (X.shape[0], n_components)) + assert dico.components_.shape == (n_components, n_features) + assert dico.transform(X).shape == (X.shape[0], n_components) def test_dict_learning_overcomplete(): @@ -166,7 +166,7 @@ def test_dict_learning_nonzero_coefs(): dico.set_params(transform_algorithm='omp') code = dico.transform(X[np.newaxis, 1]) - assert_equal(len(np.flatnonzero(code)), 3) + assert len(np.flatnonzero(code)) == 3 def test_dict_learning_unknown_fit_algorithm(): @@ -192,9 +192,9 @@ def test_dict_learning_online_shapes(): n_components = 8 code, dictionary = dict_learning_online(X, 
n_components=n_components, alpha=1, random_state=rng) - assert_equal(code.shape, (n_samples, n_components)) - assert_equal(dictionary.shape, (n_components, n_features)) - assert_equal(np.dot(code, dictionary).shape, X.shape) + assert code.shape == (n_samples, n_components) + assert dictionary.shape == (n_components, n_features) + assert np.dot(code, dictionary).shape == X.shape def test_dict_learning_online_lars_positive_parameter(): @@ -352,7 +352,7 @@ def test_sparse_encode_shapes(): V /= np.sum(V ** 2, axis=1)[:, np.newaxis] for algo in ('lasso_lars', 'lasso_cd', 'lars', 'omp', 'threshold'): code = sparse_encode(X, V, algorithm=algo) - assert_equal(code.shape, (n_samples, n_components)) + assert code.shape == (n_samples, n_components) @pytest.mark.parametrize("algo", [ @@ -404,7 +404,7 @@ def test_sparse_encode_error(): V /= np.sum(V ** 2, axis=1)[:, np.newaxis] code = sparse_encode(X, V, alpha=0.001) assert not np.all(code == 0) - assert_less(np.sqrt(np.sum((np.dot(code, V) - X) ** 2)), 0.1) + assert np.sqrt(np.sum((np.dot(code, V) - X) ** 2)) < 0.1 def test_sparse_encode_error_default_sparsity(): @@ -413,7 +413,7 @@ def test_sparse_encode_error_default_sparsity(): D = rng.randn(2, 64) code = ignore_warnings(sparse_encode)(X, D, algorithm='omp', n_nonzero_coefs=None) - assert_equal(code.shape, (100, 2)) + assert code.shape == (100, 2) def test_unknown_method(): @@ -431,7 +431,7 @@ def test_sparse_coder_estimator(): code = SparseCoder(dictionary=V, transform_algorithm='lasso_lars', transform_alpha=0.001).transform(X) assert not np.all(code == 0) - assert_less(np.sqrt(np.sum((np.dot(code, V) - X) ** 2)), 0.1) + assert np.sqrt(np.sum((np.dot(code, V) - X) ** 2)) < 0.1 def test_sparse_coder_parallel_mmap(): diff --git a/sklearn/decomposition/tests/test_factor_analysis.py b/sklearn/decomposition/tests/test_factor_analysis.py index f039ef2abc865..8547a3c0f6bff 100644 --- a/sklearn/decomposition/tests/test_factor_analysis.py +++ b/sklearn/decomposition/tests/test_factor_analysis.py @@ -46,13 +46,13 @@ def test_factor_analysis(): fas.append(fa) X_t = fa.transform(X) - assert_equal(X_t.shape, (n_samples, n_components)) + assert X_t.shape == (n_samples, n_components) assert_almost_equal(fa.loglike_[-1], fa.score_samples(X).sum()) assert_almost_equal(fa.score_samples(X).mean(), fa.score(X)) diff = np.all(np.diff(fa.loglike_)) - assert_greater(diff, 0., 'Log likelihood dif not increase') + assert diff > 0., 'Log likelihood dif not increase' # Sample Covariance scov = np.cov(X, rowvar=0., bias=1.) 
@@ -60,7 +60,7 @@ def test_factor_analysis(): # Model Covariance mcov = fa.get_covariance() diff = np.sum(np.abs(scov - mcov)) / W.size - assert_less(diff, 0.1, "Mean absolute difference is %f" % diff) + assert diff < 0.1, "Mean absolute difference is %f" % diff fa = FactorAnalysis(n_components=n_components, noise_variance_init=np.ones(n_features)) assert_raises(ValueError, fa.fit, X[:, :2]) diff --git a/sklearn/decomposition/tests/test_fastica.py b/sklearn/decomposition/tests/test_fastica.py index 40299ac2aac3a..04ef5d6f86fba 100644 --- a/sklearn/decomposition/tests/test_fastica.py +++ b/sklearn/decomposition/tests/test_fastica.py @@ -44,11 +44,11 @@ def test_gs(): W, _, _ = np.linalg.svd(rng.randn(10, 10)) w = rng.randn(10) _gs_decorrelation(w, W, 10) - assert_less((w ** 2).sum(), 1.e-10) + assert (w ** 2).sum() < 1.e-10 w = rng.randn(10) u = _gs_decorrelation(w, W, 5) tmp = np.dot(u, W.T) - assert_less((tmp[:5] ** 2).sum(), 1.e-10) + assert (tmp[:5] ** 2).sum() < 1.e-10 @pytest.mark.parametrize("add_noise", [True, False]) @@ -123,13 +123,13 @@ def g_test(x): random_state=seed) ica = FastICA(fun=nl, algorithm=algo, random_state=seed) sources = ica.fit_transform(m.T) - assert_equal(ica.components_.shape, (2, 2)) - assert_equal(sources.shape, (1000, 2)) + assert ica.components_.shape == (2, 2) + assert sources.shape == (1000, 2) assert_array_almost_equal(sources_fun, sources) assert_array_almost_equal(sources, ica.transform(m.T)) - assert_equal(ica.mixing_.shape, (2, 2)) + assert ica.mixing_.shape == (2, 2) for fn in [np.tanh, "exp(-.5(x^2))"]: ica = FastICA(fun=fn, algorithm=algo) @@ -225,12 +225,12 @@ def test_fit_transform(): ica = FastICA(n_components=n_components, whiten=whiten, random_state=0) Xt = ica.fit_transform(X) - assert_equal(ica.components_.shape, (n_components_, 10)) - assert_equal(Xt.shape, (100, n_components_)) + assert ica.components_.shape == (n_components_, 10) + assert Xt.shape == (100, n_components_) ica = FastICA(n_components=n_components, whiten=whiten, random_state=0) ica.fit(X) - assert_equal(ica.components_.shape, (n_components_, 10)) + assert ica.components_.shape == (n_components_, 10) Xt2 = ica.transform(X) assert_array_almost_equal(Xt, Xt2) @@ -257,9 +257,9 @@ def test_inverse_transform(): # catch "n_components ignored" warning Xt = ica.fit_transform(X) expected_shape = expected[(whiten, n_components_)] - assert_equal(ica.mixing_.shape, expected_shape) + assert ica.mixing_.shape == expected_shape X2 = ica.inverse_transform(Xt) - assert_equal(X.shape, X2.shape) + assert X.shape == X2.shape # reversibility test in non-reduction case if n_components == X.shape[1]: diff --git a/sklearn/decomposition/tests/test_kernel_pca.py b/sklearn/decomposition/tests/test_kernel_pca.py index 2073875e76c15..c5ac24b3423f7 100644 --- a/sklearn/decomposition/tests/test_kernel_pca.py +++ b/sklearn/decomposition/tests/test_kernel_pca.py @@ -21,7 +21,7 @@ def test_kernel_pca(): def histogram(x, y, **kwargs): # Histogram kernel implemented as a callable. 
- assert_equal(kwargs, {}) # no kernel_params that we didn't ask for + assert kwargs == {} # no kernel_params that we didn't ask for return np.minimum(x, y).sum() for eigen_solver in ("auto", "dense", "arpack"): @@ -40,17 +40,17 @@ def histogram(x, y, **kwargs): # non-regression test: previously, gamma would be 0 by default, # forcing all eigenvalues to 0 under the poly kernel - assert_not_equal(X_fit_transformed.size, 0) + assert X_fit_transformed.size != 0 # transform new data X_pred_transformed = kpca.transform(X_pred) - assert_equal(X_pred_transformed.shape[1], + assert (X_pred_transformed.shape[1] == X_fit_transformed.shape[1]) # inverse transform if inv: X_pred2 = kpca.inverse_transform(X_pred_transformed) - assert_equal(X_pred2.shape, X_pred.shape) + assert X_pred2.shape == X_pred.shape def test_kernel_pca_invalid_parameters(): @@ -103,7 +103,7 @@ def test_kernel_pca_sparse(): # transform new data X_pred_transformed = kpca.transform(X_pred) - assert_equal(X_pred_transformed.shape[1], + assert (X_pred_transformed.shape[1] == X_fit_transformed.shape[1]) # inverse transform @@ -135,7 +135,7 @@ def test_kernel_pca_n_components(): kpca = KernelPCA(n_components=c, eigen_solver=eigen_solver) shape = kpca.fit(X_fit).transform(X_pred).shape - assert_equal(shape, (2, c)) + assert shape == (2, c) def test_remove_zero_eig(): @@ -144,15 +144,15 @@ def test_remove_zero_eig(): # n_components=None (default) => remove_zero_eig is True kpca = KernelPCA() Xt = kpca.fit_transform(X) - assert_equal(Xt.shape, (3, 0)) + assert Xt.shape == (3, 0) kpca = KernelPCA(n_components=2) Xt = kpca.fit_transform(X) - assert_equal(Xt.shape, (3, 2)) + assert Xt.shape == (3, 2) kpca = KernelPCA(n_components=2, remove_zero_eig=True) Xt = kpca.fit_transform(X) - assert_equal(Xt.shape, (3, 0)) + assert Xt.shape == (3, 0) def test_leave_zero_eig(): @@ -227,7 +227,7 @@ def test_gridsearch_pipeline(): param_grid = dict(kernel_pca__gamma=2. ** np.arange(-2, 2)) grid_search = GridSearchCV(pipeline, cv=3, param_grid=param_grid) grid_search.fit(X, y) - assert_equal(grid_search.best_score_, 1) + assert grid_search.best_score_ == 1 # 0.23. warning about tol not having its correct default value. @@ -244,7 +244,7 @@ def test_gridsearch_pipeline_precomputed(): grid_search = GridSearchCV(pipeline, cv=3, param_grid=param_grid) X_kernel = rbf_kernel(X, gamma=2.) grid_search.fit(X_kernel, y) - assert_equal(grid_search.best_score_, 1) + assert grid_search.best_score_ == 1 # 0.23. warning about tol not having its correct default value. @@ -256,7 +256,7 @@ def test_nested_circles(): # 2D nested circles are not linearly separable train_score = Perceptron(max_iter=5).fit(X, y).score(X, y) - assert_less(train_score, 0.8) + assert train_score < 0.8 # Project the circles data into the first 2 components of a RBF Kernel # PCA model. 
@@ -269,4 +269,4 @@ def test_nested_circles(): # The data is perfectly linearly separable in that space train_score = Perceptron(max_iter=5).fit(X_kpca, y).score(X_kpca, y) - assert_equal(train_score, 1.0) + assert train_score == 1.0 diff --git a/sklearn/decomposition/tests/test_nmf.py b/sklearn/decomposition/tests/test_nmf.py index f2f41ecc52f9a..b6703f0c24c0c 100644 --- a/sklearn/decomposition/tests/test_nmf.py +++ b/sklearn/decomposition/tests/test_nmf.py @@ -111,7 +111,7 @@ def test_nmf_fit_close(solver): pnmf = NMF(5, solver=solver, init='nndsvdar', random_state=0, max_iter=600) X = np.abs(rng.randn(6, 5)) - assert_less(pnmf.fit(X).reconstruction_err_, 0.1) + assert pnmf.fit(X).reconstruction_err_ < 0.1 @pytest.mark.parametrize('solver', ('cd', 'mu')) @@ -438,8 +438,8 @@ def test_nmf_regularization(): H_regul_n_zeros = H_regul[H_regul == 0].size H_model_n_zeros = H_model[H_model == 0].size - assert_greater(W_regul_n_zeros, W_model_n_zeros) - assert_greater(H_regul_n_zeros, H_model_n_zeros) + assert W_regul_n_zeros > W_model_n_zeros + assert H_regul_n_zeros > H_model_n_zeros # L2 regularization should decrease the mean of the coefficients l1_ratio = 0. @@ -455,8 +455,8 @@ def test_nmf_regularization(): H_regul = regul.components_ H_model = model.components_ - assert_greater(W_model.mean(), W_regul.mean()) - assert_greater(H_model.mean(), H_regul.mean()) + assert W_model.mean() > W_regul.mean() + assert H_model.mean() > H_regul.mean() @ignore_warnings(category=ConvergenceWarning) @@ -493,7 +493,7 @@ def test_nmf_decreasing(): loss = nmf._beta_divergence(X, W, H, beta_loss) if previous_loss is not None: - assert_greater(previous_loss, loss) + assert previous_loss > loss previous_loss = loss diff --git a/sklearn/decomposition/tests/test_online_lda.py b/sklearn/decomposition/tests/test_online_lda.py index c163a6d58ea65..1c13c890c2ea6 100644 --- a/sklearn/decomposition/tests/test_online_lda.py +++ b/sklearn/decomposition/tests/test_online_lda.py @@ -274,11 +274,11 @@ def test_lda_perplexity(method): lda_2.fit(X) perp_2 = lda_2.perplexity(X, sub_sampling=False) - assert_greater_equal(perp_1, perp_2) + assert perp_1 >= perp_2 perp_1_subsampling = lda_1.perplexity(X, sub_sampling=True) perp_2_subsampling = lda_2.perplexity(X, sub_sampling=True) - assert_greater_equal(perp_1_subsampling, perp_2_subsampling) + assert perp_1_subsampling >= perp_2_subsampling @pytest.mark.parametrize('method', ('online', 'batch')) @@ -297,7 +297,7 @@ def test_lda_score(method): lda_2.fit_transform(X) score_2 = lda_2.score(X) - assert_greater_equal(score_2, score_1) + assert score_2 >= score_1 def test_perplexity_input_format(): @@ -384,8 +384,8 @@ def check_verbosity(verbose, evaluate_every, expected_lines, n_lines = out.getvalue().count('\n') n_perplexity = out.getvalue().count('perplexity') - assert_equal(expected_lines, n_lines) - assert_equal(expected_perplexities, n_perplexity) + assert expected_lines == n_lines + assert expected_perplexities == n_perplexity @pytest.mark.parametrize( diff --git a/sklearn/decomposition/tests/test_sparse_pca.py b/sklearn/decomposition/tests/test_sparse_pca.py index 621738f969d1e..8440dd17717bc 100644 --- a/sklearn/decomposition/tests/test_sparse_pca.py +++ b/sklearn/decomposition/tests/test_sparse_pca.py @@ -45,13 +45,13 @@ def test_correct_shapes(): X = rng.randn(12, 10) spca = SparsePCA(n_components=8, random_state=rng) U = spca.fit_transform(X) - assert_equal(spca.components_.shape, (8, 10)) - assert_equal(U.shape, (12, 8)) + assert spca.components_.shape == (8, 10) + assert 
U.shape == (12, 8) # test overcomplete decomposition spca = SparsePCA(n_components=13, random_state=rng) U = spca.fit_transform(X) - assert_equal(spca.components_.shape, (13, 10)) - assert_equal(U.shape, (12, 13)) + assert spca.components_.shape == (13, 10) + assert U.shape == (12, 13) def test_fit_transform(): @@ -122,13 +122,13 @@ def test_mini_batch_correct_shapes(): X = rng.randn(12, 10) pca = MiniBatchSparsePCA(n_components=8, random_state=rng) U = pca.fit_transform(X) - assert_equal(pca.components_.shape, (8, 10)) - assert_equal(U.shape, (12, 8)) + assert pca.components_.shape == (8, 10) + assert U.shape == (12, 8) # test overcomplete decomposition pca = MiniBatchSparsePCA(n_components=13, random_state=rng) U = pca.fit_transform(X) - assert_equal(pca.components_.shape, (13, 10)) - assert_equal(U.shape, (12, 13)) + assert pca.components_.shape == (13, 10) + assert U.shape == (12, 13) # XXX: test always skipped From 99ee74a661a738973353a5ab2448c5ea73a64f76 Mon Sep 17 00:00:00 2001 From: adrinjalali Date: Sun, 30 Jun 2019 15:35:59 +0200 Subject: [PATCH 05/22] fix ensemble --- sklearn/ensemble/tests/test_bagging.py | 32 ++--- sklearn/ensemble/tests/test_base.py | 18 +-- sklearn/ensemble/tests/test_forest.py | 110 +++++++------- .../ensemble/tests/test_gradient_boosting.py | 134 +++++++++--------- .../test_gradient_boosting_loss_functions.py | 6 +- sklearn/ensemble/tests/test_iforest.py | 14 +- sklearn/ensemble/tests/test_voting.py | 22 +-- .../ensemble/tests/test_weight_boosting.py | 38 ++--- 8 files changed, 185 insertions(+), 189 deletions(-) diff --git a/sklearn/ensemble/tests/test_bagging.py b/sklearn/ensemble/tests/test_bagging.py index 8afa0e5c68ec0..f4bda051816ee 100644 --- a/sklearn/ensemble/tests/test_bagging.py +++ b/sklearn/ensemble/tests/test_bagging.py @@ -241,7 +241,7 @@ def test_bootstrap_samples(): bootstrap=False, random_state=rng).fit(X_train, y_train) - assert_equal(base_estimator.score(X_train, y_train), + assert (base_estimator.score(X_train, y_train) == ensemble.score(X_train, y_train)) # with bootstrap, trees are no longer perfect on the training set @@ -250,7 +250,7 @@ def test_bootstrap_samples(): bootstrap=True, random_state=rng).fit(X_train, y_train) - assert_greater(base_estimator.score(X_train, y_train), + assert (base_estimator.score(X_train, y_train) > ensemble.score(X_train, y_train)) # check that each sampling correspond to a complete bootstrap resample. 
@@ -278,7 +278,7 @@ def test_bootstrap_features(): random_state=rng).fit(X_train, y_train) for features in ensemble.estimators_features_: - assert_equal(boston.data.shape[1], np.unique(features).shape[0]) + assert boston.data.shape[1] == np.unique(features).shape[0] ensemble = BaggingRegressor(base_estimator=DecisionTreeRegressor(), max_features=1.0, @@ -286,7 +286,7 @@ def test_bootstrap_features(): random_state=rng).fit(X_train, y_train) for features in ensemble.estimators_features_: - assert_greater(boston.data.shape[1], np.unique(features).shape[0]) + assert boston.data.shape[1] > np.unique(features).shape[0] def test_probability(): @@ -338,7 +338,7 @@ def test_oob_score_classification(): test_score = clf.score(X_test, y_test) - assert_less(abs(test_score - clf.oob_score_), 0.1) + assert abs(test_score - clf.oob_score_) < 0.1 # Test with few estimators assert_warns(UserWarning, @@ -367,7 +367,7 @@ def test_oob_score_regression(): test_score = clf.score(X_test, y_test) - assert_less(abs(test_score - clf.oob_score_), 0.1) + assert abs(test_score - clf.oob_score_) < 0.1 # Test with few estimators assert_warns(UserWarning, @@ -616,13 +616,13 @@ def test_warm_start(random_state=42): else: clf_ws.set_params(n_estimators=n_estimators) clf_ws.fit(X, y) - assert_equal(len(clf_ws), n_estimators) + assert len(clf_ws) == n_estimators clf_no_ws = BaggingClassifier(n_estimators=10, random_state=random_state, warm_start=False) clf_no_ws.fit(X, y) - assert_equal(set([tree.random_state for tree in clf_ws]), + assert (set([tree.random_state for tree in clf_ws]) == set([tree.random_state for tree in clf_no_ws])) @@ -700,7 +700,7 @@ def test_oob_score_consistency(): bagging = BaggingClassifier(KNeighborsClassifier(), max_samples=0.5, max_features=0.5, oob_score=True, random_state=1) - assert_equal(bagging.fit(X, y).oob_score_, bagging.fit(X, y).oob_score_) + assert bagging.fit(X, y).oob_score_ == bagging.fit(X, y).oob_score_ def test_estimators_samples(): @@ -719,9 +719,9 @@ def test_estimators_samples(): estimators = bagging.estimators_ # Test for correct formatting - assert_equal(len(estimators_samples), len(estimators)) - assert_equal(len(estimators_samples[0]), len(X) // 2) - assert_equal(estimators_samples[0].dtype.kind, 'i') + assert len(estimators_samples) == len(estimators) + assert len(estimators_samples[0]) == len(X) // 2 + assert estimators_samples[0].dtype.kind == 'i' # Re-fit single estimator to test for consistent sampling estimator_index = 0 @@ -776,7 +776,7 @@ def test_max_samples_consistency(): max_samples=max_samples, max_features=0.5, random_state=1) bagging.fit(X, y) - assert_equal(bagging._max_samples, max_samples) + assert bagging._max_samples == max_samples def test_set_oob_score_label_encoding(): @@ -793,7 +793,7 @@ def test_set_oob_score_label_encoding(): random_state=random_state).fit(X, Y2).oob_score_ x3 = BaggingClassifier(oob_score=True, random_state=random_state).fit(X, Y3).oob_score_ - assert_equal([x1, x2], [x3, x3]) + assert [x1, x2] == [x3, x3] def replace(X): @@ -829,7 +829,7 @@ def test_bagging_regressor_with_missing_inputs(): pipeline.fit(X, y).predict(X) bagging_regressor = BaggingRegressor(pipeline) y_hat = bagging_regressor.fit(X, y).predict(X) - assert_equal(y.shape, y_hat.shape) + assert y.shape == y_hat.shape # Verify that exceptions can be raised by wrapper regressor regressor = DecisionTreeRegressor() @@ -857,7 +857,7 @@ def test_bagging_classifier_with_missing_inputs(): bagging_classifier = BaggingClassifier(pipeline) bagging_classifier.fit(X, y) y_hat = 
bagging_classifier.predict(X) - assert_equal(y.shape, y_hat.shape) + assert y.shape == y_hat.shape bagging_classifier.predict_log_proba(X) bagging_classifier.predict_proba(X) diff --git a/sklearn/ensemble/tests/test_base.py b/sklearn/ensemble/tests/test_base.py index d283aadf65d73..7cd2124359e72 100644 --- a/sklearn/ensemble/tests/test_base.py +++ b/sklearn/ensemble/tests/test_base.py @@ -36,14 +36,14 @@ def test_base(): ensemble._make_estimator(random_state=random_state) ensemble._make_estimator(append=False) - assert_equal(3, len(ensemble)) - assert_equal(3, len(ensemble.estimators_)) + assert 3 == len(ensemble) + assert 3 == len(ensemble.estimators_) assert isinstance(ensemble[0], Perceptron) - assert_equal(ensemble[0].random_state, None) + assert ensemble[0].random_state == None assert isinstance(ensemble[1].random_state, int) assert isinstance(ensemble[2].random_state, int) - assert_not_equal(ensemble[1].random_state, ensemble[2].random_state) + assert ensemble[1].random_state != ensemble[2].random_state np_int_ensemble = BaggingClassifier(base_estimator=Perceptron(tol=1e-3), n_estimators=np.int32(3)) @@ -82,7 +82,7 @@ def test_set_random_states(): _set_random_states(LinearDiscriminantAnalysis(), random_state=17) clf1 = Perceptron(tol=1e-3, random_state=None) - assert_equal(clf1.random_state, None) + assert clf1.random_state == None # check random_state is None still sets _set_random_states(clf1, None) assert isinstance(clf1.random_state, int) @@ -92,7 +92,7 @@ def test_set_random_states(): assert isinstance(clf1.random_state, int) clf2 = Perceptron(tol=1e-3, random_state=None) _set_random_states(clf2, 3) - assert_equal(clf1.random_state, clf2.random_state) + assert clf1.random_state == clf2.random_state # nested random_state @@ -105,7 +105,7 @@ def make_steps(): _set_random_states(est1, 3) assert isinstance(est1.steps[0][1].estimator.random_state, int) assert isinstance(est1.steps[1][1].random_state, int) - assert_not_equal(est1.get_params()['sel__estimator__random_state'], + assert (est1.get_params()['sel__estimator__random_state'] != est1.get_params()['clf__random_state']) # ensure multiple random_state parameters are invariant to get_params() @@ -124,7 +124,7 @@ def get_params(self, *args, **kwargs): for cls in [AlphaParamPipeline, RevParamPipeline]: est2 = cls(make_steps()) _set_random_states(est2, 3) - assert_equal(est1.get_params()['sel__estimator__random_state'], + assert (est1.get_params()['sel__estimator__random_state'] == est2.get_params()['sel__estimator__random_state']) - assert_equal(est1.get_params()['clf__random_state'], + assert (est1.get_params()['clf__random_state'] == est2.get_params()['clf__random_state']) diff --git a/sklearn/ensemble/tests/test_forest.py b/sklearn/ensemble/tests/test_forest.py index e23d812611681..93b3309ba5a1a 100644 --- a/sklearn/ensemble/tests/test_forest.py +++ b/sklearn/ensemble/tests/test_forest.py @@ -119,16 +119,16 @@ def check_classification_toy(name): clf = ForestClassifier(n_estimators=10, random_state=1) clf.fit(X, y) assert_array_equal(clf.predict(T), true_result) - assert_equal(10, len(clf)) + assert 10 == len(clf) clf = ForestClassifier(n_estimators=10, max_features=1, random_state=1) clf.fit(X, y) assert_array_equal(clf.predict(T), true_result) - assert_equal(10, len(clf)) + assert 10 == len(clf) # also test apply leaf_indices = clf.apply(X) - assert_equal(leaf_indices.shape, (len(X), clf.n_estimators)) + assert leaf_indices.shape == (len(X), clf.n_estimators) @pytest.mark.parametrize('name', FOREST_CLASSIFIERS) @@ -144,15 
+144,15 @@ def check_iris_criterion(name, criterion): random_state=1) clf.fit(iris.data, iris.target) score = clf.score(iris.data, iris.target) - assert_greater(score, 0.9, "Failed with criterion %s and score = %f" - % (criterion, score)) + assert score > 0.9, ("Failed with criterion %s and score = %f" + % (criterion, score)) clf = ForestClassifier(n_estimators=10, criterion=criterion, max_features=2, random_state=1) clf.fit(iris.data, iris.target) score = clf.score(iris.data, iris.target) - assert_greater(score, 0.5, "Failed with criterion %s and score = %f" - % (criterion, score)) + assert score > 0.5, ("Failed with criterion %s and score = %f" + % (criterion, score)) @pytest.mark.parametrize('name', FOREST_CLASSIFIERS) @@ -169,15 +169,15 @@ def check_boston_criterion(name, criterion): random_state=1) clf.fit(boston.data, boston.target) score = clf.score(boston.data, boston.target) - assert_greater(score, 0.94, "Failed with max_features=None, criterion %s " - "and score = %f" % (criterion, score)) + assert score > 0.94, ("Failed with max_features=None, criterion %s " + "and score = %f" % (criterion, score)) clf = ForestRegressor(n_estimators=5, criterion=criterion, max_features=6, random_state=1) clf.fit(boston.data, boston.target) score = clf.score(boston.data, boston.target) - assert_greater(score, 0.95, "Failed with max_features=6, criterion %s " - "and score = %f" % (criterion, score)) + assert score > 0.95, ("Failed with max_features=6, criterion %s " + "and score = %f" % (criterion, score)) @pytest.mark.parametrize('name', FOREST_REGRESSORS) @@ -235,8 +235,8 @@ def check_importances(name, criterion, dtype, tolerance): # The forest estimator can detect that only the first 3 features of the # dataset are informative: n_important = np.sum(importances > 0.1) - assert_equal(importances.shape[0], 10) - assert_equal(n_important, 3) + assert importances.shape[0] == 10 + assert n_important == 3 assert np.all(importances[:3] > 0.1) # Check with parallel @@ -257,7 +257,7 @@ def check_importances(name, criterion, dtype, tolerance): criterion=criterion) est.fit(X, y, sample_weight=scale * sample_weight) importances_bis = est.feature_importances_ - assert_less(np.abs(importances - importances_bis).mean(), tolerance) + assert np.abs(importances - importances_bis).mean() < tolerance @pytest.mark.parametrize('dtype', (np.float64, np.float32)) @@ -364,7 +364,7 @@ def mdi_importance(X_m, X, y): # Check correctness assert_almost_equal(entropy(y), sum(importances)) - assert_less(np.abs(true_importances - importances).mean(), 0.01) + assert np.abs(true_importances - importances).mean() < 0.01 @pytest.mark.parametrize('name', FOREST_ESTIMATORS) @@ -387,10 +387,10 @@ def check_oob_score(name, X, y, n_estimators=20): test_score = est.score(X[n_samples // 2:, :], y[n_samples // 2:]) if name in FOREST_CLASSIFIERS: - assert_less(abs(test_score - est.oob_score_), 0.1) + assert abs(test_score - est.oob_score_) < 0.1 else: - assert_greater(test_score, est.oob_score_) - assert_greater(est.oob_score_, .8) + assert test_score > est.oob_score_ + assert est.oob_score_ > .8 # Check warning if not enough estimators with np.errstate(divide="ignore", invalid="ignore"): @@ -464,7 +464,7 @@ def check_parallel(name, X, y): forest = ForestEstimator(n_estimators=10, n_jobs=3, random_state=0) forest.fit(X, y) - assert_equal(len(forest), 10) + assert len(forest) == 10 forest.set_params(n_jobs=1) y1 = forest.predict(X) @@ -493,9 +493,9 @@ def check_pickle(name, X, y): pickle_object = pickle.dumps(obj) obj2 = 
pickle.loads(pickle_object) - assert_equal(type(obj2), obj.__class__) + assert type(obj2) == obj.__class__ score2 = obj2.score(X, y) - assert_equal(score, score2) + assert score == score2 @pytest.mark.parametrize('name', FOREST_CLASSIFIERS_REGRESSORS) @@ -577,7 +577,7 @@ def check_classes_shape(name): # Classification, single output clf = ForestClassifier(random_state=0).fit(X, y) - assert_equal(clf.n_classes_, 2) + assert clf.n_classes_ == 2 assert_array_equal(clf.classes_, [-1, 1]) # Classification, multi-output @@ -603,7 +603,7 @@ def test_random_trees_dense_type(): X_transformed = hasher.fit_transform(X) # Assert that type is ndarray, not scipy.sparse.csr.csr_matrix - assert_equal(type(X_transformed), np.ndarray) + assert type(X_transformed) == np.ndarray def test_random_trees_dense_equal(): @@ -640,13 +640,13 @@ def test_random_hasher(): X_transformed.toarray()) # one leaf active per data point per forest - assert_equal(X_transformed.shape[0], X.shape[0]) + assert X_transformed.shape[0] == X.shape[0] assert_array_equal(X_transformed.sum(axis=1), hasher.n_estimators) svd = TruncatedSVD(n_components=2) X_reduced = svd.fit_transform(X_transformed) linear_clf = LinearSVC() linear_clf.fit(X_reduced, y) - assert_equal(linear_clf.score(X_reduced, y), 1.) + assert linear_clf.score(X_reduced, y) == 1. def test_random_hasher_sparse_data(): @@ -700,13 +700,13 @@ def test_distribution(): # are 5 ways to build a random tree. The more compact (0,1/0,0/--0,2/--) of # them has probability 1/3 while the 4 others have probability 1/6. - assert_equal(len(uniques), 5) - assert_greater(0.20, uniques[0][0]) # Rough approximation of 1/6. - assert_greater(0.20, uniques[1][0]) - assert_greater(0.20, uniques[2][0]) - assert_greater(0.20, uniques[3][0]) - assert_greater(uniques[4][0], 0.3) - assert_equal(uniques[4][1], "0,1/0,0/--0,2/--") + assert len(uniques) == 5 + assert 0.20 > uniques[0][0] # Rough approximation of 1/6. 
+ assert 0.20 > uniques[1][0] + assert 0.20 > uniques[2][0] + assert 0.20 > uniques[3][0] + assert uniques[4][0] > 0.3 + assert uniques[4][1] == "0,1/0,0/--0,2/--" # Two variables, one with 2 values, one with 3 values X = np.empty((1000, 2)) @@ -725,7 +725,7 @@ def test_distribution(): uniques[tree] += 1 uniques = [(count, tree) for tree, count in uniques.items()] - assert_equal(len(uniques), 8) + assert len(uniques) == 8 def check_max_leaf_nodes_max_depth(name): @@ -735,11 +735,11 @@ def check_max_leaf_nodes_max_depth(name): ForestEstimator = FOREST_ESTIMATORS[name] est = ForestEstimator(max_depth=1, max_leaf_nodes=4, n_estimators=1, random_state=0).fit(X, y) - assert_equal(est.estimators_[0].get_depth(), 1) + assert est.estimators_[0].get_depth() == 1 est = ForestEstimator(max_depth=1, n_estimators=1, random_state=0).fit(X, y) - assert_equal(est.estimators_[0].get_depth(), 1) + assert est.estimators_[0].get_depth() == 1 @pytest.mark.parametrize('name', FOREST_ESTIMATORS) @@ -764,8 +764,7 @@ def check_min_samples_split(name): node_idx = est.estimators_[0].tree_.children_left != -1 node_samples = est.estimators_[0].tree_.n_node_samples[node_idx] - assert_greater(np.min(node_samples), len(X) * 0.5 - 1, - "Failed with {0}".format(name)) + assert np.min(node_samples) > len(X) * 0.5 - 1, "Failed with {0}".format(name) est = ForestEstimator(min_samples_split=0.5, n_estimators=1, random_state=0) @@ -773,8 +772,7 @@ def check_min_samples_split(name): node_idx = est.estimators_[0].tree_.children_left != -1 node_samples = est.estimators_[0].tree_.n_node_samples[node_idx] - assert_greater(np.min(node_samples), len(X) * 0.5 - 1, - "Failed with {0}".format(name)) + assert np.min(node_samples) > len(X) * 0.5 - 1, "Failed with {0}".format(name) @pytest.mark.parametrize('name', FOREST_ESTIMATORS) @@ -800,8 +798,7 @@ def check_min_samples_leaf(name): node_counts = np.bincount(out) # drop inner nodes leaf_count = node_counts[node_counts != 0] - assert_greater(np.min(leaf_count), 4, - "Failed with {0}".format(name)) + assert np.min(leaf_count) > 4, "Failed with {0}".format(name) est = ForestEstimator(min_samples_leaf=0.25, n_estimators=1, random_state=0) @@ -810,8 +807,7 @@ def check_min_samples_leaf(name): node_counts = np.bincount(out) # drop inner nodes leaf_count = node_counts[node_counts != 0] - assert_greater(np.min(leaf_count), len(X) * 0.25 - 1, - "Failed with {0}".format(name)) + assert np.min(leaf_count) > len(X) * 0.25 - 1, "Failed with {0}".format(name) @pytest.mark.parametrize('name', FOREST_ESTIMATORS) @@ -842,12 +838,12 @@ def check_min_weight_fraction_leaf(name): node_weights = np.bincount(out, weights=weights) # drop inner nodes leaf_weights = node_weights[node_weights != 0] - assert_greater_equal( - np.min(leaf_weights), - total_weight * est.min_weight_fraction_leaf, - "Failed with {0} " - "min_weight_fraction_leaf={1}".format( - name, est.min_weight_fraction_leaf)) + assert_message = ("Failed with {0} " + "min_weight_fraction_leaf={1}".format( + name, est.min_weight_fraction_leaf)) + assert ( + np.min(leaf_weights) >= + total_weight * est.min_weight_fraction_leaf), assert_message @pytest.mark.parametrize('name', FOREST_ESTIMATORS) @@ -1084,13 +1080,13 @@ def check_warm_start(name, random_state=42): else: clf_ws.set_params(n_estimators=n_estimators) clf_ws.fit(X, y) - assert_equal(len(clf_ws), n_estimators) + assert len(clf_ws) == n_estimators clf_no_ws = ForestEstimator(n_estimators=10, random_state=random_state, warm_start=False) clf_no_ws.fit(X, y) - assert_equal(set([tree.random_state 
for tree in clf_ws]), + assert (set([tree.random_state for tree in clf_ws]) == set([tree.random_state for tree in clf_no_ws])) assert_array_equal(clf_ws.apply(X), clf_no_ws.apply(X), @@ -1182,7 +1178,7 @@ def check_warm_start_oob(name): clf_2.fit(X, y) assert hasattr(clf_2, 'oob_score_') - assert_equal(clf.oob_score_, clf_2.oob_score_) + assert clf.oob_score_ == clf_2.oob_score_ # Test that oob_score is computed even if we don't need to train # additional trees. @@ -1194,7 +1190,7 @@ def check_warm_start_oob(name): clf_3.set_params(oob_score=True) ignore_warnings(clf_3.fit)(X, y) - assert_equal(clf.oob_score_, clf_3.oob_score_) + assert clf.oob_score_ == clf_3.oob_score_ @pytest.mark.parametrize('name', FOREST_CLASSIFIERS_REGRESSORS) @@ -1222,8 +1218,8 @@ def check_decision_path(name): est.fit(X, y) indicator, n_nodes_ptr = est.decision_path(X) - assert_equal(indicator.shape[1], n_nodes_ptr[-1]) - assert_equal(indicator.shape[0], n_samples) + assert indicator.shape[1] == n_nodes_ptr[-1] + assert indicator.shape[0] == n_samples assert_array_equal(np.diff(n_nodes_ptr), [e.tree_.node_count for e in est.estimators_]) @@ -1252,7 +1248,7 @@ def test_min_impurity_split(): est = assert_warns_message(DeprecationWarning, "min_impurity_decrease", est.fit, X, y) for tree in est.estimators_: - assert_equal(tree.min_impurity_split, 0.1) + assert tree.min_impurity_split == 0.1 def test_min_impurity_decrease(): @@ -1266,7 +1262,7 @@ def test_min_impurity_decrease(): for tree in est.estimators_: # Simply check if the parameter is passed on correctly. Tree tests # will suffice for the actual working of this param - assert_equal(tree.min_impurity_decrease, 0.1) + assert tree.min_impurity_decrease == 0.1 class MyBackend(DEFAULT_JOBLIB_BACKEND): diff --git a/sklearn/ensemble/tests/test_gradient_boosting.py b/sklearn/ensemble/tests/test_gradient_boosting.py index 1f32d1afbb371..aa041073157a4 100644 --- a/sklearn/ensemble/tests/test_gradient_boosting.py +++ b/sklearn/ensemble/tests/test_gradient_boosting.py @@ -77,13 +77,13 @@ def check_classification_toy(presort, loss): clf.fit(X, y) assert_array_equal(clf.predict(T), true_result) - assert_equal(10, len(clf.estimators_)) + assert 10 == len(clf.estimators_) deviance_decrease = (clf.train_score_[:-1] - clf.train_score_[1:]) assert np.any(deviance_decrease >= 0.0) leaves = clf.apply(X) - assert_equal(leaves.shape, (6, 10, 1)) + assert leaves.shape == (6, 10, 1) @pytest.mark.parametrize('presort', ('auto', True, False)) @@ -212,7 +212,7 @@ def check_classification_synthetic(presort, loss): learning_rate=1.0, random_state=0) gbrt.fit(X_train, y_train) error_rate = (1.0 - gbrt.score(X_test, y_test)) - assert_less(error_rate, 0.09) + assert error_rate < 0.09 gbrt = GradientBoostingClassifier(n_estimators=200, min_samples_split=2, max_depth=1, loss=loss, @@ -221,7 +221,7 @@ def check_classification_synthetic(presort, loss): presort=presort) gbrt.fit(X_train, y_train) error_rate = (1.0 - gbrt.score(X_test, y_test)) - assert_less(error_rate, 0.08) + assert error_rate < 0.08 @pytest.mark.parametrize('presort', ('auto', True, False)) @@ -248,11 +248,11 @@ def check_boston(presort, loss, subsample): clf.fit(boston.data, boston.target, sample_weight=sample_weight) leaves = clf.apply(boston.data) - assert_equal(leaves.shape, (506, 100)) + assert leaves.shape == (506, 100) y_pred = clf.predict(boston.data) mse = mean_squared_error(boston.target, y_pred) - assert_less(mse, 6.0) + assert mse < 6.0 if last_y_pred is not None: assert_array_almost_equal(last_y_pred, y_pred) @@ 
-276,10 +276,10 @@ def check_iris(presort, subsample, sample_weight): presort=presort) clf.fit(iris.data, iris.target, sample_weight=sample_weight) score = clf.score(iris.data, iris.target) - assert_greater(score, 0.9) + assert score > 0.9 leaves = clf.apply(iris.data) - assert_equal(leaves.shape, (150, 100, 3)) + assert leaves.shape == (150, 100, 3) @pytest.mark.parametrize('presort', ('auto', True, False)) @@ -310,7 +310,7 @@ def test_regression_synthetic(): clf = GradientBoostingRegressor(presort=presort) clf.fit(X_train, y_train) mse = mean_squared_error(y_test, clf.predict(X_test)) - assert_less(mse, 5.0) + assert mse < 5.0 # Friedman2 X, y = datasets.make_friedman2(n_samples=1200, random_state=random_state) @@ -322,7 +322,7 @@ def test_regression_synthetic(): clf = GradientBoostingRegressor(**regression_params) clf.fit(X_train, y_train) mse = mean_squared_error(y_test, clf.predict(X_test)) - assert_less(mse, 1700.0) + assert mse < 1700.0 # Friedman3 X, y = datasets.make_friedman3(n_samples=1200, random_state=random_state) @@ -334,7 +334,7 @@ def test_regression_synthetic(): clf = GradientBoostingRegressor(**regression_params) clf.fit(X_train, y_train) mse = mean_squared_error(y_test, clf.predict(X_test)) - assert_less(mse, 0.015) + assert mse < 0.015 def test_feature_importances(): @@ -499,28 +499,28 @@ def test_max_feature_auto(): gbrt = GradientBoostingClassifier(n_estimators=1, max_features='auto') gbrt.fit(X_train, y_train) - assert_equal(gbrt.max_features_, int(np.sqrt(n_features))) + assert gbrt.max_features_ == int(np.sqrt(n_features)) gbrt = GradientBoostingRegressor(n_estimators=1, max_features='auto') gbrt.fit(X_train, y_train) - assert_equal(gbrt.max_features_, n_features) + assert gbrt.max_features_ == n_features gbrt = GradientBoostingRegressor(n_estimators=1, max_features=0.3) gbrt.fit(X_train, y_train) - assert_equal(gbrt.max_features_, int(n_features * 0.3)) + assert gbrt.max_features_ == int(n_features * 0.3) gbrt = GradientBoostingRegressor(n_estimators=1, max_features='sqrt') gbrt.fit(X_train, y_train) - assert_equal(gbrt.max_features_, int(np.sqrt(n_features))) + assert gbrt.max_features_ == int(np.sqrt(n_features)) gbrt = GradientBoostingRegressor(n_estimators=1, max_features='log2') gbrt.fit(X_train, y_train) - assert_equal(gbrt.max_features_, int(np.log2(n_features))) + assert gbrt.max_features_ == int(np.log2(n_features)) gbrt = GradientBoostingRegressor(n_estimators=1, max_features=0.01 / X.shape[1]) gbrt.fit(X_train, y_train) - assert_equal(gbrt.max_features_, 1) + assert gbrt.max_features_ == 1 def test_staged_predict(): @@ -540,7 +540,7 @@ def test_staged_predict(): # test if prediction for last stage equals ``predict`` for y in clf.staged_predict(X_test): - assert_equal(y.shape, y_pred.shape) + assert y.shape == y_pred.shape assert_array_almost_equal(y_pred, y) @@ -561,14 +561,14 @@ def test_staged_predict_proba(): # test if prediction for last stage equals ``predict`` for y_pred in clf.staged_predict(X_test): - assert_equal(y_test.shape, y_pred.shape) + assert y_test.shape == y_pred.shape assert_array_equal(clf.predict(X_test), y_pred) # test if prediction for last stage equals ``predict_proba`` for staged_proba in clf.staged_predict_proba(X_test): - assert_equal(y_test.shape[0], staged_proba.shape[0]) - assert_equal(2, staged_proba.shape[1]) + assert y_test.shape[0] == staged_proba.shape[0] + assert 2 == staged_proba.shape[1] assert_array_almost_equal(clf.predict_proba(X_test), staged_proba) @@ -598,7 +598,7 @@ def test_serialization(): clf.fit(X, y) 
assert_array_equal(clf.predict(T), true_result) - assert_equal(100, len(clf.estimators_)) + assert 100 == len(clf.estimators_) try: import cPickle as pickle @@ -609,7 +609,7 @@ def test_serialization(): clf = None clf = pickle.loads(serialized_clf) assert_array_equal(clf.predict(T), true_result) - assert_equal(100, len(clf.estimators_)) + assert 100 == len(clf.estimators_) def test_degenerate_targets(): @@ -651,7 +651,7 @@ def test_symbol_labels(): clf.fit(X, symbol_y) assert_array_equal(clf.predict(T), tosequence(map(str, true_result))) - assert_equal(100, len(clf.estimators_)) + assert 100 == len(clf.estimators_) def test_float_class_labels(): @@ -663,7 +663,7 @@ def test_float_class_labels(): clf.fit(X, float_y) assert_array_equal(clf.predict(T), np.asarray(true_result, dtype=np.float32)) - assert_equal(100, len(clf.estimators_)) + assert 100 == len(clf.estimators_) def test_shape_y(): @@ -678,7 +678,7 @@ def test_shape_y(): # later tests, and the tests that check for this warning fail assert_warns(DataConversionWarning, clf.fit, X, y_) assert_array_equal(clf.predict(T), true_result) - assert_equal(100, len(clf.estimators_)) + assert 100 == len(clf.estimators_) def test_mem_layout(): @@ -687,27 +687,27 @@ def test_mem_layout(): clf = GradientBoostingClassifier(n_estimators=100, random_state=1) clf.fit(X_, y) assert_array_equal(clf.predict(T), true_result) - assert_equal(100, len(clf.estimators_)) + assert 100 == len(clf.estimators_) X_ = np.ascontiguousarray(X) clf = GradientBoostingClassifier(n_estimators=100, random_state=1) clf.fit(X_, y) assert_array_equal(clf.predict(T), true_result) - assert_equal(100, len(clf.estimators_)) + assert 100 == len(clf.estimators_) y_ = np.asarray(y, dtype=np.int32) y_ = np.ascontiguousarray(y_) clf = GradientBoostingClassifier(n_estimators=100, random_state=1) clf.fit(X, y_) assert_array_equal(clf.predict(T), true_result) - assert_equal(100, len(clf.estimators_)) + assert 100 == len(clf.estimators_) y_ = np.asarray(y, dtype=np.int32) y_ = np.asfortranarray(y_) clf = GradientBoostingClassifier(n_estimators=100, random_state=1) clf.fit(X, y_) assert_array_equal(clf.predict(T), true_result) - assert_equal(100, len(clf.estimators_)) + assert 100 == len(clf.estimators_) def test_oob_improvement(): @@ -715,7 +715,7 @@ def test_oob_improvement(): clf = GradientBoostingClassifier(n_estimators=100, random_state=1, subsample=0.5) clf.fit(X, y) - assert_equal(clf.oob_improvement_.shape[0], 100) + assert clf.oob_improvement_.shape[0] == 100 # hard-coded regression test - change if modification in OOB computation assert_array_almost_equal(clf.oob_improvement_[:5], np.array([0.19, 0.15, 0.12, -0.12, -0.11]), @@ -736,8 +736,8 @@ def test_oob_multilcass_iris(): random_state=1, subsample=0.5) clf.fit(iris.data, iris.target) score = clf.score(iris.data, iris.target) - assert_greater(score, 0.9) - assert_equal(clf.oob_improvement_.shape[0], clf.n_estimators) + assert score > 0.9 + assert clf.oob_improvement_.shape[0] == clf.n_estimators # hard-coded regression test - change if modification in OOB computation # FIXME: the following snippet does not yield the same results on 32 bits # assert_array_almost_equal(clf.oob_improvement_[:5], @@ -764,11 +764,11 @@ def test_verbose_output(): # with OOB true_header = ' '.join(['%10s'] + ['%16s'] * 3) % ( 'Iter', 'Train Loss', 'OOB Improve', 'Remaining Time') - assert_equal(true_header, header) + assert true_header == header n_lines = sum(1 for l in verbose_output.readlines()) # one for 1-10 and then 9 for 20-100 - assert_equal(10 
+ 9, n_lines) + assert 10 + 9 == n_lines def test_more_verbose_output(): @@ -789,11 +789,11 @@ def test_more_verbose_output(): # no OOB true_header = ' '.join(['%10s'] + ['%16s'] * 2) % ( 'Iter', 'Train Loss', 'Remaining Time') - assert_equal(true_header, header) + assert true_header == header n_lines = sum(1 for l in verbose_output.readlines()) # 100 lines for n_estimators==100 - assert_equal(100, n_lines) + assert 100 == n_lines @pytest.mark.parametrize('Cls', GRADIENT_BOOSTING_ESTIMATORS) @@ -843,9 +843,9 @@ def test_warm_start_max_depth(Cls): est.fit(X, y) # last 10 trees have different depth - assert_equal(est.estimators_[0, 0].max_depth, 1) + assert est.estimators_[0, 0].max_depth == 1 for i in range(1, 11): - assert_equal(est.estimators_[-i, 0].max_depth, 2) + assert est.estimators_[-i, 0].max_depth == 2 @pytest.mark.parametrize('Cls', GRADIENT_BOOSTING_ESTIMATORS) @@ -994,33 +994,33 @@ def test_monitor_early_stopping(Cls): est = Cls(n_estimators=20, max_depth=1, random_state=1, subsample=0.5) est.fit(X, y, monitor=early_stopping_monitor) - assert_equal(est.n_estimators, 20) # this is not altered - assert_equal(est.estimators_.shape[0], 10) - assert_equal(est.train_score_.shape[0], 10) - assert_equal(est.oob_improvement_.shape[0], 10) + assert est.n_estimators == 20 # this is not altered + assert est.estimators_.shape[0] == 10 + assert est.train_score_.shape[0] == 10 + assert est.oob_improvement_.shape[0] == 10 # try refit est.set_params(n_estimators=30) est.fit(X, y) - assert_equal(est.n_estimators, 30) - assert_equal(est.estimators_.shape[0], 30) - assert_equal(est.train_score_.shape[0], 30) + assert est.n_estimators == 30 + assert est.estimators_.shape[0] == 30 + assert est.train_score_.shape[0] == 30 est = Cls(n_estimators=20, max_depth=1, random_state=1, subsample=0.5, warm_start=True) est.fit(X, y, monitor=early_stopping_monitor) - assert_equal(est.n_estimators, 20) - assert_equal(est.estimators_.shape[0], 10) - assert_equal(est.train_score_.shape[0], 10) - assert_equal(est.oob_improvement_.shape[0], 10) + assert est.n_estimators == 20 + assert est.estimators_.shape[0] == 10 + assert est.train_score_.shape[0] == 10 + assert est.oob_improvement_.shape[0] == 10 # try refit est.set_params(n_estimators=30, warm_start=False) est.fit(X, y) - assert_equal(est.n_estimators, 30) - assert_equal(est.train_score_.shape[0], 30) - assert_equal(est.estimators_.shape[0], 30) - assert_equal(est.oob_improvement_.shape[0], 30) + assert est.n_estimators == 30 + assert est.train_score_.shape[0] == 30 + assert est.estimators_.shape[0] == 30 + assert est.oob_improvement_.shape[0] == 30 def test_complete_classification(): @@ -1034,8 +1034,8 @@ def test_complete_classification(): est.fit(X, y) tree = est.estimators_[0, 0].tree_ - assert_equal(tree.max_depth, k) - assert_equal(tree.children_left[tree.children_left == TREE_LEAF].shape[0], + assert tree.max_depth == k + assert (tree.children_left[tree.children_left == TREE_LEAF].shape[0] == k + 1) @@ -1049,7 +1049,7 @@ def test_complete_regression(): est.fit(boston.data, boston.target) tree = est.estimators_[-1, 0].tree_ - assert_equal(tree.children_left[tree.children_left == TREE_LEAF].shape[0], + assert (tree.children_left[tree.children_left == TREE_LEAF].shape[0] == k + 1) @@ -1077,7 +1077,7 @@ def test_zero_estimator_clf(): random_state=1, init='zero') est.fit(X, y) - assert_greater(est.score(X, y), 0.96) + assert est.score(X, y) > 0.96 # binary clf mask = y != 0 @@ -1086,7 +1086,7 @@ def test_zero_estimator_clf(): est = 
GradientBoostingClassifier(n_estimators=20, max_depth=1, random_state=1, init='zero') est.fit(X, y) - assert_greater(est.score(X, y), 0.96) + assert est.score(X, y) > 0.96 est = GradientBoostingClassifier(n_estimators=20, max_depth=1, random_state=1, init='foobar') @@ -1102,11 +1102,11 @@ def test_max_leaf_nodes_max_depth(GBEstimator): est = GBEstimator(max_depth=1, max_leaf_nodes=k).fit(X, y) tree = est.estimators_[0, 0].tree_ - assert_equal(tree.max_depth, 1) + assert tree.max_depth == 1 est = GBEstimator(max_depth=1).fit(X, y) tree = est.estimators_[0, 0].tree_ - assert_equal(tree.max_depth, 1) + assert tree.max_depth == 1 @pytest.mark.parametrize('GBEstimator', GRADIENT_BOOSTING_ESTIMATORS) @@ -1119,7 +1119,7 @@ def test_min_impurity_split(GBEstimator): est = assert_warns_message(DeprecationWarning, "min_impurity_decrease", est.fit, X, y) for tree in est.estimators_.flat: - assert_equal(tree.min_impurity_split, 0.1) + assert tree.min_impurity_split == 0.1 @pytest.mark.parametrize('GBEstimator', GRADIENT_BOOSTING_ESTIMATORS) @@ -1131,7 +1131,7 @@ def test_min_impurity_decrease(GBEstimator): for tree in est.estimators_.flat: # Simply check if the parameter is passed on correctly. Tree tests # will suffice for the actual working of this param - assert_equal(tree.min_impurity_decrease, 0.1) + assert tree.min_impurity_decrease == 0.1 def test_warm_start_wo_nestimators_change(): @@ -1139,9 +1139,9 @@ def test_warm_start_wo_nestimators_change(): # Regression test for #3513. clf = GradientBoostingClassifier(n_estimators=10, warm_start=True) clf.fit([[0, 1], [2, 3]], [0, 1]) - assert_equal(clf.estimators_.shape[0], 10) + assert clf.estimators_.shape[0] == 10 clf.fit([[0, 1], [2, 3]], [0, 1]) - assert_equal(clf.estimators_.shape[0], 10) + assert clf.estimators_.shape[0] == 10 def test_probability_exponential(): @@ -1178,7 +1178,7 @@ def test_non_uniform_weights_toy_edge_case_reg(): gb = GradientBoostingRegressor(learning_rate=1.0, n_estimators=2, loss=loss) gb.fit(X, y, sample_weight=sample_weight) - assert_greater(gb.predict([[1, 0]])[0], 0.5) + assert gb.predict([[1, 0]])[0] > 0.5 def test_non_uniform_weights_toy_edge_case_clf(): @@ -1271,7 +1271,7 @@ def test_gradient_boosting_early_stopping(): (gbr, 1e-3, 28)): est.set_params(tol=tol) est.fit(X_train, y_train) - assert_equal(est.n_estimators_, early_stop_n_estimators) + assert est.n_estimators_ == early_stop_n_estimators assert est.score(X_test, y_test) > 0.7 # Without early stopping diff --git a/sklearn/ensemble/tests/test_gradient_boosting_loss_functions.py b/sklearn/ensemble/tests/test_gradient_boosting_loss_functions.py index a82dbab4e7464..d7fbc4a986469 100644 --- a/sklearn/ensemble/tests/test_gradient_boosting_loss_functions.py +++ b/sklearn/ensemble/tests/test_gradient_boosting_loss_functions.py @@ -27,7 +27,7 @@ def test_binomial_deviance(): bd = BinomialDeviance(2) # pred has the same BD for y in {0, 1} - assert_equal(bd(np.array([0.0]), np.array([0.0])), + assert (bd(np.array([0.0]), np.array([0.0])) == bd(np.array([1.0]), np.array([0.0]))) assert_almost_equal(bd(np.array([1.0, 1.0, 1.0]), @@ -93,12 +93,12 @@ def test_sample_weight_init_estimators(): init_est = loss.init_estimator() init_est.fit(X, y) out = loss.get_init_raw_predictions(X, init_est) - assert_equal(out.shape, (y.shape[0], 1)) + assert out.shape == (y.shape[0], 1) sw_init_est = loss.init_estimator() sw_init_est.fit(X, y, sample_weight=sample_weight) sw_out = loss.get_init_raw_predictions(X, sw_init_est) - assert_equal(sw_out.shape, (y.shape[0], 1)) + assert 
sw_out.shape == (y.shape[0], 1) # check if predictions match assert_allclose(out, sw_out, rtol=1e-2) diff --git a/sklearn/ensemble/tests/test_iforest.py b/sklearn/ensemble/tests/test_iforest.py index bd44421b2782c..298e0e422cce5 100644 --- a/sklearn/ensemble/tests/test_iforest.py +++ b/sklearn/ensemble/tests/test_iforest.py @@ -137,22 +137,22 @@ def test_recalculate_max_depth(): X = iris.data clf = IsolationForest().fit(X) for est in clf.estimators_: - assert_equal(est.max_depth, int(np.ceil(np.log2(X.shape[0])))) + assert est.max_depth == int(np.ceil(np.log2(X.shape[0]))) def test_max_samples_attribute(): X = iris.data clf = IsolationForest().fit(X) - assert_equal(clf.max_samples_, X.shape[0]) + assert clf.max_samples_ == X.shape[0] clf = IsolationForest(max_samples=500) assert_warns_message(UserWarning, "max_samples will be set to n_samples for estimation", clf.fit, X) - assert_equal(clf.max_samples_, X.shape[0]) + assert clf.max_samples_ == X.shape[0] clf = IsolationForest(max_samples=0.4).fit(X) - assert_equal(clf.max_samples_, 0.4*X.shape[0]) + assert clf.max_samples_ == 0.4*X.shape[0] def test_iforest_parallel_regression(): @@ -200,7 +200,7 @@ def test_iforest_performance(): y_pred = - clf.decision_function(X_test) # check that there is at most 6 errors (false positive or false negative) - assert_greater(roc_auc_score(y_test, y_pred), 0.98) + assert roc_auc_score(y_test, y_pred) > 0.98 @pytest.mark.parametrize("contamination", [0.25, "auto"]) @@ -214,7 +214,7 @@ def test_iforest_works(contamination): decision_func = -clf.decision_function(X) pred = clf.predict(X) # assert detect outliers: - assert_greater(np.min(decision_func[-2:]), np.max(decision_func[:-2])) + assert np.min(decision_func[-2:]) > np.max(decision_func[:-2]) assert_array_equal(pred, 6 * [1] + 2 * [-1]) @@ -222,7 +222,7 @@ def test_max_samples_consistency(): # Make sure validated max_samples in iforest and BaseBagging are identical X = iris.data clf = IsolationForest().fit(X) - assert_equal(clf.max_samples_, clf._max_samples) + assert clf.max_samples_ == clf._max_samples def test_iforest_subsampled_features(): diff --git a/sklearn/ensemble/tests/test_voting.py b/sklearn/ensemble/tests/test_voting.py index 0122692202938..be29d1fbcff64 100644 --- a/sklearn/ensemble/tests/test_voting.py +++ b/sklearn/ensemble/tests/test_voting.py @@ -109,9 +109,9 @@ def test_tie_situation(): clf2 = RandomForestClassifier(random_state=123) eclf = VotingClassifier(estimators=[('lr', clf1), ('rf', clf2)], voting='hard') - assert_equal(clf1.fit(X, y).predict(X)[73], 2) - assert_equal(clf2.fit(X, y).predict(X)[73], 1) - assert_equal(eclf.fit(X, y).predict(X)[73], 1) + assert clf1.fit(X, y).predict(X)[73] == 2 + assert clf2.fit(X, y).predict(X)[73] == 1 + assert eclf.fit(X, y).predict(X)[73] == 1 def test_weights_iris(): @@ -174,21 +174,21 @@ def test_predict_on_toy_problem(): y = np.array([1, 1, 1, 2, 2, 2]) - assert_equal(all(clf1.fit(X, y).predict(X)), all([1, 1, 1, 2, 2, 2])) - assert_equal(all(clf2.fit(X, y).predict(X)), all([1, 1, 1, 2, 2, 2])) - assert_equal(all(clf3.fit(X, y).predict(X)), all([1, 1, 1, 2, 2, 2])) + assert all(clf1.fit(X, y).predict(X)) == all([1, 1, 1, 2, 2, 2]) + assert all(clf2.fit(X, y).predict(X)) == all([1, 1, 1, 2, 2, 2]) + assert all(clf3.fit(X, y).predict(X)) == all([1, 1, 1, 2, 2, 2]) eclf = VotingClassifier(estimators=[ ('lr', clf1), ('rf', clf2), ('gnb', clf3)], voting='hard', weights=[1, 1, 1]) - assert_equal(all(eclf.fit(X, y).predict(X)), all([1, 1, 1, 2, 2, 2])) + assert all(eclf.fit(X, y).predict(X)) == 
all([1, 1, 1, 2, 2, 2]) eclf = VotingClassifier(estimators=[ ('lr', clf1), ('rf', clf2), ('gnb', clf3)], voting='soft', weights=[1, 1, 1]) - assert_equal(all(eclf.fit(X, y).predict(X)), all([1, 1, 1, 2, 2, 2])) + assert all(eclf.fit(X, y).predict(X)) == all([1, 1, 1, 2, 2, 2]) def test_predict_proba_on_toy_problem(): @@ -370,15 +370,15 @@ def test_set_params(): assert_array_equal(eclf1.predict(X), eclf2.predict(X)) assert_array_almost_equal(eclf1.predict_proba(X), eclf2.predict_proba(X)) - assert_equal(eclf2.estimators[0][1].get_params(), clf1.get_params()) - assert_equal(eclf2.estimators[1][1].get_params(), clf2.get_params()) + assert eclf2.estimators[0][1].get_params() == clf1.get_params() + assert eclf2.estimators[1][1].get_params() == clf2.get_params() eclf1.set_params(lr__C=10.0) eclf2.set_params(nb__max_depth=5) assert eclf1.estimators[0][1].get_params()['C'] == 10.0 assert eclf2.estimators[1][1].get_params()['max_depth'] == 5 - assert_equal(eclf1.get_params()["lr__C"], + assert (eclf1.get_params()["lr__C"] == eclf1.get_params()["lr"].get_params()['C']) diff --git a/sklearn/ensemble/tests/test_weight_boosting.py b/sklearn/ensemble/tests/test_weight_boosting.py index 5b1c38e16d00e..6f03754345519 100755 --- a/sklearn/ensemble/tests/test_weight_boosting.py +++ b/sklearn/ensemble/tests/test_weight_boosting.py @@ -91,8 +91,8 @@ def test_classification_toy(): clf.fit(X, y_class) assert_array_equal(clf.predict(T), y_t_class) assert_array_equal(np.unique(np.asarray(y_t_class)), clf.classes_) - assert_equal(clf.predict_proba(T).shape, (len(T), 2)) - assert_equal(clf.decision_function(T).shape, (len(T),)) + assert clf.predict_proba(T).shape == (len(T), 2) + assert clf.decision_function(T).shape == (len(T),) def test_regression_toy(): @@ -116,17 +116,17 @@ def test_iris(): if alg == "SAMME": clf_samme = clf prob_samme = proba - assert_equal(proba.shape[1], len(classes)) - assert_equal(clf.decision_function(iris.data).shape[1], len(classes)) + assert proba.shape[1] == len(classes) + assert clf.decision_function(iris.data).shape[1] == len(classes) score = clf.score(iris.data, iris.target) assert score > 0.9, "Failed with algorithm %s and score = %f" % \ (alg, score) # Check we used multiple estimators - assert_greater(len(clf.estimators_), 1) + assert len(clf.estimators_) > 1 # Check for distinct random states (see issue #7408) - assert_equal(len(set(est.random_state for est in clf.estimators_)), + assert (len(set(est.random_state for est in clf.estimators_)) == len(clf.estimators_)) # Somewhat hacky regression test: prior to @@ -147,7 +147,7 @@ def test_boston(): # Check we used multiple estimators assert len(reg.estimators_) > 1 # Check for distinct random states (see issue #7408) - assert_equal(len(set(est.random_state for est in reg.estimators_)), + assert (len(set(est.random_state for est in reg.estimators_)) == len(reg.estimators_)) @@ -171,11 +171,11 @@ def test_staged_predict(): s for s in clf.staged_score( iris.data, iris.target, sample_weight=iris_weights)] - assert_equal(len(staged_predictions), 10) + assert len(staged_predictions) == 10 assert_array_almost_equal(predictions, staged_predictions[-1]) - assert_equal(len(staged_probas), 10) + assert len(staged_probas) == 10 assert_array_almost_equal(proba, staged_probas[-1]) - assert_equal(len(staged_scores), 10) + assert len(staged_scores) == 10 assert_array_almost_equal(score, staged_scores[-1]) # AdaBoost regression @@ -189,9 +189,9 @@ def test_staged_predict(): s for s in clf.staged_score( boston.data, boston.target, 
sample_weight=boston_weights)] - assert_equal(len(staged_predictions), 10) + assert len(staged_predictions) == 10 assert_array_almost_equal(predictions, staged_predictions[-1]) - assert_equal(len(staged_scores), 10) + assert len(staged_scores) == 10 assert_array_almost_equal(score, staged_scores[-1]) @@ -226,9 +226,9 @@ def test_pickle(): s = pickle.dumps(obj) obj2 = pickle.loads(s) - assert_equal(type(obj2), obj.__class__) + assert type(obj2) == obj.__class__ score2 = obj2.score(iris.data, iris.target) - assert_equal(score, score2) + assert score == score2 # Adaboost regressor obj = AdaBoostRegressor(random_state=0) @@ -237,9 +237,9 @@ def test_pickle(): s = pickle.dumps(obj) obj2 = pickle.loads(s) - assert_equal(type(obj2), obj.__class__) + assert type(obj2) == obj.__class__ score2 = obj2.score(boston.data, boston.target) - assert_equal(score, score2) + assert score == score2 def test_importances(): @@ -258,8 +258,8 @@ def test_importances(): clf.fit(X, y) importances = clf.feature_importances_ - assert_equal(importances.shape[0], 10) - assert_equal((importances[:3, np.newaxis] >= importances[3:]).all(), + assert importances.shape[0] == 10 + assert ((importances[:3, np.newaxis] >= importances[3:]).all() == True) @@ -480,7 +480,7 @@ def predict(self, X): boost = AdaBoostRegressor(DummyEstimator(), n_estimators=3) boost.fit(X, y_regr) - assert_equal(len(boost.estimator_weights_), len(boost.estimator_errors_)) + assert len(boost.estimator_weights_) == len(boost.estimator_errors_) def test_multidimensional_X(): From 626b672b51c5e4496250100f090c084dcf7cbf1c Mon Sep 17 00:00:00 2001 From: adrinjalali Date: Sun, 30 Jun 2019 15:37:14 +0200 Subject: [PATCH 06/22] fix feature_extraction --- .../tests/test_dict_vectorizer.py | 20 +- .../tests/test_feature_hasher.py | 26 +-- .../feature_extraction/tests/test_image.py | 24 +- sklearn/feature_extraction/tests/test_text.py | 208 +++++++++--------- 4 files changed, 139 insertions(+), 139 deletions(-) diff --git a/sklearn/feature_extraction/tests/test_dict_vectorizer.py b/sklearn/feature_extraction/tests/test_dict_vectorizer.py index 876f3083e3484..8acd0bcadb160 100644 --- a/sklearn/feature_extraction/tests/test_dict_vectorizer.py +++ b/sklearn/feature_extraction/tests/test_dict_vectorizer.py @@ -27,10 +27,10 @@ def test_dictvectorizer(sparse, dtype, sort, iterable): v = DictVectorizer(sparse=sparse, dtype=dtype, sort=sort) X = v.fit_transform(iter(D) if iterable else D) - assert_equal(sp.issparse(X), sparse) - assert_equal(X.shape, (3, 5)) - assert_equal(X.sum(), 14) - assert_equal(v.inverse_transform(X), D) + assert sp.issparse(X) == sparse + assert X.shape == (3, 5) + assert X.sum() == 14 + assert v.inverse_transform(X) == D if sparse: # CSR matrices can't be compared for equality @@ -41,7 +41,7 @@ def test_dictvectorizer(sparse, dtype, sort, iterable): else D)) if sort: - assert_equal(v.feature_names_, + assert (v.feature_names_ == sorted(v.feature_names_)) @@ -59,7 +59,7 @@ def test_feature_selection(): sel = SelectKBest(chi2, k=2).fit(X, [0, 1]) v.restrict(sel.get_support(indices=indices), indices=indices) - assert_equal(v.get_feature_names(), ["useful1", "useful2"]) + assert v.get_feature_names() == ["useful1", "useful2"] def test_one_of_k(): @@ -68,10 +68,10 @@ def test_one_of_k(): {"version=3": True, "spam": -1}] v = DictVectorizer() X = v.fit_transform(D_in) - assert_equal(X.shape, (3, 5)) + assert X.shape == (3, 5) D_out = v.inverse_transform(X) - assert_equal(D_out[0], {"version=1": 1, "ham": 2}) + assert D_out[0] == {"version=1": 1, "ham": 
2} names = v.get_feature_names() assert "version=2" in names @@ -96,7 +96,7 @@ def test_unseen_or_no_features(): try: v.transform([]) except ValueError as e: - assert_in("empty", str(e)) + assert "empty" in str(e) def test_deterministic_vocabulary(): @@ -111,4 +111,4 @@ def test_deterministic_vocabulary(): v_1 = DictVectorizer().fit([d_sorted]) v_2 = DictVectorizer().fit([d_shuffled]) - assert_equal(v_1.vocabulary_, v_2.vocabulary_) + assert v_1.vocabulary_ == v_2.vocabulary_ diff --git a/sklearn/feature_extraction/tests/test_feature_hasher.py b/sklearn/feature_extraction/tests/test_feature_hasher.py index 75cc907dd8f28..ae683df2ed933 100644 --- a/sklearn/feature_extraction/tests/test_feature_hasher.py +++ b/sklearn/feature_extraction/tests/test_feature_hasher.py @@ -11,7 +11,7 @@ def test_feature_hasher_dicts(): h = FeatureHasher(n_features=16) - assert_equal("dict", h.input_type) + assert "dict" == h.input_type raw_X = [{"foo": "bar", "dada": 42, "tzara": 37}, {"foo": "baz", "gaga": "string1"}] @@ -35,13 +35,13 @@ def test_feature_hasher_strings(): alternate_sign=False) X = h.transform(it) - assert_equal(X.shape[0], len(raw_X)) - assert_equal(X.shape[1], n_features) + assert X.shape[0] == len(raw_X) + assert X.shape[1] == n_features - assert_equal(X[0].sum(), 4) - assert_equal(X[1].sum(), 3) + assert X[0].sum() == 4 + assert X[1].sum() == 3 - assert_equal(X.nnz, 6) + assert X.nnz == 6 def test_feature_hasher_pairs(): @@ -51,8 +51,8 @@ def test_feature_hasher_pairs(): x1, x2 = h.transform(raw_X).toarray() x1_nz = sorted(np.abs(x1[x1 != 0])) x2_nz = sorted(np.abs(x2[x2 != 0])) - assert_equal([1, 2], x1_nz) - assert_equal([1, 3, 4], x2_nz) + assert [1, 2] == x1_nz + assert [1, 3, 4] == x2_nz def test_feature_hasher_pairs_with_string_values(): @@ -62,16 +62,16 @@ def test_feature_hasher_pairs_with_string_values(): x1, x2 = h.transform(raw_X).toarray() x1_nz = sorted(np.abs(x1[x1 != 0])) x2_nz = sorted(np.abs(x2[x2 != 0])) - assert_equal([1, 1], x1_nz) - assert_equal([1, 1, 4], x2_nz) + assert [1, 1] == x1_nz + assert [1, 1, 4] == x2_nz raw_X = (iter(d.items()) for d in [{"bax": "abc"}, {"bax": "abc"}]) x1, x2 = h.transform(raw_X).toarray() x1_nz = np.abs(x1[x1 != 0]) x2_nz = np.abs(x2[x2 != 0]) - assert_equal([1], x1_nz) - assert_equal([1], x2_nz) + assert [1] == x1_nz + assert [1] == x2_nz assert_array_equal(x1, x2) @@ -107,7 +107,7 @@ def test_hasher_set_params(): def test_hasher_zeros(): # Assert that no zeros are materialized in the output. X = FeatureHasher().transform([{'foo': 0}]) - assert_equal(X.data.shape, (0,)) + assert X.data.shape == (0,) @ignore_warnings(category=DeprecationWarning) diff --git a/sklearn/feature_extraction/tests/test_image.py b/sklearn/feature_extraction/tests/test_image.py index 92b18310469b4..d66e06a44029d 100644 --- a/sklearn/feature_extraction/tests/test_image.py +++ b/sklearn/feature_extraction/tests/test_image.py @@ -18,7 +18,7 @@ def test_img_to_graph(): x, y = np.mgrid[:4, :4] - 10 grad_x = img_to_graph(x) grad_y = img_to_graph(y) - assert_equal(grad_x.nnz, grad_y.nnz) + assert grad_x.nnz == grad_y.nnz # Negative elements are the diagonal: the elements of the original # image. 
Positive elements are the values of the gradient, they # should all be equal on grad_x and grad_y @@ -68,7 +68,7 @@ def test_connect_regions(): for thr in (50, 150): mask = face > thr graph = img_to_graph(face, mask) - assert_equal(ndimage.label(mask)[1], connected_components(graph)[0]) + assert ndimage.label(mask)[1] == connected_components(graph)[0] @ignore_warnings(category=DeprecationWarning) # scipy deprecation inside face @@ -85,11 +85,11 @@ def test_connect_regions_with_grid(): mask = face > 50 graph = grid_to_graph(*face.shape, mask=mask) - assert_equal(ndimage.label(mask)[1], connected_components(graph)[0]) + assert ndimage.label(mask)[1] == connected_components(graph)[0] mask = face > 150 graph = grid_to_graph(*face.shape, mask=mask, dtype=None) - assert_equal(ndimage.label(mask)[1], connected_components(graph)[0]) + assert ndimage.label(mask)[1] == connected_components(graph)[0] def _downsampled_face(): @@ -138,7 +138,7 @@ def test_extract_patches_all(): p_h, p_w = 16, 16 expected_n_patches = (i_h - p_h + 1) * (i_w - p_w + 1) patches = extract_patches_2d(face, (p_h, p_w)) - assert_equal(patches.shape, (expected_n_patches, p_h, p_w)) + assert patches.shape == (expected_n_patches, p_h, p_w) def test_extract_patches_all_color(): @@ -147,7 +147,7 @@ def test_extract_patches_all_color(): p_h, p_w = 16, 16 expected_n_patches = (i_h - p_h + 1) * (i_w - p_w + 1) patches = extract_patches_2d(face, (p_h, p_w)) - assert_equal(patches.shape, (expected_n_patches, p_h, p_w, 3)) + assert patches.shape == (expected_n_patches, p_h, p_w, 3) def test_extract_patches_all_rect(): @@ -158,7 +158,7 @@ def test_extract_patches_all_rect(): expected_n_patches = (i_h - p_h + 1) * (i_w - p_w + 1) patches = extract_patches_2d(face, (p_h, p_w)) - assert_equal(patches.shape, (expected_n_patches, p_h, p_w)) + assert patches.shape == (expected_n_patches, p_h, p_w) def test_extract_patches_max_patches(): @@ -167,11 +167,11 @@ def test_extract_patches_max_patches(): p_h, p_w = 16, 16 patches = extract_patches_2d(face, (p_h, p_w), max_patches=100) - assert_equal(patches.shape, (100, p_h, p_w)) + assert patches.shape == (100, p_h, p_w) expected_n_patches = int(0.5 * (i_h - p_h + 1) * (i_w - p_w + 1)) patches = extract_patches_2d(face, (p_h, p_w), max_patches=0.5) - assert_equal(patches.shape, (expected_n_patches, p_h, p_w)) + assert patches.shape == (expected_n_patches, p_h, p_w) assert_raises(ValueError, extract_patches_2d, face, (p_h, p_w), max_patches=2.0) @@ -184,7 +184,7 @@ def test_extract_patch_same_size_image(): # Request patches of the same size as image # Should return just the single patch a.k.a. 
the image patches = extract_patches_2d(face, face.shape, max_patches=2) - assert_equal(patches.shape[0], 1) + assert patches.shape[0] == 1 def test_extract_patches_less_than_max_patches(): @@ -195,7 +195,7 @@ def test_extract_patches_less_than_max_patches(): expected_n_patches = (i_h - p_h + 1) * (i_w - p_w + 1) patches = extract_patches_2d(face, (p_h, p_w), max_patches=4000) - assert_equal(patches.shape, (expected_n_patches, p_h, p_w)) + assert patches.shape == (expected_n_patches, p_h, p_w) def test_reconstruct_patches_perfect(): @@ -247,7 +247,7 @@ def test_patch_extractor_max_patches_default(): faces = face_collection extr = PatchExtractor(max_patches=100, random_state=0) patches = extr.transform(faces) - assert_equal(patches.shape, (len(faces) * 100, 19, 25)) + assert patches.shape == (len(faces) * 100, 19, 25) def test_patch_extractor_all_patches(): diff --git a/sklearn/feature_extraction/tests/test_text.py b/sklearn/feature_extraction/tests/test_text.py index 73a240547d81d..0bca5b5506253 100644 --- a/sklearn/feature_extraction/tests/test_text.py +++ b/sklearn/feature_extraction/tests/test_text.py @@ -83,42 +83,42 @@ def test_strip_accents(): # check some classical latin accentuated symbols a = 'àáâãäåçèéêë' expected = 'aaaaaaceeee' - assert_equal(strip_accents_unicode(a), expected) + assert strip_accents_unicode(a) == expected a = 'ìíîïñòóôõöùúûüý' expected = 'iiiinooooouuuuy' - assert_equal(strip_accents_unicode(a), expected) + assert strip_accents_unicode(a) == expected # check some arabic a = '\u0625' # alef with a hamza below: إ expected = '\u0627' # simple alef: ا - assert_equal(strip_accents_unicode(a), expected) + assert strip_accents_unicode(a) == expected # mix letters accentuated and not a = "this is à test" expected = 'this is a test' - assert_equal(strip_accents_unicode(a), expected) + assert strip_accents_unicode(a) == expected def test_to_ascii(): # check some classical latin accentuated symbols a = 'àáâãäåçèéêë' expected = 'aaaaaaceeee' - assert_equal(strip_accents_ascii(a), expected) + assert strip_accents_ascii(a) == expected a = "ìíîïñòóôõöùúûüý" expected = 'iiiinooooouuuuy' - assert_equal(strip_accents_ascii(a), expected) + assert strip_accents_ascii(a) == expected # check some arabic a = '\u0625' # halef with a hamza below expected = '' # halef has no direct ascii match - assert_equal(strip_accents_ascii(a), expected) + assert strip_accents_ascii(a) == expected # mix letters accentuated and not a = "this is à test" expected = 'this is a test' - assert_equal(strip_accents_ascii(a), expected) + assert strip_accents_ascii(a) == expected @pytest.mark.parametrize('Vectorizer', (CountVectorizer, HashingVectorizer)) @@ -128,18 +128,18 @@ def test_word_analyzer_unigrams(Vectorizer): "c'était pas très bon.") expected = ['ai', 'mange', 'du', 'kangourou', 'ce', 'midi', 'etait', 'pas', 'tres', 'bon'] - assert_equal(wa(text), expected) + assert wa(text) == expected text = "This is a test, really.\n\n I met Harry yesterday." 
expected = ['this', 'is', 'test', 'really', 'met', 'harry', 'yesterday'] - assert_equal(wa(text), expected) + assert wa(text) == expected wa = Vectorizer(input='file').build_analyzer() text = StringIO("This is a test with a file-like object!") expected = ['this', 'is', 'test', 'with', 'file', 'like', 'object'] - assert_equal(wa(text), expected) + assert wa(text) == expected # with custom preprocessor wa = Vectorizer(preprocessor=uppercase).build_analyzer() @@ -147,7 +147,7 @@ def test_word_analyzer_unigrams(Vectorizer): " c'était pas très bon.") expected = ['AI', 'MANGE', 'DU', 'KANGOUROU', 'CE', 'MIDI', 'ETAIT', 'PAS', 'TRES', 'BON'] - assert_equal(wa(text), expected) + assert wa(text) == expected # with custom tokenizer wa = Vectorizer(tokenizer=split_tokenize, @@ -156,7 +156,7 @@ def test_word_analyzer_unigrams(Vectorizer): "c'était pas très bon.") expected = ["j'ai", 'mange', 'du', 'kangourou', 'ce', 'midi,', "c'etait", 'pas', 'tres', 'bon.'] - assert_equal(wa(text), expected) + assert wa(text) == expected def test_word_analyzer_unigrams_and_bigrams(): @@ -168,7 +168,7 @@ def test_word_analyzer_unigrams_and_bigrams(): 'etait', 'pas', 'tres', 'bon', 'ai mange', 'mange du', 'du kangourou', 'kangourou ce', 'ce midi', 'midi etait', 'etait pas', 'pas tres', 'tres bon'] - assert_equal(wa(text), expected) + assert wa(text) == expected def test_unicode_decode_error(): @@ -193,22 +193,22 @@ def test_char_ngram_analyzer(): text = "J'ai mangé du kangourou ce midi, c'était pas très bon" expected = ["j'a", "'ai", 'ai ', 'i m', ' ma'] - assert_equal(cnga(text)[:5], expected) + assert cnga(text)[:5] == expected expected = ['s tres', ' tres ', 'tres b', 'res bo', 'es bon'] - assert_equal(cnga(text)[-5:], expected) + assert cnga(text)[-5:] == expected text = "This \n\tis a test, really.\n\n I met Harry yesterday" expected = ['thi', 'his', 'is ', 's i', ' is'] - assert_equal(cnga(text)[:5], expected) + assert cnga(text)[:5] == expected expected = [' yeste', 'yester', 'esterd', 'sterda', 'terday'] - assert_equal(cnga(text)[-5:], expected) + assert cnga(text)[-5:] == expected cnga = CountVectorizer(input='file', analyzer='char', ngram_range=(3, 6)).build_analyzer() text = StringIO("This is a test with a file-like object!") expected = ['thi', 'his', 'is ', 's i', ' is'] - assert_equal(cnga(text)[:5], expected) + assert cnga(text)[:5] == expected def test_char_wb_ngram_analyzer(): @@ -217,16 +217,16 @@ def test_char_wb_ngram_analyzer(): text = "This \n\tis a test, really.\n\n I met Harry yesterday" expected = [' th', 'thi', 'his', 'is ', ' thi'] - assert_equal(cnga(text)[:5], expected) + assert cnga(text)[:5] == expected expected = ['yester', 'esterd', 'sterda', 'terday', 'erday '] - assert_equal(cnga(text)[-5:], expected) + assert cnga(text)[-5:] == expected cnga = CountVectorizer(input='file', analyzer='char_wb', ngram_range=(3, 6)).build_analyzer() text = StringIO("A test with a file-like object!") expected = [' a ', ' te', 'tes', 'est', 'st ', ' tes'] - assert_equal(cnga(text)[:6], expected) + assert cnga(text)[:6] == expected def test_word_ngram_analyzer(): @@ -235,17 +235,17 @@ def test_word_ngram_analyzer(): text = "This \n\tis a test, really.\n\n I met Harry yesterday" expected = ['this is test', 'is test really', 'test really met'] - assert_equal(cnga(text)[:3], expected) + assert cnga(text)[:3] == expected expected = ['test really met harry yesterday', 'this is test really met harry', 'is test really met harry yesterday'] - assert_equal(cnga(text)[-3:], expected) + assert cnga(text)[-3:] == 
expected cnga_file = CountVectorizer(input='file', analyzer='word', ngram_range=(3, 6)).build_analyzer() file = StringIO(text) - assert_equal(cnga_file(file), cnga(text)) + assert cnga_file(file) == cnga(text) def test_countvectorizer_custom_vocabulary(): @@ -258,11 +258,11 @@ def test_countvectorizer_custom_vocabulary(): vect = CountVectorizer(vocabulary=v) vect.fit(JUNK_FOOD_DOCS) if isinstance(v, Mapping): - assert_equal(vect.vocabulary_, vocab) + assert vect.vocabulary_ == vocab else: - assert_equal(set(vect.vocabulary_), terms) + assert set(vect.vocabulary_) == terms X = vect.transform(JUNK_FOOD_DOCS) - assert_equal(X.shape[1], len(terms)) + assert X.shape[1] == len(terms) def test_countvectorizer_custom_vocabulary_pipeline(): @@ -271,9 +271,9 @@ def test_countvectorizer_custom_vocabulary_pipeline(): ('count', CountVectorizer(vocabulary=what_we_like)), ('tfidf', TfidfTransformer())]) X = pipe.fit_transform(ALL_FOOD_DOCS) - assert_equal(set(pipe.named_steps['count'].vocabulary_), + assert (set(pipe.named_steps['count'].vocabulary_) == set(what_we_like)) - assert_equal(X.shape[1], len(what_we_like)) + assert X.shape[1] == len(what_we_like) def test_countvectorizer_custom_vocabulary_repeated_indices(): @@ -281,7 +281,7 @@ def test_countvectorizer_custom_vocabulary_repeated_indices(): try: CountVectorizer(vocabulary=vocab) except ValueError as e: - assert_in("vocabulary contains repeated indices", str(e).lower()) + assert "vocabulary contains repeated indices" in str(e).lower() def test_countvectorizer_custom_vocabulary_gap_index(): @@ -289,20 +289,20 @@ def test_countvectorizer_custom_vocabulary_gap_index(): try: CountVectorizer(vocabulary=vocab) except ValueError as e: - assert_in("doesn't contain index", str(e).lower()) + assert "doesn't contain index" in str(e).lower() def test_countvectorizer_stop_words(): cv = CountVectorizer() cv.set_params(stop_words='english') - assert_equal(cv.get_stop_words(), ENGLISH_STOP_WORDS) + assert cv.get_stop_words() == ENGLISH_STOP_WORDS cv.set_params(stop_words='_bad_str_stop_') assert_raises(ValueError, cv.get_stop_words) cv.set_params(stop_words='_bad_unicode_stop_') assert_raises(ValueError, cv.get_stop_words) stoplist = ['some', 'other', 'words'] cv.set_params(stop_words=stoplist) - assert_equal(cv.get_stop_words(), set(stoplist)) + assert cv.get_stop_words() == set(stoplist) def test_countvectorizer_empty_vocabulary(): @@ -311,7 +311,7 @@ def test_countvectorizer_empty_vocabulary(): vect.fit(["foo"]) assert False, "we shouldn't get here" except ValueError as e: - assert_in("empty vocabulary", str(e).lower()) + assert "empty vocabulary" in str(e).lower() try: v = CountVectorizer(max_df=1.0, stop_words="english") @@ -319,14 +319,14 @@ def test_countvectorizer_empty_vocabulary(): v.fit(["to be or not to be", "and me too", "and so do you"]) assert False, "we shouldn't get here" except ValueError as e: - assert_in("empty vocabulary", str(e).lower()) + assert "empty vocabulary" in str(e).lower() def test_fit_countvectorizer_twice(): cv = CountVectorizer() X1 = cv.fit_transform(ALL_FOOD_DOCS[:5]) X2 = cv.fit_transform(ALL_FOOD_DOCS[5:]) - assert_not_equal(X1.shape[1], X2.shape[1]) + assert X1.shape[1] != X2.shape[1] def test_tf_idf_smoothing(): @@ -383,11 +383,11 @@ def test_sublinear_tf(): X = [[1], [2], [3]] tr = TfidfTransformer(sublinear_tf=True, use_idf=False, norm=None) tfidf = tr.fit_transform(X).toarray() - assert_equal(tfidf[0], 1) - assert_greater(tfidf[1], tfidf[0]) - assert_greater(tfidf[2], tfidf[1]) - assert_less(tfidf[1], 2) - 
assert_less(tfidf[2], 3) + assert tfidf[0] == 1 + assert tfidf[1] > tfidf[0] + assert tfidf[2] > tfidf[1] + assert tfidf[1] < 2 + assert tfidf[2] < 3 def test_vectorizer(): @@ -401,7 +401,7 @@ def test_vectorizer(): counts_train = v1.fit_transform(train_data) if hasattr(counts_train, 'tocsr'): counts_train = counts_train.tocsr() - assert_equal(counts_train[0, v1.vocabulary_["pizza"]], 2) + assert counts_train[0, v1.vocabulary_["pizza"]] == 2 # build a vectorizer v1 with the same vocabulary as the one fitted by v1 v2 = CountVectorizer(vocabulary=v1.vocabulary_) @@ -413,9 +413,9 @@ def test_vectorizer(): counts_test = counts_test.tocsr() vocabulary = v.vocabulary_ - assert_equal(counts_test[0, vocabulary["salad"]], 1) - assert_equal(counts_test[0, vocabulary["tomato"]], 1) - assert_equal(counts_test[0, vocabulary["water"]], 1) + assert counts_test[0, vocabulary["salad"]] == 1 + assert counts_test[0, vocabulary["tomato"]] == 1 + assert counts_test[0, vocabulary["water"]] == 1 # stop word from the fixed list assert "the" not in vocabulary @@ -427,20 +427,20 @@ def test_vectorizer(): assert "copyright" not in vocabulary # not present in the sample - assert_equal(counts_test[0, vocabulary["coke"]], 0) - assert_equal(counts_test[0, vocabulary["burger"]], 0) - assert_equal(counts_test[0, vocabulary["beer"]], 0) - assert_equal(counts_test[0, vocabulary["pizza"]], 0) + assert counts_test[0, vocabulary["coke"]] == 0 + assert counts_test[0, vocabulary["burger"]] == 0 + assert counts_test[0, vocabulary["beer"]] == 0 + assert counts_test[0, vocabulary["pizza"]] == 0 # test tf-idf t1 = TfidfTransformer(norm='l1') tfidf = t1.fit(counts_train).transform(counts_train).toarray() - assert_equal(len(t1.idf_), len(v1.vocabulary_)) - assert_equal(tfidf.shape, (n_train, len(v1.vocabulary_))) + assert len(t1.idf_) == len(v1.vocabulary_) + assert tfidf.shape == (n_train, len(v1.vocabulary_)) # test tf-idf with new data tfidf_test = t1.transform(counts_test).toarray() - assert_equal(tfidf_test.shape, (len(test_data), len(v1.vocabulary_))) + assert tfidf_test.shape == (len(test_data), len(v1.vocabulary_)) # test tf alone t2 = TfidfTransformer(norm='l1', use_idf=False) @@ -482,7 +482,7 @@ def test_vectorizer(): # ascii preprocessor? 
v3.set_params(strip_accents='ascii', lowercase=False) - assert_equal(v3.build_preprocessor(), strip_accents_ascii) + assert v3.build_preprocessor() == strip_accents_ascii # error on bad strip_accents param v3.set_params(strip_accents='_gabbledegook_', preprocessor=None) @@ -497,7 +497,7 @@ def test_tfidf_vectorizer_setters(): tv = TfidfVectorizer(norm='l2', use_idf=False, smooth_idf=False, sublinear_tf=False) tv.norm = 'l1' - assert_equal(tv._tfidf.norm, 'l1') + assert tv._tfidf.norm == 'l1' tv.use_idf = True assert tv._tfidf.use_idf tv.smooth_idf = True @@ -511,8 +511,8 @@ def test_hashing_vectorizer(): v = HashingVectorizer() X = v.transform(ALL_FOOD_DOCS) token_nnz = X.nnz - assert_equal(X.shape, (len(ALL_FOOD_DOCS), v.n_features)) - assert_equal(X.dtype, v.dtype) + assert X.shape == (len(ALL_FOOD_DOCS), v.n_features) + assert X.dtype == v.dtype # By default the hashed values receive a random sign and l2 normalization # makes the feature values bounded @@ -528,8 +528,8 @@ def test_hashing_vectorizer(): # Check vectorization with some non-default parameters v = HashingVectorizer(ngram_range=(1, 2), norm='l1') X = v.transform(ALL_FOOD_DOCS) - assert_equal(X.shape, (len(ALL_FOOD_DOCS), v.n_features)) - assert_equal(X.dtype, v.dtype) + assert X.shape == (len(ALL_FOOD_DOCS), v.n_features) + assert X.dtype == v.dtype # ngrams generate more non zeros ngrams_nnz = X.nnz @@ -555,16 +555,16 @@ def test_feature_names(): # test for vocabulary learned from data X = cv.fit_transform(ALL_FOOD_DOCS) n_samples, n_features = X.shape - assert_equal(len(cv.vocabulary_), n_features) + assert len(cv.vocabulary_) == n_features feature_names = cv.get_feature_names() - assert_equal(len(feature_names), n_features) + assert len(feature_names) == n_features assert_array_equal(['beer', 'burger', 'celeri', 'coke', 'pizza', 'salad', 'sparkling', 'tomato', 'water'], feature_names) for idx, name in enumerate(feature_names): - assert_equal(idx, cv.vocabulary_.get(name)) + assert idx == cv.vocabulary_.get(name) # test for custom vocabulary vocab = ['beer', 'burger', 'celeri', 'coke', 'pizza', @@ -577,7 +577,7 @@ def test_feature_names(): assert cv.fixed_vocabulary_ for idx, name in enumerate(feature_names): - assert_equal(idx, cv.vocabulary_.get(name)) + assert idx == cv.vocabulary_.get(name) @pytest.mark.parametrize('Vectorizer', (CountVectorizer, TfidfVectorizer)) @@ -589,8 +589,8 @@ def test_vectorizer_max_features(Vectorizer): # test bounded number of extracted features vectorizer = Vectorizer(max_df=0.6, max_features=4) vectorizer.fit(ALL_FOOD_DOCS) - assert_equal(set(vectorizer.vocabulary_), expected_vocabulary) - assert_equal(vectorizer.stop_words_, expected_stop_words) + assert set(vectorizer.vocabulary_) == expected_vocabulary + assert vectorizer.stop_words_ == expected_stop_words def test_count_vectorizer_max_features(): @@ -609,14 +609,14 @@ def test_count_vectorizer_max_features(): features_None = cv_None.get_feature_names() # The most common feature is "the", with frequency 7. 
- assert_equal(7, counts_1.max()) - assert_equal(7, counts_3.max()) - assert_equal(7, counts_None.max()) + assert 7 == counts_1.max() + assert 7 == counts_3.max() + assert 7 == counts_None.max() # The most common feature should be the same - assert_equal("the", features_1[np.argmax(counts_1)]) - assert_equal("the", features_3[np.argmax(counts_3)]) - assert_equal("the", features_None[np.argmax(counts_None)]) + assert "the" == features_1[np.argmax(counts_1)] + assert "the" == features_3[np.argmax(counts_3)] + assert "the" == features_None[np.argmax(counts_None)] def test_vectorizer_max_df(): @@ -624,22 +624,22 @@ def test_vectorizer_max_df(): vect = CountVectorizer(analyzer='char', max_df=1.0) vect.fit(test_data) assert 'a' in vect.vocabulary_.keys() - assert_equal(len(vect.vocabulary_.keys()), 6) - assert_equal(len(vect.stop_words_), 0) + assert len(vect.vocabulary_.keys()) == 6 + assert len(vect.stop_words_) == 0 vect.max_df = 0.5 # 0.5 * 3 documents -> max_doc_count == 1.5 vect.fit(test_data) assert 'a' not in vect.vocabulary_.keys() # {ae} ignored - assert_equal(len(vect.vocabulary_.keys()), 4) # {bcdt} remain + assert len(vect.vocabulary_.keys()) == 4 # {bcdt} remain assert 'a' in vect.stop_words_ - assert_equal(len(vect.stop_words_), 2) + assert len(vect.stop_words_) == 2 vect.max_df = 1 vect.fit(test_data) assert 'a' not in vect.vocabulary_.keys() # {ae} ignored - assert_equal(len(vect.vocabulary_.keys()), 4) # {bcdt} remain + assert len(vect.vocabulary_.keys()) == 4 # {bcdt} remain assert 'a' in vect.stop_words_ - assert_equal(len(vect.stop_words_), 2) + assert len(vect.stop_words_) == 2 def test_vectorizer_min_df(): @@ -647,22 +647,22 @@ def test_vectorizer_min_df(): vect = CountVectorizer(analyzer='char', min_df=1) vect.fit(test_data) assert 'a' in vect.vocabulary_.keys() - assert_equal(len(vect.vocabulary_.keys()), 6) - assert_equal(len(vect.stop_words_), 0) + assert len(vect.vocabulary_.keys()) == 6 + assert len(vect.stop_words_) == 0 vect.min_df = 2 vect.fit(test_data) assert 'c' not in vect.vocabulary_.keys() # {bcdt} ignored - assert_equal(len(vect.vocabulary_.keys()), 2) # {ae} remain + assert len(vect.vocabulary_.keys()) == 2 # {ae} remain assert 'c' in vect.stop_words_ - assert_equal(len(vect.stop_words_), 4) + assert len(vect.stop_words_) == 4 vect.min_df = 0.8 # 0.8 * 3 documents -> min_doc_count == 2.4 vect.fit(test_data) assert 'c' not in vect.vocabulary_.keys() # {bcdet} ignored - assert_equal(len(vect.vocabulary_.keys()), 1) # {a} remains + assert len(vect.vocabulary_.keys()) == 1 # {a} remains assert 'c' in vect.stop_words_ - assert_equal(len(vect.stop_words_), 5) + assert len(vect.stop_words_) == 5 def test_count_binary_occurrences(): @@ -685,7 +685,7 @@ def test_count_binary_occurrences(): vect = CountVectorizer(analyzer='char', max_df=1.0, binary=True, dtype=np.float32) X_sparse = vect.fit_transform(test_data) - assert_equal(X_sparse.dtype, np.float32) + assert X_sparse.dtype == np.float32 @fails_if_pypy @@ -694,23 +694,23 @@ def test_hashed_binary_occurrences(): test_data = ['aaabc', 'abbde'] vect = HashingVectorizer(alternate_sign=False, analyzer='char', norm=None) X = vect.transform(test_data) - assert_equal(np.max(X[0:1].data), 3) - assert_equal(np.max(X[1:2].data), 2) - assert_equal(X.dtype, np.float64) + assert np.max(X[0:1].data) == 3 + assert np.max(X[1:2].data) == 2 + assert X.dtype == np.float64 # using boolean features, we can fetch the binary occurrence info # instead. 
vect = HashingVectorizer(analyzer='char', alternate_sign=False, binary=True, norm=None) X = vect.transform(test_data) - assert_equal(np.max(X.data), 1) - assert_equal(X.dtype, np.float64) + assert np.max(X.data) == 1 + assert X.dtype == np.float64 # check the ability to change the dtype vect = HashingVectorizer(analyzer='char', alternate_sign=False, binary=True, norm=None, dtype=np.float64) X = vect.transform(test_data) - assert_equal(X.dtype, np.float64) + assert X.dtype == np.float64 @pytest.mark.parametrize('Vectorizer', (CountVectorizer, TfidfVectorizer)) @@ -764,9 +764,9 @@ def test_count_vectorizer_pipeline_grid_selection(): # on this toy dataset bigram representation which is used in the last of # the grid_search is considered the best estimator since they all converge # to 100% accuracy models - assert_equal(grid_search.best_score_, 1.0) + assert grid_search.best_score_ == 1.0 best_vectorizer = grid_search.best_estimator_.named_steps['vect'] - assert_equal(best_vectorizer.ngram_range, (1, 1)) + assert best_vectorizer.ngram_range == (1, 1) def test_vectorizer_pipeline_grid_selection(): @@ -801,10 +801,10 @@ def test_vectorizer_pipeline_grid_selection(): # on this toy dataset bigram representation which is used in the last of # the grid_search is considered the best estimator since they all converge # to 100% accuracy models - assert_equal(grid_search.best_score_, 1.0) + assert grid_search.best_score_ == 1.0 best_vectorizer = grid_search.best_estimator_.named_steps['vect'] - assert_equal(best_vectorizer.ngram_range, (1, 1)) - assert_equal(best_vectorizer.norm, 'l2') + assert best_vectorizer.ngram_range == (1, 1) + assert best_vectorizer.norm == 'l2' assert not best_vectorizer.fixed_vocabulary_ @@ -833,14 +833,14 @@ def test_vectorizer_unicode(): vect = CountVectorizer() X_counted = vect.fit_transform([document]) - assert_equal(X_counted.shape, (1, 12)) + assert X_counted.shape == (1, 12) vect = HashingVectorizer(norm=None, alternate_sign=False) X_hashed = vect.transform([document]) - assert_equal(X_hashed.shape, (1, 2 ** 20)) + assert X_hashed.shape == (1, 2 ** 20) # No collisions on such a small dataset - assert_equal(X_counted.nnz, X_hashed.nnz) + assert X_counted.nnz == X_hashed.nnz # When norm is None and not alternate_sign, the tokens are counted up to # collisions @@ -876,8 +876,8 @@ def test_pickling_vectorizer(): for orig in instances: s = pickle.dumps(orig) copy = pickle.loads(s) - assert_equal(type(copy), orig.__class__) - assert_equal(copy.get_params(), orig.get_params()) + assert type(copy) == orig.__class__ + assert copy.get_params() == orig.get_params() if IS_PYPY and isinstance(orig, HashingVectorizer): continue else: @@ -898,7 +898,7 @@ def test_countvectorizer_vocab_sets_when_pickling(): unpickled_cv = pickle.loads(pickle.dumps(cv)) cv.fit(ALL_FOOD_DOCS) unpickled_cv.fit(ALL_FOOD_DOCS) - assert_equal(cv.get_feature_names(), unpickled_cv.get_feature_names()) + assert cv.get_feature_names() == unpickled_cv.get_feature_names() def test_countvectorizer_vocab_dicts_when_pickling(): @@ -914,7 +914,7 @@ def test_countvectorizer_vocab_dicts_when_pickling(): unpickled_cv = pickle.loads(pickle.dumps(cv)) cv.fit(ALL_FOOD_DOCS) unpickled_cv.fit(ALL_FOOD_DOCS) - assert_equal(cv.get_feature_names(), unpickled_cv.get_feature_names()) + assert cv.get_feature_names() == unpickled_cv.get_feature_names() def test_stop_words_removal(): @@ -944,7 +944,7 @@ def test_pickling_transformer(): orig = TfidfTransformer().fit(X) s = pickle.dumps(orig) copy = pickle.loads(s) - 
assert_equal(type(copy), orig.__class__) + assert type(copy) == orig.__class__ assert_array_equal( copy.fit_transform(X).toarray(), orig.fit_transform(X).toarray()) @@ -1021,7 +1021,7 @@ def test_vectorizer_vocab_clone(): vect_vocab_clone = clone(vect_vocab) vect_vocab.fit(ALL_FOOD_DOCS) vect_vocab_clone.fit(ALL_FOOD_DOCS) - assert_equal(vect_vocab_clone.vocabulary_, vect_vocab.vocabulary_) + assert vect_vocab_clone.vocabulary_ == vect_vocab.vocabulary_ @pytest.mark.parametrize('Vectorizer', From c64d4475846d6978271d9b0d6d96cf4a1bc12669 Mon Sep 17 00:00:00 2001 From: adrinjalali Date: Sun, 30 Jun 2019 15:38:15 +0200 Subject: [PATCH 07/22] fix feature_selection --- sklearn/feature_selection/tests/test_base.py | 16 +++---- .../tests/test_feature_select.py | 36 +++++++-------- .../tests/test_from_model.py | 8 ++-- .../tests/test_mutual_info.py | 8 ++-- sklearn/feature_selection/tests/test_rfe.py | 46 +++++++++---------- .../tests/test_variance_threshold.py | 2 +- 6 files changed, 58 insertions(+), 58 deletions(-) diff --git a/sklearn/feature_selection/tests/test_base.py b/sklearn/feature_selection/tests/test_base.py index e11b7d94fcebe..f2e3b36d456b5 100644 --- a/sklearn/feature_selection/tests/test_base.py +++ b/sklearn/feature_selection/tests/test_base.py @@ -46,8 +46,8 @@ def test_transform_dense(): assert_array_equal(Xt, Xt_actual2) # Check dtype matches - assert_equal(np.int32, sel.transform(X.astype(np.int32)).dtype) - assert_equal(np.float32, sel.transform(X.astype(np.float32)).dtype) + assert np.int32 == sel.transform(X.astype(np.int32)).dtype + assert np.float32 == sel.transform(X.astype(np.float32)).dtype # Check 1d list and other dtype: names_t_actual = sel.transform([feature_names]) @@ -66,8 +66,8 @@ def test_transform_sparse(): assert_array_equal(Xt, Xt_actual2.toarray()) # Check dtype matches - assert_equal(np.int32, sel.transform(sparse(X).astype(np.int32)).dtype) - assert_equal(np.float32, sel.transform(sparse(X).astype(np.float32)).dtype) + assert np.int32 == sel.transform(sparse(X).astype(np.int32)).dtype + assert np.float32 == sel.transform(sparse(X).astype(np.float32)).dtype # Check wrong shape raises error assert_raises(ValueError, sel.transform, np.array([[1], [2]])) @@ -79,9 +79,9 @@ def test_inverse_transform_dense(): assert_array_equal(Xinv, Xinv_actual) # Check dtype matches - assert_equal(np.int32, + assert (np.int32 == sel.inverse_transform(Xt.astype(np.int32)).dtype) - assert_equal(np.float32, + assert (np.float32 == sel.inverse_transform(Xt.astype(np.float32)).dtype) # Check 1d list and other dtype: @@ -99,9 +99,9 @@ def test_inverse_transform_sparse(): assert_array_equal(Xinv, Xinv_actual.toarray()) # Check dtype matches - assert_equal(np.int32, + assert (np.int32 == sel.inverse_transform(sparse(Xt).astype(np.int32)).dtype) - assert_equal(np.float32, + assert (np.float32 == sel.inverse_transform(sparse(Xt).astype(np.float32)).dtype) # Check wrong shape raises error diff --git a/sklearn/feature_selection/tests/test_feature_select.py b/sklearn/feature_selection/tests/test_feature_select.py index ef907d99f9aa0..ac1822e1a6063 100644 --- a/sklearn/feature_selection/tests/test_feature_select.py +++ b/sklearn/feature_selection/tests/test_feature_select.py @@ -193,10 +193,10 @@ def test_select_percentile_classif_sparse(): X_r2inv = univariate_filter.inverse_transform(X_r2) assert sparse.issparse(X_r2inv) support_mask = safe_mask(X_r2inv, support) - assert_equal(X_r2inv.shape, X.shape) + assert X_r2inv.shape == X.shape assert_array_equal(X_r2inv[:, 
support_mask].toarray(), X_r.toarray()) # Check other columns are empty - assert_equal(X_r2inv.getnnz(), X_r.getnnz()) + assert X_r2inv.getnnz() == X_r.getnnz() ############################################################################## @@ -245,7 +245,7 @@ def test_select_kbest_zero(): assert_array_equal(support, gtruth) X_selected = assert_warns_message(UserWarning, 'No features were selected', univariate_filter.transform, X) - assert_equal(X_selected.shape, (20, 0)) + assert X_selected.shape == (20, 0) def test_select_heuristics_classif(): @@ -371,7 +371,7 @@ def test_select_heuristics_regression(): assert_array_equal(X_r, X_r2) support = univariate_filter.get_support() assert_array_equal(support[:5], np.ones((5, ), dtype=np.bool)) - assert_less(np.sum(support[5:] == 1), 3) + assert np.sum(support[5:] == 1) < 3 def test_boundary_case_ch2(): @@ -466,7 +466,7 @@ def test_select_fwe_regression(): gtruth = np.zeros(20) gtruth[:5] = 1 assert_array_equal(support[:5], np.ones((5, ), dtype=np.bool)) - assert_less(np.sum(support[5:] == 1), 2) + assert np.sum(support[5:] == 1) < 2 def test_selectkbest_tiebreaking(): @@ -478,12 +478,12 @@ def test_selectkbest_tiebreaking(): for X in Xs: sel = SelectKBest(dummy_score, k=1) X1 = ignore_warnings(sel.fit_transform)([X], y) - assert_equal(X1.shape[1], 1) + assert X1.shape[1] == 1 assert_best_scores_kept(sel) sel = SelectKBest(dummy_score, k=2) X2 = ignore_warnings(sel.fit_transform)([X], y) - assert_equal(X2.shape[1], 2) + assert X2.shape[1] == 2 assert_best_scores_kept(sel) @@ -495,12 +495,12 @@ def test_selectpercentile_tiebreaking(): for X in Xs: sel = SelectPercentile(dummy_score, percentile=34) X1 = ignore_warnings(sel.fit_transform)([X], y) - assert_equal(X1.shape[1], 1) + assert X1.shape[1] == 1 assert_best_scores_kept(sel) sel = SelectPercentile(dummy_score, percentile=67) X2 = ignore_warnings(sel.fit_transform)([X], y) - assert_equal(X2.shape[1], 2) + assert X2.shape[1] == 2 assert_best_scores_kept(sel) @@ -514,12 +514,12 @@ def test_tied_pvalues(): for perm in itertools.permutations((0, 1, 2)): X = X0[:, perm] Xt = SelectKBest(chi2, k=2).fit_transform(X, y) - assert_equal(Xt.shape, (2, 2)) - assert_not_in(9998, Xt) + assert Xt.shape == (2, 2) + assert 9998 not in Xt Xt = SelectPercentile(chi2, percentile=67).fit_transform(X, y) - assert_equal(Xt.shape, (2, 2)) - assert_not_in(9998, Xt) + assert Xt.shape == (2, 2) + assert 9998 not in Xt def test_scorefunc_multilabel(): @@ -529,12 +529,12 @@ def test_scorefunc_multilabel(): y = [[1, 1], [0, 1], [1, 0]] Xt = SelectKBest(chi2, k=2).fit_transform(X, y) - assert_equal(Xt.shape, (3, 2)) - assert_not_in(0, Xt) + assert Xt.shape == (3, 2) + assert 0 not in Xt Xt = SelectPercentile(chi2, percentile=67).fit_transform(X, y) - assert_equal(Xt.shape, (3, 2)) - assert_not_in(0, Xt) + assert Xt.shape == (3, 2) + assert 0 not in Xt def test_tied_scores(): @@ -608,7 +608,7 @@ def test_no_feature_selected(): assert_array_equal(selector.get_support(), np.zeros(10)) X_selected = assert_warns_message( UserWarning, 'No features were selected', selector.transform, X) - assert_equal(X_selected.shape, (40, 0)) + assert X_selected.shape == (40, 0) def test_mutual_info_classif(): diff --git a/sklearn/feature_selection/tests/test_from_model.py b/sklearn/feature_selection/tests/test_from_model.py index b6b1b9c91ecf8..a5f61bfa0f061 100644 --- a/sklearn/feature_selection/tests/test_from_model.py +++ b/sklearn/feature_selection/tests/test_from_model.py @@ -169,7 +169,7 @@ def test_feature_importances(): assert 
hasattr(transformer.estimator_, 'feature_importances_') X_new = transformer.transform(X) - assert_less(X_new.shape[1], X.shape[1]) + assert X_new.shape[1] < X.shape[1] importances = transformer.estimator_.feature_importances_ feature_mask = np.abs(importances) > func(importances) @@ -228,7 +228,7 @@ def test_2d_coef(): transformer.fit(X, y) assert hasattr(transformer.estimator_, 'coef_') X_new = transformer.transform(X) - assert_less(X_new.shape[1], X.shape[1]) + assert X_new.shape[1] < X.shape[1] # Manually check that the norm is correctly performed est.fit(X, y) @@ -266,7 +266,7 @@ def test_calling_fit_reinitializes(): transformer.fit(data, y) transformer.set_params(estimator__C=100) transformer.fit(data, y) - assert_equal(transformer.estimator_.C, 100) + assert transformer.estimator_.C == 100 # 0.23. warning about tol not having its correct default value. @@ -321,4 +321,4 @@ def test_threshold_without_refitting(): # Set a higher threshold to filter out more features. model.threshold = "1.0 * mean" - assert_greater(X_transform.shape[1], model.transform(data).shape[1]) + assert X_transform.shape[1] > model.transform(data).shape[1] diff --git a/sklearn/feature_selection/tests/test_mutual_info.py b/sklearn/feature_selection/tests/test_mutual_info.py index 0fe437817ed28..4a610bc518f76 100644 --- a/sklearn/feature_selection/tests/test_mutual_info.py +++ b/sklearn/feature_selection/tests/test_mutual_info.py @@ -107,7 +107,7 @@ def test_compute_mi_cd_unique_label(): y = np.hstack((y, 10)) mi_2 = _compute_mi(x, y, True, False) - assert_equal(mi_1, mi_2) + assert mi_1 == mi_2 # We are going test that feature ordering by MI matches our expectations. @@ -166,11 +166,11 @@ def test_mutual_info_classif_mixed(): n_neighbors=n_neighbors, random_state=0) # Check that the continuous values have an higher MI with greater # n_neighbors - assert_greater(mi_nn[0], mi[0]) - assert_greater(mi_nn[1], mi[1]) + assert mi_nn[0] > mi[0] + assert mi_nn[1] > mi[1] # The n_neighbors should not have any effect on the discrete value # The MI should be the same - assert_equal(mi_nn[2], mi[2]) + assert mi_nn[2] == mi[2] def test_mutual_info_options(): diff --git a/sklearn/feature_selection/tests/test_rfe.py b/sklearn/feature_selection/tests/test_rfe.py index 9bd4f69fc9784..1ad5375edb6f3 100644 --- a/sklearn/feature_selection/tests/test_rfe.py +++ b/sklearn/feature_selection/tests/test_rfe.py @@ -66,7 +66,7 @@ def test_rfe_features_importance(): random_state=generator, max_depth=2) rfe = RFE(estimator=clf, n_features_to_select=4, step=0.1) rfe.fit(X, y) - assert_equal(len(rfe.ranking_), X.shape[1]) + assert len(rfe.ranking_) == X.shape[1] clf_svc = SVC(kernel="linear") rfe_svc = RFE(estimator=clf_svc, n_features_to_select=4, step=0.1) @@ -89,7 +89,7 @@ def test_rfe(): rfe.fit(X, y) X_r = rfe.transform(X) clf.fit(X_r, y) - assert_equal(len(rfe.ranking_), X.shape[1]) + assert len(rfe.ranking_) == X.shape[1] # sparse model clf_sparse = SVC(kernel="linear") @@ -97,11 +97,11 @@ def test_rfe(): rfe_sparse.fit(X_sparse, y) X_r_sparse = rfe_sparse.transform(X_sparse) - assert_equal(X_r.shape, iris.data.shape) + assert X_r.shape == iris.data.shape assert_array_almost_equal(X_r[:10], iris.data[:10]) assert_array_almost_equal(rfe.predict(X), clf.predict(iris.data)) - assert_equal(rfe.score(X, y), clf.score(iris.data, iris.target)) + assert rfe.score(X, y) == clf.score(iris.data, iris.target) assert_array_almost_equal(X_r, X_r_sparse.toarray()) @@ -117,8 +117,8 @@ def test_rfe_mockclassifier(): rfe.fit(X, y) X_r = rfe.transform(X) 
clf.fit(X_r, y) - assert_equal(len(rfe.ranking_), X.shape[1]) - assert_equal(X_r.shape, iris.data.shape) + assert len(rfe.ranking_) == X.shape[1] + assert X_r.shape == iris.data.shape def test_rfecv(): @@ -131,8 +131,8 @@ def test_rfecv(): rfecv = RFECV(estimator=SVC(kernel="linear"), step=1) rfecv.fit(X, y) # non-regression test for missing worst feature: - assert_equal(len(rfecv.grid_scores_), X.shape[1]) - assert_equal(len(rfecv.ranking_), X.shape[1]) + assert len(rfecv.grid_scores_) == X.shape[1] + assert len(rfecv.ranking_) == X.shape[1] X_r = rfecv.transform(X) # All the noisy variable were filtered out @@ -169,13 +169,13 @@ def test_scorer(estimator, X, y): # RFECV is to return the FEWEST features that maximize the CV score. # Because test_scorer always returns 1.0 in this example, RFECV should # reduce the dimensionality to a single feature (i.e. n_features_ = 1) - assert_equal(rfecv.n_features_, 1) + assert rfecv.n_features_ == 1 # Same as the first two tests, but with step=2 rfecv = RFECV(estimator=SVC(kernel="linear"), step=2) rfecv.fit(X, y) - assert_equal(len(rfecv.grid_scores_), 6) - assert_equal(len(rfecv.ranking_), X.shape[1]) + assert len(rfecv.grid_scores_) == 6 + assert len(rfecv.ranking_) == X.shape[1] X_r = rfecv.transform(X) assert_array_equal(X_r, iris.data) @@ -203,8 +203,8 @@ def test_rfecv_mockclassifier(): rfecv = RFECV(estimator=MockClassifier(), step=1) rfecv.fit(X, y) # non-regression test for missing worst feature: - assert_equal(len(rfecv.grid_scores_), X.shape[1]) - assert_equal(len(rfecv.ranking_), X.shape[1]) + assert len(rfecv.grid_scores_) == X.shape[1] + assert len(rfecv.ranking_) == X.shape[1] def test_rfecv_verbose_output(): @@ -223,7 +223,7 @@ def test_rfecv_verbose_output(): verbose_output = sys.stdout verbose_output.seek(0) - assert_greater(len(verbose_output.readline()), 0) + assert len(verbose_output.readline()) > 0 def test_rfecv_grid_scores_size(): @@ -248,11 +248,11 @@ def test_rfecv_grid_scores_size(): def test_rfe_estimator_tags(): rfe = RFE(SVC(kernel='linear')) - assert_equal(rfe._estimator_type, "classifier") + assert rfe._estimator_type == "classifier" # make sure that cross-validation is stratified iris = load_iris() score = cross_val_score(rfe, iris.data, iris.target) - assert_greater(score.min(), .7) + assert score.min() > .7 def test_rfe_min_step(): @@ -264,17 +264,17 @@ def test_rfe_min_step(): # Test when floor(step * n_features) <= 0 selector = RFE(estimator, step=0.01) sel = selector.fit(X, y) - assert_equal(sel.support_.sum(), n_features // 2) + assert sel.support_.sum() == n_features // 2 # Test when step is between (0,1) and floor(step * n_features) > 0 selector = RFE(estimator, step=0.20) sel = selector.fit(X, y) - assert_equal(sel.support_.sum(), n_features // 2) + assert sel.support_.sum() == n_features // 2 # Test when step is an integer selector = RFE(estimator, step=5) sel = selector.fit(X, y) - assert_equal(sel.support_.sum(), n_features // 2) + assert sel.support_.sum() == n_features // 2 def test_number_of_subsets_of_features(): @@ -307,9 +307,9 @@ def formula2(n_features, n_features_to_select, step): n_features_to_select=n_features_to_select, step=step) rfe.fit(X, y) # this number also equals to the maximum of ranking_ - assert_equal(np.max(rfe.ranking_), + assert (np.max(rfe.ranking_) == formula1(n_features, n_features_to_select, step)) - assert_equal(np.max(rfe.ranking_), + assert (np.max(rfe.ranking_) == formula2(n_features, n_features_to_select, step)) # In RFECV, 'fit' calls 'RFE._fit' @@ -331,9 +331,9 @@ def 
formula2(n_features, n_features_to_select, step): rfecv = RFECV(estimator=SVC(kernel="linear"), step=step) rfecv.fit(X, y) - assert_equal(rfecv.grid_scores_.shape[0], + assert (rfecv.grid_scores_.shape[0] == formula1(n_features, n_features_to_select, step)) - assert_equal(rfecv.grid_scores_.shape[0], + assert (rfecv.grid_scores_.shape[0] == formula2(n_features, n_features_to_select, step)) diff --git a/sklearn/feature_selection/tests/test_variance_threshold.py b/sklearn/feature_selection/tests/test_variance_threshold.py index 53a90ace37a40..4164caeac2246 100644 --- a/sklearn/feature_selection/tests/test_variance_threshold.py +++ b/sklearn/feature_selection/tests/test_variance_threshold.py @@ -28,7 +28,7 @@ def test_variance_threshold(): # Test VarianceThreshold with custom variance. for X in [data, csr_matrix(data)]: X = VarianceThreshold(threshold=.4).fit_transform(X) - assert_equal((len(data), 1), X.shape) + assert (len(data), 1) == X.shape def test_zero_variance_floating_point_error(): From 344aa108658952aeedf4e7ebbcdf8ca83880ff1c Mon Sep 17 00:00:00 2001 From: adrinjalali Date: Sun, 30 Jun 2019 15:38:54 +0200 Subject: [PATCH 08/22] fix gaussian_processes --- sklearn/gaussian_process/tests/test_gpc.py | 6 ++-- sklearn/gaussian_process/tests/test_gpr.py | 12 +++---- .../gaussian_process/tests/test_kernels.py | 32 +++++++++---------- 3 files changed, 25 insertions(+), 25 deletions(-) diff --git a/sklearn/gaussian_process/tests/test_gpc.py b/sklearn/gaussian_process/tests/test_gpc.py index dc37a317b1e14..46d2498733d4d 100644 --- a/sklearn/gaussian_process/tests/test_gpc.py +++ b/sklearn/gaussian_process/tests/test_gpc.py @@ -49,7 +49,7 @@ def test_predict_consistent(kernel): def test_lml_improving(kernel): # Test that hyperparameter-tuning improves log-marginal likelihood. gpc = GaussianProcessClassifier(kernel=kernel).fit(X, y) - assert_greater(gpc.log_marginal_likelihood(gpc.kernel_.theta), + assert (gpc.log_marginal_likelihood(gpc.kernel_.theta) > gpc.log_marginal_likelihood(kernel.theta)) @@ -106,7 +106,7 @@ def test_random_starts(): kernel=kernel, n_restarts_optimizer=n_restarts_optimizer, random_state=0).fit(X, y) lml = gp.log_marginal_likelihood(gp.kernel_.theta) - assert_greater(lml, last_lml - np.finfo(np.float32).eps) + assert lml > last_lml - np.finfo(np.float32).eps last_lml = lml @@ -129,7 +129,7 @@ def optimizer(obj_func, initial_theta, bounds): gpc = GaussianProcessClassifier(kernel=kernel, optimizer=optimizer) gpc.fit(X, y_mc) # Checks that optimizer improved marginal likelihood - assert_greater(gpc.log_marginal_likelihood(gpc.kernel_.theta), + assert (gpc.log_marginal_likelihood(gpc.kernel_.theta) > gpc.log_marginal_likelihood(kernel.theta)) diff --git a/sklearn/gaussian_process/tests/test_gpr.py b/sklearn/gaussian_process/tests/test_gpr.py index aebe35cfa2a14..6aa8e97168591 100644 --- a/sklearn/gaussian_process/tests/test_gpr.py +++ b/sklearn/gaussian_process/tests/test_gpr.py @@ -57,7 +57,7 @@ def test_gpr_interpolation(kernel): def test_lml_improving(kernel): # Test that hyperparameter-tuning improves log-marginal likelihood. gpr = GaussianProcessRegressor(kernel=kernel).fit(X, y) - assert_greater(gpr.log_marginal_likelihood(gpr.kernel_.theta), + assert (gpr.log_marginal_likelihood(gpr.kernel_.theta) > gpr.log_marginal_likelihood(kernel.theta)) @@ -65,7 +65,7 @@ def test_lml_improving(kernel): def test_lml_precomputed(kernel): # Test that lml of optimized kernel is stored correctly. 
gpr = GaussianProcessRegressor(kernel=kernel).fit(X, y) - assert_equal(gpr.log_marginal_likelihood(gpr.kernel_.theta), + assert (gpr.log_marginal_likelihood(gpr.kernel_.theta) == gpr.log_marginal_likelihood()) @@ -145,7 +145,7 @@ def test_no_optimizer(): # Test that kernel parameters are unmodified when optimizer is None. kernel = RBF(1.0) gpr = GaussianProcessRegressor(kernel=kernel, optimizer=None).fit(X, y) - assert_equal(np.exp(gpr.kernel_.theta), 1.0) + assert np.exp(gpr.kernel_.theta) == 1.0 @pytest.mark.parametrize('kernel', kernels) @@ -168,7 +168,7 @@ def test_anisotropic_kernel(): kernel = RBF([1.0, 1.0]) gpr = GaussianProcessRegressor(kernel=kernel).fit(X, y) - assert_greater(np.exp(gpr.kernel_.theta[1]), + assert (np.exp(gpr.kernel_.theta[1]) > np.exp(gpr.kernel_.theta[0]) * 5) @@ -191,7 +191,7 @@ def test_random_starts(): kernel=kernel, n_restarts_optimizer=n_restarts_optimizer, random_state=0,).fit(X, y) lml = gp.log_marginal_likelihood(gp.kernel_.theta) - assert_greater(lml, last_lml - np.finfo(np.float32).eps) + assert lml > last_lml - np.finfo(np.float32).eps last_lml = lml @@ -286,7 +286,7 @@ def optimizer(obj_func, initial_theta, bounds): gpr = GaussianProcessRegressor(kernel=kernel, optimizer=optimizer) gpr.fit(X, y) # Checks that optimizer improved marginal likelihood - assert_greater(gpr.log_marginal_likelihood(gpr.kernel_.theta), + assert (gpr.log_marginal_likelihood(gpr.kernel_.theta) > gpr.log_marginal_likelihood(gpr.kernel.theta)) diff --git a/sklearn/gaussian_process/tests/test_kernels.py b/sklearn/gaussian_process/tests/test_kernels.py index 33a769b852c59..510707cb760fb 100644 --- a/sklearn/gaussian_process/tests/test_kernels.py +++ b/sklearn/gaussian_process/tests/test_kernels.py @@ -53,9 +53,9 @@ def test_kernel_gradient(kernel): # Compare analytic and numeric gradient of kernels. K, K_gradient = kernel(X, eval_gradient=True) - assert_equal(K_gradient.shape[0], X.shape[0]) - assert_equal(K_gradient.shape[1], X.shape[0]) - assert_equal(K_gradient.shape[2], kernel.theta.shape[0]) + assert K_gradient.shape[0] == X.shape[0] + assert K_gradient.shape[1] == X.shape[0] + assert K_gradient.shape[2] == kernel.theta.shape[0] def eval_kernel_for_theta(theta): kernel_clone = kernel.clone_with_theta(theta) @@ -84,15 +84,15 @@ def test_kernel_theta(kernel): args = [p.name for p in init_sign if p.name != 'self'] theta_vars = map(lambda s: s[0:-len("_bounds")], filter(lambda s: s.endswith("_bounds"), args)) - assert_equal( + assert ( set(hyperparameter.name - for hyperparameter in kernel.hyperparameters), + for hyperparameter in kernel.hyperparameters) == set(theta_vars)) # Check that values returned in theta are consistent with # hyperparameter values (being their logarithms) for i, hyperparameter in enumerate(kernel.hyperparameters): - assert_equal(theta[i], + assert (theta[i] == np.log(getattr(kernel, hyperparameter.name))) # Fixed kernel parameters must be excluded from theta and gradient. 
@@ -105,14 +105,14 @@ def test_kernel_theta(kernel): # Check that theta and K_gradient are identical with the fixed # dimension left out _, K_gradient_new = new_kernel(X, eval_gradient=True) - assert_equal(theta.shape[0], new_kernel.theta.shape[0] + 1) - assert_equal(K_gradient.shape[2], K_gradient_new.shape[2] + 1) + assert theta.shape[0] == new_kernel.theta.shape[0] + 1 + assert K_gradient.shape[2] == K_gradient_new.shape[2] + 1 if i > 0: - assert_equal(theta[:i], new_kernel.theta[:i]) + assert theta[:i] == new_kernel.theta[:i] assert_array_equal(K_gradient[..., :i], K_gradient_new[..., :i]) if i + 1 < len(kernel.hyperparameters): - assert_equal(theta[i + 1:], new_kernel.theta[i:]) + assert theta[i + 1:] == new_kernel.theta[i:] assert_array_equal(K_gradient[..., i + 1:], K_gradient_new[..., i:]) @@ -192,7 +192,7 @@ def check_hyperparameters_equal(kernel1, kernel2): if attr.startswith("hyperparameter_"): attr_value1 = getattr(kernel1, attr) attr_value2 = getattr(kernel2, attr) - assert_equal(attr_value1, attr_value2) + assert attr_value1 == attr_value2 @pytest.mark.parametrize("kernel", kernels) @@ -202,11 +202,11 @@ def test_kernel_clone(kernel): # XXX: Should this be fixed? # This differs from the sklearn's estimators equality check. - assert_equal(kernel, kernel_cloned) - assert_not_equal(id(kernel), id(kernel_cloned)) + assert kernel == kernel_cloned + assert id(kernel) != id(kernel_cloned) # Check that all constructor parameters are equal. - assert_equal(kernel.get_params(), kernel_cloned.get_params()) + assert kernel.get_params() == kernel_cloned.get_params() # Check that all hyperparameters are equal. check_hyperparameters_equal(kernel, kernel_cloned) @@ -236,9 +236,9 @@ def test_kernel_clone_after_set_params(kernel): params['length_scale_bounds'] = bounds * 2 kernel_cloned.set_params(**params) kernel_cloned_clone = clone(kernel_cloned) - assert_equal(kernel_cloned_clone.get_params(), + assert (kernel_cloned_clone.get_params() == kernel_cloned.get_params()) - assert_not_equal(id(kernel_cloned_clone), id(kernel_cloned)) + assert id(kernel_cloned_clone) != id(kernel_cloned) check_hyperparameters_equal(kernel_cloned, kernel_cloned_clone) From 103bc56aaaec6d33d19c1820a60852e64e51476d Mon Sep 17 00:00:00 2001 From: adrinjalali Date: Sun, 30 Jun 2019 15:49:12 +0200 Subject: [PATCH 09/22] fix linear_model --- sklearn/linear_model/tests/test_base.py | 72 ++++++++-------- sklearn/linear_model/tests/test_bayes.py | 2 +- .../tests/test_coordinate_descent.py | 48 +++++------ sklearn/linear_model/tests/test_huber.py | 4 +- .../linear_model/tests/test_least_angle.py | 34 ++++---- sklearn/linear_model/tests/test_logistic.py | 48 +++++------ sklearn/linear_model/tests/test_omp.py | 36 ++++---- .../tests/test_passive_aggressive.py | 8 +- sklearn/linear_model/tests/test_perceptron.py | 2 +- sklearn/linear_model/tests/test_ransac.py | 86 +++++++++---------- sklearn/linear_model/tests/test_ridge.py | 54 ++++++------ sklearn/linear_model/tests/test_sag.py | 8 +- sklearn/linear_model/tests/test_sgd.py | 86 +++++++++---------- .../tests/test_sparse_coordinate_descent.py | 18 ++-- sklearn/linear_model/tests/test_theil_sen.py | 10 +-- 15 files changed, 258 insertions(+), 258 deletions(-) diff --git a/sklearn/linear_model/tests/test_base.py b/sklearn/linear_model/tests/test_base.py index 1679f9a9c2930..84676151d2a74 100644 --- a/sklearn/linear_model/tests/test_base.py +++ b/sklearn/linear_model/tests/test_base.py @@ -73,8 +73,8 @@ def test_linear_regression_sample_weights(): coefs1 = reg.coef_ inter1 = 
reg.intercept_ - assert_equal(reg.coef_.shape, (X.shape[1], )) # sanity checks - assert_greater(reg.score(X, y), 0.5) + assert reg.coef_.shape == (X.shape[1], ) # sanity checks + assert reg.score(X, y) > 0.5 # Closed form of the weighted least square # theta = (X^T W X)^(-1) * X^T W y @@ -130,11 +130,11 @@ def test_fit_intercept(): lr3_without_intercept = LinearRegression(fit_intercept=False).fit(X3, y) lr3_with_intercept = LinearRegression(fit_intercept=True).fit(X3, y) - assert_equal(lr2_with_intercept.coef_.shape, + assert (lr2_with_intercept.coef_.shape == lr2_without_intercept.coef_.shape) - assert_equal(lr3_with_intercept.coef_.shape, + assert (lr3_with_intercept.coef_.shape == lr3_without_intercept.coef_.shape) - assert_equal(lr2_without_intercept.coef_.ndim, + assert (lr2_without_intercept.coef_.ndim == lr3_without_intercept.coef_.ndim) @@ -183,7 +183,7 @@ def test_linear_regression_multiple_outcome(random_state=0): reg = LinearRegression(fit_intercept=True) reg.fit((X), Y) - assert_equal(reg.coef_.shape, (2, n_features)) + assert reg.coef_.shape == (2, n_features) Y_pred = reg.predict(X) reg.fit(X, y) y_pred = reg.predict(X) @@ -200,7 +200,7 @@ def test_linear_regression_sparse_multiple_outcome(random_state=0): ols = LinearRegression() ols.fit(X, Y) - assert_equal(ols.coef_.shape, (2, n_features)) + assert ols.coef_.shape == (2, n_features) Y_pred = ols.predict(X) ols.fit(X, y.ravel()) y_pred = ols.predict(X) @@ -344,7 +344,7 @@ def test_csr_preprocess_data(): X[X < 2.5] = 0.0 csr = sparse.csr_matrix(X) csr_, y, _, _, _ = _preprocess_data(csr, y, True) - assert_equal(csr_.getformat(), 'csr') + assert csr_.getformat() == 'csr' @pytest.mark.parametrize('is_sparse', (True, False)) @@ -399,34 +399,34 @@ def test_dtype_preprocess_data(): _preprocess_data(X_64, y_32, fit_intercept=fit_intercept, normalize=normalize, return_mean=True)) - assert_equal(Xt_32.dtype, np.float32) - assert_equal(yt_32.dtype, np.float32) - assert_equal(X_mean_32.dtype, np.float32) - assert_equal(y_mean_32.dtype, np.float32) - assert_equal(X_norm_32.dtype, np.float32) - - assert_equal(Xt_64.dtype, np.float64) - assert_equal(yt_64.dtype, np.float64) - assert_equal(X_mean_64.dtype, np.float64) - assert_equal(y_mean_64.dtype, np.float64) - assert_equal(X_norm_64.dtype, np.float64) - - assert_equal(Xt_3264.dtype, np.float32) - assert_equal(yt_3264.dtype, np.float32) - assert_equal(X_mean_3264.dtype, np.float32) - assert_equal(y_mean_3264.dtype, np.float32) - assert_equal(X_norm_3264.dtype, np.float32) - - assert_equal(Xt_6432.dtype, np.float64) - assert_equal(yt_6432.dtype, np.float64) - assert_equal(X_mean_6432.dtype, np.float64) - assert_equal(y_mean_6432.dtype, np.float64) - assert_equal(X_norm_6432.dtype, np.float64) - - assert_equal(X_32.dtype, np.float32) - assert_equal(y_32.dtype, np.float32) - assert_equal(X_64.dtype, np.float64) - assert_equal(y_64.dtype, np.float64) + assert Xt_32.dtype == np.float32 + assert yt_32.dtype == np.float32 + assert X_mean_32.dtype == np.float32 + assert y_mean_32.dtype == np.float32 + assert X_norm_32.dtype == np.float32 + + assert Xt_64.dtype == np.float64 + assert yt_64.dtype == np.float64 + assert X_mean_64.dtype == np.float64 + assert y_mean_64.dtype == np.float64 + assert X_norm_64.dtype == np.float64 + + assert Xt_3264.dtype == np.float32 + assert yt_3264.dtype == np.float32 + assert X_mean_3264.dtype == np.float32 + assert y_mean_3264.dtype == np.float32 + assert X_norm_3264.dtype == np.float32 + + assert Xt_6432.dtype == np.float64 + assert yt_6432.dtype == np.float64 + 
assert X_mean_6432.dtype == np.float64 + assert y_mean_6432.dtype == np.float64 + assert X_norm_6432.dtype == np.float64 + + assert X_32.dtype == np.float32 + assert y_32.dtype == np.float32 + assert X_64.dtype == np.float64 + assert y_64.dtype == np.float64 assert_array_almost_equal(Xt_32, Xt_64) assert_array_almost_equal(yt_32, yt_64) diff --git a/sklearn/linear_model/tests/test_bayes.py b/sklearn/linear_model/tests/test_bayes.py index 231260c3847aa..355cd042347af 100644 --- a/sklearn/linear_model/tests/test_bayes.py +++ b/sklearn/linear_model/tests/test_bayes.py @@ -184,7 +184,7 @@ def test_update_of_sigma_in_ard(): clf.fit(X, y) # With the inputs above, ARDRegression prunes one of the two coefficients # in the first iteration. Hence, the expected shape of `sigma_` is (1, 1). - assert_equal(clf.sigma_.shape, (1, 1)) + assert clf.sigma_.shape == (1, 1) # Ensure that no error is thrown at prediction stage clf.predict(X, return_std=True) diff --git a/sklearn/linear_model/tests/test_coordinate_descent.py b/sklearn/linear_model/tests/test_coordinate_descent.py index 5e9088efe1ab9..aa6773fce415b 100644 --- a/sklearn/linear_model/tests/test_coordinate_descent.py +++ b/sklearn/linear_model/tests/test_coordinate_descent.py @@ -169,7 +169,7 @@ def test_lasso_cv(): clf.mse_path_[5].mean(), significant=2) # test set - assert_greater(clf.score(X_test, y_test), 0.99) + assert clf.score(X_test, y_test) > 0.99 def test_lasso_cv_with_some_model_selection(): @@ -250,7 +250,7 @@ def test_enet_path(): assert_almost_equal(clf.alpha_, min(clf.alphas_)) # Non-sparse ground truth: we should have selected an elastic-net # that is closer to ridge than to lasso - assert_equal(clf.l1_ratio_, min(clf.l1_ratio)) + assert clf.l1_ratio_ == min(clf.l1_ratio) clf = ElasticNetCV(alphas=[0.01, 0.05, 0.1], eps=2e-3, l1_ratio=[0.5, 0.7], cv=3, @@ -262,11 +262,11 @@ def test_enet_path(): assert_almost_equal(clf.alpha_, min(clf.alphas_)) # Non-sparse ground truth: we should have selected an elastic-net # that is closer to ridge than to lasso - assert_equal(clf.l1_ratio_, min(clf.l1_ratio)) + assert clf.l1_ratio_ == min(clf.l1_ratio) # We are in well-conditioned settings with low noise: we should # have a good test-set performance - assert_greater(clf.score(X_test, y_test), 0.99) + assert clf.score(X_test, y_test) > 0.99 # Multi-output/target case X, y, X_test, y_test = build_dataset(n_features=10, n_targets=3) @@ -275,8 +275,8 @@ def test_enet_path(): ignore_warnings(clf.fit)(X, y) # We are in well-conditioned settings with low noise: we should # have a good test-set performance - assert_greater(clf.score(X_test, y_test), 0.99) - assert_equal(clf.coef_.shape, (3, 10)) + assert clf.score(X_test, y_test) > 0.99 + assert clf.coef_.shape == (3, 10) # Mono-output should have same cross-validated alpha_ and l1_ratio_ # in both cases. 
@@ -297,8 +297,8 @@ def test_path_parameters(): l1_ratio=0.5, tol=1e-3) clf.fit(X, y) # new params assert_almost_equal(0.5, clf.l1_ratio) - assert_equal(50, clf.n_alphas) - assert_equal(50, len(clf.alphas_)) + assert 50 == clf.n_alphas + assert 50 == len(clf.alphas_) def test_warm_start(): @@ -465,19 +465,19 @@ def test_multitask_enet_and_lasso_cv(): clf = MultiTaskElasticNetCV(n_alphas=10, eps=1e-3, max_iter=100, l1_ratio=[0.3, 0.5], tol=1e-3, cv=3) clf.fit(X, y) - assert_equal(0.5, clf.l1_ratio_) - assert_equal((3, X.shape[1]), clf.coef_.shape) - assert_equal((3, ), clf.intercept_.shape) - assert_equal((2, 10, 3), clf.mse_path_.shape) - assert_equal((2, 10), clf.alphas_.shape) + assert 0.5 == clf.l1_ratio_ + assert (3, X.shape[1]) == clf.coef_.shape + assert (3, ) == clf.intercept_.shape + assert (2, 10, 3) == clf.mse_path_.shape + assert (2, 10) == clf.alphas_.shape X, y, _, _ = build_dataset(n_targets=3) clf = MultiTaskLassoCV(n_alphas=10, eps=1e-3, max_iter=100, tol=1e-3, cv=3) clf.fit(X, y) - assert_equal((3, X.shape[1]), clf.coef_.shape) - assert_equal((3, ), clf.intercept_.shape) - assert_equal((10, 3), clf.mse_path_.shape) - assert_equal(10, len(clf.alphas_)) + assert (3, X.shape[1]) == clf.coef_.shape + assert (3, ) == clf.intercept_.shape + assert (10, 3) == clf.mse_path_.shape + assert 10 == len(clf.alphas_) def test_1d_multioutput_enet_and_multitask_enet_cv(): @@ -540,20 +540,20 @@ def test_warm_start_convergence(): n_iter_reference = model.n_iter_ # This dataset is not trivial enough for the model to converge in one pass. - assert_greater(n_iter_reference, 2) + assert n_iter_reference > 2 # Check that n_iter_ is invariant to multiple calls to fit # when warm_start=False, all else being equal. model.fit(X, y) n_iter_cold_start = model.n_iter_ - assert_equal(n_iter_cold_start, n_iter_reference) + assert n_iter_cold_start == n_iter_reference # Fit the same model again, using a warm start: the optimizer just performs # a single pass before checking that it has already converged model.set_params(warm_start=True) model.fit(X, y) n_iter_warm_start = model.n_iter_ - assert_equal(n_iter_warm_start, 1) + assert n_iter_warm_start == 1 def test_warm_start_convergence_with_regularizer_decrement(): @@ -568,7 +568,7 @@ def test_warm_start_convergence_with_regularizer_decrement(): # Fitting with high regularization is easier it should converge faster # in general. 
high_reg_model = ElasticNet(alpha=final_alpha * 10).fit(X, y) - assert_greater(low_reg_model.n_iter_, high_reg_model.n_iter_) + assert low_reg_model.n_iter_ > high_reg_model.n_iter_ # Fit the solution to the original, less regularized version of the # problem but from the solution of the highly regularized variant of @@ -577,7 +577,7 @@ def test_warm_start_convergence_with_regularizer_decrement(): warm_low_reg_model = deepcopy(high_reg_model) warm_low_reg_model.set_params(warm_start=True, alpha=final_alpha) warm_low_reg_model.fit(X, y) - assert_greater(low_reg_model.n_iter_, warm_low_reg_model.n_iter_) + assert low_reg_model.n_iter_ > warm_low_reg_model.n_iter_ def test_random_descent(): @@ -741,7 +741,7 @@ def test_enet_float_precision(): coef[('simple', dtype)] = clf.coef_ intercept[('simple', dtype)] = clf.intercept_ - assert_equal(clf.coef_.dtype, dtype) + assert clf.coef_.dtype == dtype # test precompute Gram array Gram = X.T.dot(X) @@ -762,7 +762,7 @@ def test_enet_float_precision(): clf_multioutput.fit(X, multi_y) coef[('multi', dtype)] = clf_multioutput.coef_ intercept[('multi', dtype)] = clf_multioutput.intercept_ - assert_equal(clf.coef_.dtype, dtype) + assert clf.coef_.dtype == dtype for v in ['simple', 'multi']: assert_array_almost_equal(coef[(v, np.float32)], diff --git a/sklearn/linear_model/tests/test_huber.py b/sklearn/linear_model/tests/test_huber.py index 156ac72958d01..4145888c3d996 100644 --- a/sklearn/linear_model/tests/test_huber.py +++ b/sklearn/linear_model/tests/test_huber.py @@ -201,10 +201,10 @@ def test_huber_better_r2_score(): ridge.fit(X, y) ridge_score = ridge.score(X[mask], y[mask]) ridge_outlier_score = ridge.score(X[~mask], y[~mask]) - assert_greater(huber_score, ridge_score) + assert huber_score > ridge_score # The huber model should also fit poorly on the outliers. - assert_greater(ridge_outlier_score, huber_outlier_score) + assert ridge_outlier_score > huber_outlier_score def test_huber_bool(): diff --git a/sklearn/linear_model/tests/test_least_angle.py b/sklearn/linear_model/tests/test_least_angle.py index 1533c981fa391..9633989693a83 100644 --- a/sklearn/linear_model/tests/test_least_angle.py +++ b/sklearn/linear_model/tests/test_least_angle.py @@ -78,7 +78,7 @@ def test_simple_precomputed(): def _assert_same_lars_path_result(output1, output2): - assert_equal(len(output1), len(output2)) + assert len(output1) == len(output2) for o1, o2 in zip(output1, output2): assert_allclose(o1, o2) @@ -150,7 +150,7 @@ def test_collinearity(): _, _, coef_path_ = f(linear_model.lars_path)(X, y, alpha_min=0.01) assert not np.isnan(coef_path_).any() residual = np.dot(X, coef_path_[:, -1]) - y - assert_less((residual ** 2).sum(), 1.) # just make sure it's bounded + assert (residual ** 2).sum() < 1. # just make sure it's bounded n_samples = 10 X = rng.rand(n_samples, 5) @@ -246,7 +246,7 @@ def test_rank_deficient_design(): coef_cd_ = coord_descent.fit(X, y).coef_ obj_cd = ((1. / (2. * 3.)) * linalg.norm(y - np.dot(X, coef_cd_)) ** 2 + .1 * linalg.norm(coef_cd_, 1)) - assert_less(obj_lars, obj_cd * (1. + 1e-8)) + assert obj_lars < obj_cd * (1. 
+ 1e-8) def test_lasso_lars_vs_lasso_cd(): @@ -262,7 +262,7 @@ def test_lasso_lars_vs_lasso_cd(): lasso_cd.alpha = a lasso_cd.fit(X, y) error = linalg.norm(c - lasso_cd.coef_) - assert_less(error, 0.01) + assert error < 0.01 # similar test, with the classifiers for alpha in np.linspace(1e-2, 1 - 1e-2, 20): @@ -270,7 +270,7 @@ def test_lasso_lars_vs_lasso_cd(): clf2 = linear_model.Lasso(alpha=alpha, tol=1e-8, normalize=False).fit(X, y) err = linalg.norm(clf1.coef_ - clf2.coef_) - assert_less(err, 1e-3) + assert err < 1e-3 # same test, with normalized data X = diabetes.data @@ -283,7 +283,7 @@ def test_lasso_lars_vs_lasso_cd(): lasso_cd.alpha = a lasso_cd.fit(X, y) error = linalg.norm(c - lasso_cd.coef_) - assert_less(error, 0.01) + assert error < 0.01 def test_lasso_lars_vs_lasso_cd_early_stopping(): @@ -299,7 +299,7 @@ def test_lasso_lars_vs_lasso_cd_early_stopping(): lasso_cd.alpha = alphas[-1] lasso_cd.fit(X, y) error = linalg.norm(lasso_path[:, -1] - lasso_cd.coef_) - assert_less(error, 0.01) + assert error < 0.01 # same test, with normalization for alpha_min in alphas_min: @@ -310,7 +310,7 @@ def test_lasso_lars_vs_lasso_cd_early_stopping(): lasso_cd.alpha = alphas[-1] lasso_cd.fit(X, y) error = linalg.norm(lasso_path[:, -1] - lasso_cd.coef_) - assert_less(error, 0.01) + assert error < 0.01 def test_lasso_lars_path_length(): @@ -382,7 +382,7 @@ def objective_function(coef): cd_coef_ = coord_descent.fit(X, y).coef_ cd_obj = objective_function(cd_coef_) - assert_less(lars_obj, cd_obj * (1. + 1e-8)) + assert lars_obj < cd_obj * (1. + 1e-8) def test_lars_add_features(): @@ -399,10 +399,10 @@ def test_lars_add_features(): def test_lars_n_nonzero_coefs(verbose=False): lars = linear_model.Lars(n_nonzero_coefs=6, verbose=verbose) lars.fit(X, y) - assert_equal(len(lars.coef_.nonzero()[0]), 6) + assert len(lars.coef_.nonzero()[0]) == 6 # The path should be of length 6 + 1 in a Lars going down to 6 # non-zero coefs - assert_equal(len(lars.alphas_), 7) + assert len(lars.alphas_) == 7 @ignore_warnings @@ -482,9 +482,9 @@ def test_lasso_lars_ic(): lars_aic.fit(X, y) nonzero_bic = np.where(lars_bic.coef_)[0] nonzero_aic = np.where(lars_aic.coef_)[0] - assert_greater(lars_bic.alpha_, lars_aic.alpha_) - assert_less(len(nonzero_bic), len(nonzero_aic)) - assert_less(np.max(nonzero_bic), diabetes.data.shape[1]) + assert lars_bic.alpha_ > lars_aic.alpha_ + assert len(nonzero_bic) < len(nonzero_aic) + assert np.max(nonzero_bic) < diabetes.data.shape[1] # test error on unknown IC lars_broken = linear_model.LassoLarsIC('') @@ -577,7 +577,7 @@ def test_lasso_lars_vs_lasso_cd_positive(): lasso_cd.alpha = a lasso_cd.fit(X, y) error = linalg.norm(c - lasso_cd.coef_) - assert_less(error, 0.01) + assert error < 0.01 # The range of alphas chosen for coefficient comparison here is restricted # as compared with the above test without the positive option. 
This is due @@ -594,7 +594,7 @@ def test_lasso_lars_vs_lasso_cd_positive(): clf2 = linear_model.Lasso(fit_intercept=False, alpha=alpha, tol=1e-8, normalize=False, positive=True).fit(X, y) err = linalg.norm(clf1.coef_ - clf2.coef_) - assert_less(err, 1e-3) + assert err < 1e-3 # normalized data X = diabetes.data @@ -606,7 +606,7 @@ def test_lasso_lars_vs_lasso_cd_positive(): lasso_cd.alpha = a lasso_cd.fit(X, y) error = linalg.norm(c - lasso_cd.coef_) - assert_less(error, 0.01) + assert error < 0.01 def test_lasso_lars_vs_R_implementation(): diff --git a/sklearn/linear_model/tests/test_logistic.py b/sklearn/linear_model/tests/test_logistic.py index 8f2c59f30a216..cdbe2f9b3ba37 100644 --- a/sklearn/linear_model/tests/test_logistic.py +++ b/sklearn/linear_model/tests/test_logistic.py @@ -56,11 +56,11 @@ def check_predictions(clf, X, y): predicted = clf.fit(X, y).predict(X) assert_array_equal(clf.classes_, classes) - assert_equal(predicted.shape, (n_samples,)) + assert predicted.shape == (n_samples,) assert_array_equal(predicted, y) probabilities = clf.predict_proba(X) - assert_equal(probabilities.shape, (n_samples, n_classes)) + assert probabilities.shape == (n_samples, n_classes) assert_array_almost_equal(probabilities.sum(axis=1), np.ones(n_samples)) assert_array_equal(probabilities.argmax(axis=1), y) @@ -188,14 +188,14 @@ def test_predict_iris(): assert_array_equal(np.unique(target), clf.classes_) pred = clf.predict(iris.data) - assert_greater(np.mean(pred == target), .95) + assert np.mean(pred == target) > .95 probabilities = clf.predict_proba(iris.data) assert_array_almost_equal(probabilities.sum(axis=1), np.ones(n_samples)) pred = iris.target_names[probabilities.argmax(axis=1)] - assert_greater(np.mean(pred == target), .95) + assert np.mean(pred == target) > .95 @pytest.mark.parametrize('solver', ['lbfgs', 'newton-cg', 'sag', 'saga']) @@ -260,8 +260,8 @@ def test_multinomial_binary(solver): random_state=42, max_iter=2000) clf.fit(iris.data, target) - assert_equal(clf.coef_.shape, (1, iris.data.shape[1])) - assert_equal(clf.intercept_.shape, (1,)) + assert clf.coef_.shape == (1, iris.data.shape[1]) + assert clf.intercept_.shape == (1,) assert_array_equal(clf.predict(iris.data), target) mlr = LogisticRegression(solver=solver, multi_class='multinomial', @@ -269,7 +269,7 @@ def test_multinomial_binary(solver): mlr.fit(iris.data, target) pred = clf.classes_[np.argmax(clf.predict_log_proba(iris.data), axis=1)] - assert_greater(np.mean(pred == target), .9) + assert np.mean(pred == target) > .9 def test_multinomial_binary_probabilities(): @@ -513,7 +513,7 @@ def test_logistic_cv(): assert_array_equal(lr_cv.coef_.shape, (1, n_features)) assert_array_equal(lr_cv.classes_, [-1, 1]) - assert_equal(len(lr_cv.classes_), 2) + assert len(lr_cv.classes_) == 2 coefs_paths = np.asarray(list(lr_cv.coefs_paths_.values())) assert_array_equal(coefs_paths.shape, (1, 3, 1, n_features)) @@ -604,7 +604,7 @@ def test_logistic_cv_sparse(): clfs.fit(csr, y) assert_array_almost_equal(clfs.coef_, clf.coef_) assert_array_almost_equal(clfs.intercept_, clf.intercept_) - assert_equal(clfs.C_, clf.C_) + assert clfs.C_ == clf.C_ def test_intercept_logistic_helper(): @@ -1040,7 +1040,7 @@ def test_logreg_intercept_scaling_zero(): clf = LogisticRegression(fit_intercept=False) clf.fit(X, Y1) - assert_equal(clf.intercept_, 0.) + assert clf.intercept_ == 0. 
def test_logreg_l1(): @@ -1147,13 +1147,13 @@ def test_logreg_predict_proba_multinomial(): clf_ovr = LogisticRegression(multi_class="ovr", solver="lbfgs") clf_ovr.fit(X, y) clf_ovr_loss = log_loss(y, clf_ovr.predict_proba(X)) - assert_greater(clf_ovr_loss, clf_multi_loss) + assert clf_ovr_loss > clf_multi_loss # Predicted probabilities using the soft-max function should give a # smaller loss than those using the logistic function. clf_multi_loss = log_loss(y, clf_multi.predict_proba(X)) clf_wrong_loss = log_loss(y, clf_multi._predict_proba_lr(X)) - assert_greater(clf_wrong_loss, clf_multi_loss) + assert clf_wrong_loss > clf_multi_loss def test_max_iter(): @@ -1172,7 +1172,7 @@ def test_max_iter(): multi_class=multi_class, random_state=0, solver=solver) assert_warns(ConvergenceWarning, lr.fit, X, y_bin) - assert_equal(lr.n_iter_[0], max_iter) + assert lr.n_iter_[0] == max_iter @pytest.mark.parametrize('solver', @@ -1193,16 +1193,16 @@ def test_n_iter(solver): solver=solver, C=1., random_state=42, max_iter=100) clf.fit(X, y) - assert_equal(clf.n_iter_.shape, (n_classes,)) + assert clf.n_iter_.shape == (n_classes,) n_classes = np.unique(y).shape[0] clf = LogisticRegressionCV(tol=1e-2, multi_class='ovr', solver=solver, Cs=n_Cs, cv=n_cv_fold, random_state=42, max_iter=100) clf.fit(X, y) - assert_equal(clf.n_iter_.shape, (n_classes, n_cv_fold, n_Cs)) + assert clf.n_iter_.shape == (n_classes, n_cv_fold, n_Cs) clf.fit(X, y_bin) - assert_equal(clf.n_iter_.shape, (1, n_cv_fold, n_Cs)) + assert clf.n_iter_.shape == (1, n_cv_fold, n_Cs) # multinomial case n_classes = 1 @@ -1213,15 +1213,15 @@ def test_n_iter(solver): solver=solver, C=1., random_state=42, max_iter=100) clf.fit(X, y) - assert_equal(clf.n_iter_.shape, (n_classes,)) + assert clf.n_iter_.shape == (n_classes,) clf = LogisticRegressionCV(tol=1e-2, multi_class='multinomial', solver=solver, Cs=n_Cs, cv=n_cv_fold, random_state=42, max_iter=100) clf.fit(X, y) - assert_equal(clf.n_iter_.shape, (n_classes, n_cv_fold, n_Cs)) + assert clf.n_iter_.shape == (n_classes, n_cv_fold, n_Cs) clf.fit(X, y_bin) - assert_equal(clf.n_iter_.shape, (1, n_cv_fold, n_Cs)) + assert clf.n_iter_.shape == (1, n_cv_fold, n_Cs) @pytest.mark.parametrize('solver', ('newton-cg', 'sag', 'saga', 'lbfgs')) @@ -1251,9 +1251,9 @@ def test_warm_start(solver, warm_start, fit_intercept, multi_class): % (solver, multi_class, str(fit_intercept), str(warm_start))) if warm_start: - assert_greater(2.0, cum_diff, msg) + assert 2.0 > cum_diff, msg else: - assert_greater(cum_diff, 2.0, msg) + assert cum_diff > 2.0, msg def test_saga_vs_liblinear(): @@ -1314,17 +1314,17 @@ def test_dtype_match(solver, multi_class): # Check type consistency lr_32 = clone(lr_templ) lr_32.fit(X_32, y_32) - assert_equal(lr_32.coef_.dtype, X_32.dtype) + assert lr_32.coef_.dtype == X_32.dtype # check consistency with sparsity lr_32_sparse = clone(lr_templ) lr_32_sparse.fit(X_sparse_32, y_32) - assert_equal(lr_32_sparse.coef_.dtype, X_sparse_32.dtype) + assert lr_32_sparse.coef_.dtype == X_sparse_32.dtype # Check accuracy consistency lr_64 = clone(lr_templ) lr_64.fit(X_64, y_64) - assert_equal(lr_64.coef_.dtype, X_64.dtype) + assert lr_64.coef_.dtype == X_64.dtype # solver_tol bounds the norm of the loss gradient # dw ~= inv(H)*grad ==> |dw| ~= |inv(H)| * solver_tol, where H - hessian diff --git a/sklearn/linear_model/tests/test_omp.py b/sklearn/linear_model/tests/test_omp.py index c0736f740835a..074af8045a59d 100644 --- a/sklearn/linear_model/tests/test_omp.py +++ b/sklearn/linear_model/tests/test_omp.py @@ 
-30,16 +30,16 @@ def test_correct_shapes(): - assert_equal(orthogonal_mp(X, y[:, 0], n_nonzero_coefs=5).shape, + assert (orthogonal_mp(X, y[:, 0], n_nonzero_coefs=5).shape == (n_features,)) - assert_equal(orthogonal_mp(X, y, n_nonzero_coefs=5).shape, + assert (orthogonal_mp(X, y, n_nonzero_coefs=5).shape == (n_features, 3)) def test_correct_shapes_gram(): - assert_equal(orthogonal_mp_gram(G, Xy[:, 0], n_nonzero_coefs=5).shape, + assert (orthogonal_mp_gram(G, Xy[:, 0], n_nonzero_coefs=5).shape == (n_features,)) - assert_equal(orthogonal_mp_gram(G, Xy, n_nonzero_coefs=5).shape, + assert (orthogonal_mp_gram(G, Xy, n_nonzero_coefs=5).shape == (n_features, 3)) @@ -120,13 +120,13 @@ def test_orthogonal_mp_gram_readonly(): def test_estimator(): omp = OrthogonalMatchingPursuit(n_nonzero_coefs=n_nonzero_coefs) omp.fit(X, y[:, 0]) - assert_equal(omp.coef_.shape, (n_features,)) - assert_equal(omp.intercept_.shape, ()) + assert omp.coef_.shape == (n_features,) + assert omp.intercept_.shape == () assert np.count_nonzero(omp.coef_) <= n_nonzero_coefs omp.fit(X, y) - assert_equal(omp.coef_.shape, (n_targets, n_features)) - assert_equal(omp.intercept_.shape, (n_targets,)) + assert omp.coef_.shape == (n_targets, n_features) + assert omp.intercept_.shape == (n_targets,) assert np.count_nonzero(omp.coef_) <= n_targets * n_nonzero_coefs coef_normalized = omp.coef_[0].copy() @@ -137,12 +137,12 @@ def test_estimator(): omp.set_params(fit_intercept=False, normalize=False) omp.fit(X, y[:, 0]) assert np.count_nonzero(omp.coef_) <= n_nonzero_coefs - assert_equal(omp.coef_.shape, (n_features,)) - assert_equal(omp.intercept_, 0) + assert omp.coef_.shape == (n_features,) + assert omp.intercept_ == 0 omp.fit(X, y) - assert_equal(omp.coef_.shape, (n_targets, n_features)) - assert_equal(omp.intercept_, 0) + assert omp.coef_.shape == (n_targets, n_features) + assert omp.intercept_ == 0 assert np.count_nonzero(omp.coef_) <= n_targets * n_nonzero_coefs @@ -175,18 +175,18 @@ def test_no_atoms(): Xy_empty = np.dot(X.T, y_empty) gamma_empty = ignore_warnings(orthogonal_mp)(X, y_empty, 1) gamma_empty_gram = ignore_warnings(orthogonal_mp)(G, Xy_empty, 1) - assert_equal(np.all(gamma_empty == 0), True) - assert_equal(np.all(gamma_empty_gram == 0), True) + assert np.all(gamma_empty == 0) + assert np.all(gamma_empty_gram == 0) def test_omp_path(): path = orthogonal_mp(X, y, n_nonzero_coefs=5, return_path=True) last = orthogonal_mp(X, y, n_nonzero_coefs=5, return_path=False) - assert_equal(path.shape, (n_features, n_targets, 5)) + assert path.shape == (n_features, n_targets, 5) assert_array_almost_equal(path[:, :, -1], last) path = orthogonal_mp_gram(G, Xy, n_nonzero_coefs=5, return_path=True) last = orthogonal_mp_gram(G, Xy, n_nonzero_coefs=5, return_path=False) - assert_equal(path.shape, (n_features, n_targets, 5)) + assert path.shape == (n_features, n_targets, 5) assert_array_almost_equal(path[:, :, -1], last) @@ -195,7 +195,7 @@ def test_omp_return_path_prop_with_gram(): precompute=True) last = orthogonal_mp(X, y, n_nonzero_coefs=5, return_path=False, precompute=True) - assert_equal(path.shape, (n_features, n_targets, 5)) + assert path.shape == (n_features, n_targets, 5) assert_array_almost_equal(path[:, :, -1], last) @@ -205,7 +205,7 @@ def test_omp_cv(): ompcv = OrthogonalMatchingPursuitCV(normalize=True, fit_intercept=False, max_iter=10) ompcv.fit(X, y_) - assert_equal(ompcv.n_nonzero_coefs_, n_nonzero_coefs) + assert ompcv.n_nonzero_coefs_ == n_nonzero_coefs assert_array_almost_equal(ompcv.coef_, gamma_) omp =
OrthogonalMatchingPursuit(normalize=True, fit_intercept=False, n_nonzero_coefs=ompcv.n_nonzero_coefs_) diff --git a/sklearn/linear_model/tests/test_passive_aggressive.py b/sklearn/linear_model/tests/test_passive_aggressive.py index f02db4ccdf237..d0253bbce64f6 100644 --- a/sklearn/linear_model/tests/test_passive_aggressive.py +++ b/sklearn/linear_model/tests/test_passive_aggressive.py @@ -79,7 +79,7 @@ def test_classifier_accuracy(): random_state=1, average=average, tol=None) clf.fit(data, y) score = clf.score(data, y) - assert_greater(score, 0.79) + assert score > 0.79 if average: assert hasattr(clf, 'average_coef_') assert hasattr(clf, 'average_intercept_') @@ -99,7 +99,7 @@ def test_classifier_partial_fit(): for t in range(30): clf.partial_fit(data, y, classes) score = clf.score(data, y) - assert_greater(score, 0.79) + assert score > 0.79 if average: assert hasattr(clf, 'average_coef_') assert hasattr(clf, 'average_intercept_') @@ -240,7 +240,7 @@ def test_regressor_mse(): random_state=0, average=average, max_iter=5) reg.fit(data, y_bin) pred = reg.predict(data) - assert_less(np.mean((pred - y_bin) ** 2), 1.7) + assert np.mean((pred - y_bin) ** 2) < 1.7 if average: assert hasattr(reg, 'average_coef_') assert hasattr(reg, 'average_intercept_') @@ -262,7 +262,7 @@ def test_regressor_partial_fit(): for t in range(50): reg.partial_fit(data, y_bin) pred = reg.predict(data) - assert_less(np.mean((pred - y_bin) ** 2), 1.7) + assert np.mean((pred - y_bin) ** 2) < 1.7 if average: assert hasattr(reg, 'average_coef_') assert hasattr(reg, 'average_intercept_') diff --git a/sklearn/linear_model/tests/test_perceptron.py b/sklearn/linear_model/tests/test_perceptron.py index e46949c256e9f..75b91e7b50ba9 100644 --- a/sklearn/linear_model/tests/test_perceptron.py +++ b/sklearn/linear_model/tests/test_perceptron.py @@ -51,7 +51,7 @@ def test_perceptron_accuracy(): clf = Perceptron(max_iter=100, tol=None, shuffle=False) clf.fit(data, y) score = clf.score(data, y) - assert_greater(score, 0.7) + assert score > 0.7 # 0.23. warning about tol not having its correct default value. 
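The conversions in these test files follow one mechanical pattern: nose-style helpers such as assert_equal, assert_greater and assert_less become bare assert statements, which pytest rewrites so that both operands are reported on failure. A minimal, self-contained sketch of that idiom follows; it is illustrative only, not part of the patch, and the test names and values are made up.

# Illustrative sketch, not part of the patch: the bare-assert idioms that the
# deprecated helpers map onto, assuming pytest is used as the test runner.
import numpy as np
import pytest


def test_scalar_assertions():
    shape = np.zeros((3, 2)).shape
    assert shape == (3, 2)     # was: assert_equal(shape, (3, 2))

    score = 0.93
    assert score > 0.79        # was: assert_greater(score, 0.79)

    error = abs(0.93 - 0.931)
    assert error < 0.01        # was: assert_less(error, 0.01)


def test_float_equality_uses_approx():
    # Exact == on floats is fragile; pytest.approx keeps the bare-assert
    # style while tolerating rounding error, much like assert_almost_equal.
    assert 0.1 + 0.2 == pytest.approx(0.3)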
diff --git a/sklearn/linear_model/tests/test_ransac.py b/sklearn/linear_model/tests/test_ransac.py index 91b1bd34dc866..af8153590d2ff 100644 --- a/sklearn/linear_model/tests/test_ransac.py +++ b/sklearn/linear_model/tests/test_ransac.py @@ -45,13 +45,13 @@ def test_ransac_inliers_outliers(): ).astype(np.bool_) ref_inlier_mask[outliers] = False - assert_equal(ransac_estimator.inlier_mask_, ref_inlier_mask) + assert_array_equal(ransac_estimator.inlier_mask_, ref_inlier_mask) def test_ransac_is_data_valid(): def is_data_valid(X, y): - assert_equal(X.shape[0], 2) - assert_equal(y.shape[0], 2) + assert X.shape[0] == 2 + assert y.shape[0] == 2 return False rng = np.random.RandomState(0) @@ -69,8 +69,8 @@ def is_data_valid(X, y): def test_ransac_is_model_valid(): def is_model_valid(estimator, X, y): - assert_equal(X.shape[0], 2) - assert_equal(y.shape[0], 2) + assert X.shape[0] == 2 + assert y.shape[0] == 2 return False base_estimator = LinearRegression() @@ -99,7 +99,7 @@ def test_ransac_max_trials(): for i in range(50): ransac_estimator.set_params(min_samples=2, random_state=i) ransac_estimator.fit(X, y) - assert_less(ransac_estimator.n_trials_, max_trials + 1) + assert ransac_estimator.n_trials_ < max_trials + 1 def test_ransac_stop_n_inliers(): base_estimator = LinearRegression() @@ -108,7 +108,7 @@ def test_ransac_stop_n_inliers(): random_state=0) ransac_estimator.fit(X, y) - assert_equal(ransac_estimator.n_trials_, 1) + assert ransac_estimator.n_trials_ == 1 def test_ransac_stop_score(): @@ -118,7 +118,7 @@ def test_ransac_stop_score(): random_state=0) ransac_estimator.fit(X, y) - assert_equal(ransac_estimator.n_trials_, 1) + assert ransac_estimator.n_trials_ == 1 def test_ransac_score(): @@ -132,8 +132,8 @@ def test_ransac_score(): residual_threshold=0.5, random_state=0) ransac_estimator.fit(X, y) - assert_equal(ransac_estimator.score(X[2:], y[2:]), 1) - assert_less(ransac_estimator.score(X[:2], y[:2]), 1) + assert ransac_estimator.score(X[2:], y[2:]) == 1 + assert ransac_estimator.score(X[:2], y[:2]) < 1 def test_ransac_predict(): @@ -147,7 +147,7 @@ def test_ransac_predict(): residual_threshold=0.5, random_state=0) ransac_estimator.fit(X, y) - assert_equal(ransac_estimator.predict(X), np.zeros(100)) + assert_array_equal(ransac_estimator.predict(X), np.zeros(100)) def test_ransac_resid_thresh_no_inliers(): @@ -160,9 +160,9 @@ def test_ransac_resid_thresh_no_inliers(): msg = ("RANSAC could not find a valid consensus set") assert_raises_regexp(ValueError, msg, ransac_estimator.fit, X, y) - assert_equal(ransac_estimator.n_skips_no_inliers_, 5) - assert_equal(ransac_estimator.n_skips_invalid_data_, 0) - assert_equal(ransac_estimator.n_skips_invalid_model_, 0) + assert ransac_estimator.n_skips_no_inliers_ == 5 + assert ransac_estimator.n_skips_invalid_data_ == 0 + assert ransac_estimator.n_skips_invalid_model_ == 0 def test_ransac_no_valid_data(): @@ -176,9 +176,9 @@ def is_data_valid(X, y): msg = ("RANSAC could not find a valid consensus set") assert_raises_regexp(ValueError, msg, ransac_estimator.fit, X, y) - assert_equal(ransac_estimator.n_skips_no_inliers_, 0) - assert_equal(ransac_estimator.n_skips_invalid_data_, 5) - assert_equal(ransac_estimator.n_skips_invalid_model_, 0) + assert ransac_estimator.n_skips_no_inliers_ == 0 + assert ransac_estimator.n_skips_invalid_data_ == 5 + assert ransac_estimator.n_skips_invalid_model_ == 0 def test_ransac_no_valid_model(): @@ -192,9 +192,9 @@ def is_model_valid(estimator, X, y): msg = ("RANSAC could not find a valid consensus set") 
assert_raises_regexp(ValueError, msg, ransac_estimator.fit, X, y) - assert_equal(ransac_estimator.n_skips_no_inliers_, 0) - assert_equal(ransac_estimator.n_skips_invalid_data_, 0) - assert_equal(ransac_estimator.n_skips_invalid_model_, 5) + assert ransac_estimator.n_skips_no_inliers_ == 0 + assert ransac_estimator.n_skips_invalid_data_ == 0 + assert ransac_estimator.n_skips_invalid_model_ == 5 def test_ransac_exceed_max_skips(): @@ -209,9 +209,9 @@ def is_data_valid(X, y): msg = ("RANSAC skipped more iterations than `max_skips`") assert_raises_regexp(ValueError, msg, ransac_estimator.fit, X, y) - assert_equal(ransac_estimator.n_skips_no_inliers_, 0) - assert_equal(ransac_estimator.n_skips_invalid_data_, 4) - assert_equal(ransac_estimator.n_skips_invalid_model_, 0) + assert ransac_estimator.n_skips_no_inliers_ == 0 + assert ransac_estimator.n_skips_invalid_data_ == 4 + assert ransac_estimator.n_skips_invalid_model_ == 0 def test_ransac_warn_exceed_max_skips(): @@ -233,9 +233,9 @@ def is_data_valid(X, y): max_trials=5) assert_warns(ConvergenceWarning, ransac_estimator.fit, X, y) - assert_equal(ransac_estimator.n_skips_no_inliers_, 0) - assert_equal(ransac_estimator.n_skips_invalid_data_, 4) - assert_equal(ransac_estimator.n_skips_invalid_model_, 0) + assert ransac_estimator.n_skips_no_inliers_ == 0 + assert ransac_estimator.n_skips_invalid_data_ == 4 + assert ransac_estimator.n_skips_invalid_model_ == 0 def test_ransac_sparse_coo(): @@ -250,7 +250,7 @@ def test_ransac_sparse_coo(): ).astype(np.bool_) ref_inlier_mask[outliers] = False - assert_equal(ransac_estimator.inlier_mask_, ref_inlier_mask) + assert_array_equal(ransac_estimator.inlier_mask_, ref_inlier_mask) def test_ransac_sparse_csr(): @@ -265,7 +265,7 @@ def test_ransac_sparse_csr(): ).astype(np.bool_) ref_inlier_mask[outliers] = False - assert_equal(ransac_estimator.inlier_mask_, ref_inlier_mask) + assert_array_equal(ransac_estimator.inlier_mask_, ref_inlier_mask) def test_ransac_sparse_csc(): @@ -280,7 +280,7 @@ def test_ransac_sparse_csc(): ).astype(np.bool_) ref_inlier_mask[outliers] = False - assert_equal(ransac_estimator.inlier_mask_, ref_inlier_mask) + assert_array_equal(ransac_estimator.inlier_mask_, ref_inlier_mask) def test_ransac_none_estimator(): @@ -352,7 +352,7 @@ def test_ransac_multi_dimensional_targets(): ).astype(np.bool_) ref_inlier_mask[outliers] = False - assert_equal(ransac_estimator.inlier_mask_, ref_inlier_mask) + assert_array_equal(ransac_estimator.inlier_mask_, ref_inlier_mask) @pytest.mark.filterwarnings('ignore: The default value of multioutput') # 0.23 @@ -409,7 +409,7 @@ def test_ransac_default_residual_threshold(): ).astype(np.bool_) ref_inlier_mask[outliers] = False - assert_equal(ransac_estimator.inlier_mask_, ref_inlier_mask) + assert_array_equal(ransac_estimator.inlier_mask_, ref_inlier_mask) def test_ransac_dynamic_max_trials(): @@ -419,29 +419,29 @@ def test_ransac_dynamic_max_trials(): # Cambridge University Press, ISBN: 0521540518 # e = 0%, min_samples = X - assert_equal(_dynamic_max_trials(100, 100, 2, 0.99), 1) + assert _dynamic_max_trials(100, 100, 2, 0.99) == 1 # e = 5%, min_samples = 2 - assert_equal(_dynamic_max_trials(95, 100, 2, 0.99), 2) + assert _dynamic_max_trials(95, 100, 2, 0.99) == 2 # e = 10%, min_samples = 2 - assert_equal(_dynamic_max_trials(90, 100, 2, 0.99), 3) + assert _dynamic_max_trials(90, 100, 2, 0.99) == 3 # e = 30%, min_samples = 2 - assert_equal(_dynamic_max_trials(70, 100, 2, 0.99), 7) + assert _dynamic_max_trials(70, 100, 2, 0.99) == 7 # e = 50%, min_samples = 2 - 
assert_equal(_dynamic_max_trials(50, 100, 2, 0.99), 17) + assert _dynamic_max_trials(50, 100, 2, 0.99) == 17 # e = 5%, min_samples = 8 - assert_equal(_dynamic_max_trials(95, 100, 8, 0.99), 5) + assert _dynamic_max_trials(95, 100, 8, 0.99) == 5 # e = 10%, min_samples = 8 - assert_equal(_dynamic_max_trials(90, 100, 8, 0.99), 9) + assert _dynamic_max_trials(90, 100, 8, 0.99) == 9 # e = 30%, min_samples = 8 - assert_equal(_dynamic_max_trials(70, 100, 8, 0.99), 78) + assert _dynamic_max_trials(70, 100, 8, 0.99) == 78 # e = 50%, min_samples = 8 - assert_equal(_dynamic_max_trials(50, 100, 8, 0.99), 1177) + assert _dynamic_max_trials(50, 100, 8, 0.99) == 1177 # e = 0%, min_samples = 10 - assert_equal(_dynamic_max_trials(1, 100, 10, 0), 0) - assert_equal(_dynamic_max_trials(1, 100, 10, 1), float('inf')) + assert _dynamic_max_trials(1, 100, 10, 0) == 0 + assert _dynamic_max_trials(1, 100, 10, 1) == float('inf') base_estimator = LinearRegression() ransac_estimator = RANSACRegressor(base_estimator, min_samples=2, @@ -458,7 +458,7 @@ def test_ransac_fit_sample_weight(): weights = np.ones(n_samples) ransac_estimator.fit(X, y, weights) # sanity check - assert_equal(ransac_estimator.inlier_mask_.shape[0], n_samples) + assert ransac_estimator.inlier_mask_.shape[0] == n_samples ref_inlier_mask = np.ones_like(ransac_estimator.inlier_mask_ ).astype(np.bool_) diff --git a/sklearn/linear_model/tests/test_ridge.py b/sklearn/linear_model/tests/test_ridge.py index cfc487c6ffe66..de517b20e859f 100644 --- a/sklearn/linear_model/tests/test_ridge.py +++ b/sklearn/linear_model/tests/test_ridge.py @@ -77,13 +77,13 @@ def test_ridge(solver): ridge = Ridge(alpha=alpha, solver=solver) ridge.fit(X, y) - assert_equal(ridge.coef_.shape, (X.shape[1], )) - assert_greater(ridge.score(X, y), 0.47) + assert ridge.coef_.shape == (X.shape[1], ) + assert ridge.score(X, y) > 0.47 if solver in ("cholesky", "sag"): # Currently the only solvers to support sample_weight. ridge.fit(X, y, sample_weight=np.ones(n_samples)) - assert_greater(ridge.score(X, y), 0.47) + assert ridge.score(X, y) > 0.47 # With more features than samples n_samples, n_features = 5, 10 @@ -91,12 +91,12 @@ def test_ridge(solver): X = rng.randn(n_samples, n_features) ridge = Ridge(alpha=alpha, solver=solver) ridge.fit(X, y) - assert_greater(ridge.score(X, y), .9) + assert ridge.score(X, y) > .9 if solver in ("cholesky", "sag"): # Currently the only solvers to support sample_weight. 
ridge.fit(X, y, sample_weight=np.ones(n_samples)) - assert_greater(ridge.score(X, y), 0.9) + assert ridge.score(X, y) > 0.9 def test_primal_dual_relationship(): @@ -119,7 +119,7 @@ def test_ridge_singular(): ridge = Ridge(alpha=0) ridge.fit(X, y) - assert_greater(ridge.score(X, y), 0.9) + assert ridge.score(X, y) > 0.9 def test_ridge_regression_sample_weights(): @@ -214,16 +214,16 @@ def test_ridge_shapes(): ridge = Ridge() ridge.fit(X, y) - assert_equal(ridge.coef_.shape, (n_features,)) - assert_equal(ridge.intercept_.shape, ()) + assert ridge.coef_.shape == (n_features,) + assert ridge.intercept_.shape == () ridge.fit(X, Y1) - assert_equal(ridge.coef_.shape, (1, n_features)) - assert_equal(ridge.intercept_.shape, (1, )) + assert ridge.coef_.shape == (1, n_features) + assert ridge.intercept_.shape == (1, ) ridge.fit(X, Y) - assert_equal(ridge.coef_.shape, (2, n_features)) - assert_equal(ridge.intercept_.shape, (2, )) + assert ridge.coef_.shape == (2, n_features) + assert ridge.intercept_.shape == (2, ) def test_ridge_intercept(): @@ -254,16 +254,16 @@ def test_toy_ridge_object(): X_test = [[1], [2], [3], [4]] assert_almost_equal(reg.predict(X_test), [1., 2, 3, 4]) - assert_equal(len(reg.coef_.shape), 1) - assert_equal(type(reg.intercept_), np.float64) + assert len(reg.coef_.shape) == 1 + assert type(reg.intercept_) == np.float64 Y = np.vstack((Y, Y)).T reg.fit(X, Y) X_test = [[1], [2], [3], [4]] - assert_equal(len(reg.coef_.shape), 2) - assert_equal(type(reg.intercept_), np.ndarray) + assert len(reg.coef_.shape) == 2 + assert type(reg.intercept_) == np.ndarray def test_ridge_vs_lstsq(): @@ -616,7 +616,7 @@ def _test_ridge_cv_normalize(filter_): gs = GridSearchCV(Ridge(normalize=True, solver='sparse_cg'), cv=3, param_grid={'alpha': ridge_cv.alphas}) gs.fit(filter_(10. 
* X_diabetes), y_diabetes) - assert_equal(gs.best_estimator_.alpha, ridge_cv.alpha_) + assert gs.best_estimator_.alpha == ridge_cv.alpha_ def _test_ridge_cv(filter_): @@ -624,16 +624,16 @@ def _test_ridge_cv(filter_): ridge_cv.fit(filter_(X_diabetes), y_diabetes) ridge_cv.predict(filter_(X_diabetes)) - assert_equal(len(ridge_cv.coef_.shape), 1) - assert_equal(type(ridge_cv.intercept_), np.float64) + assert len(ridge_cv.coef_.shape) == 1 + assert type(ridge_cv.intercept_) == np.float64 cv = KFold(5) ridge_cv.set_params(cv=cv) ridge_cv.fit(filter_(X_diabetes), y_diabetes) ridge_cv.predict(filter_(X_diabetes)) - assert_equal(len(ridge_cv.coef_.shape), 1) - assert_equal(type(ridge_cv.intercept_), np.float64) + assert len(ridge_cv.coef_.shape) == 1 + assert type(ridge_cv.intercept_) == np.float64 def _test_ridge_diabetes(filter_): @@ -649,7 +649,7 @@ def _test_multi_ridge_diabetes(filter_): ridge = Ridge(fit_intercept=False) ridge.fit(filter_(X_diabetes), Y) - assert_equal(ridge.coef_.shape, (2, n_features)) + assert ridge.coef_.shape == (2, n_features) Y_pred = ridge.predict(filter_(X_diabetes)) ridge.fit(filter_(X_diabetes), y_diabetes) y_pred = ridge.predict(filter_(X_diabetes)) @@ -662,9 +662,9 @@ def _test_ridge_classifiers(filter_): n_features = X_iris.shape[1] for reg in (RidgeClassifier(), RidgeClassifierCV()): reg.fit(filter_(X_iris), y_iris) - assert_equal(reg.coef_.shape, (n_classes, n_features)) + assert reg.coef_.shape == (n_classes, n_features) y_pred = reg.predict(filter_(X_iris)) - assert_greater(np.mean(y_iris == y_pred), .79) + assert np.mean(y_iris == y_pred) > .79 cv = KFold(5) reg = RidgeClassifierCV(cv=cv) @@ -743,7 +743,7 @@ def test_class_weights(): reg.fit(X, y) rega = RidgeClassifier(class_weight='balanced') rega.fit(X, y) - assert_equal(len(rega.classes_), 2) + assert len(rega.classes_) == 2 assert_array_almost_equal(reg.coef_, rega.coef_) assert_array_almost_equal(reg.intercept_, rega.intercept_) @@ -988,7 +988,7 @@ def func(): def test_sparse_cg_max_iter(): reg = Ridge(solver="sparse_cg", max_iter=1) reg.fit(X_diabetes, y_diabetes) - assert_equal(reg.coef_.shape[0], X_diabetes.shape[1]) + assert reg.coef_.shape[0] == X_diabetes.shape[1] @ignore_warnings @@ -1007,7 +1007,7 @@ def test_n_iter(): for solver in ('sparse_cg', 'svd', 'cholesky'): reg = Ridge(solver=solver, max_iter=1, tol=1e-1) reg.fit(X, y_n) - assert_equal(reg.n_iter_, None) + assert reg.n_iter_ == None def test_ridge_fit_intercept_sparse(): diff --git a/sklearn/linear_model/tests/test_sag.py b/sklearn/linear_model/tests/test_sag.py index f1887302f5b46..3407d00fb1cc4 100644 --- a/sklearn/linear_model/tests/test_sag.py +++ b/sklearn/linear_model/tests/test_sag.py @@ -471,8 +471,8 @@ def test_sag_regressor(): clf2.fit(sp.csr_matrix(X), y) score1 = clf1.score(X, y) score2 = clf2.score(X, y) - assert_greater(score1, 0.99) - assert_greater(score2, 0.99) + assert score1 > 0.99 + assert score2 > 0.99 # simple linear function with noise y = 0.5 * X.ravel() + rng.randn(n_samples, 1).ravel() @@ -485,8 +485,8 @@ def test_sag_regressor(): score1 = clf1.score(X, y) score2 = clf2.score(X, y) score2 = clf2.score(X, y) - assert_greater(score1, 0.5) - assert_greater(score2, 0.5) + assert score1 > 0.5 + assert score2 > 0.5 @pytest.mark.filterwarnings('ignore:The max_iter was reached') diff --git a/sklearn/linear_model/tests/test_sgd.py b/sklearn/linear_model/tests/test_sgd.py index 7e5f88ce2e0cf..abdb2ecefd10b 100644 --- a/sklearn/linear_model/tests/test_sgd.py +++ b/sklearn/linear_model/tests/test_sgd.py @@ -211,13 
+211,13 @@ def _test_warm_start(klass, X, Y, lr): warm_start=True, learning_rate=lr) clf3.fit(X, Y) - assert_equal(clf3.t_, clf.t_) + assert clf3.t_ == clf.t_ assert_array_almost_equal(clf3.coef_, clf.coef_) clf3.set_params(alpha=0.001) clf3.fit(X, Y) - assert_equal(clf3.t_, clf2.t_) + assert clf3.t_ == clf2.t_ assert_array_almost_equal(clf3.coef_, clf2.coef_) @@ -583,9 +583,9 @@ def test_partial_fit_weight_class_balanced(klass): def test_sgd_multiclass(klass): # Multi-class test case clf = klass(alpha=0.01, max_iter=20).fit(X2, Y2) - assert_equal(clf.coef_.shape, (3, 2)) - assert_equal(clf.intercept_.shape, (3,)) - assert_equal(clf.decision_function([[0, 0]]).shape, (1, 3)) + assert clf.coef_.shape == (3, 2) + assert clf.intercept_.shape == (3,) + assert clf.decision_function([[0, 0]]).shape == (1, 3) pred = clf.predict(T2) assert_array_equal(pred, true_result2) @@ -621,7 +621,7 @@ def test_sgd_multiclass_with_init_coef(klass): clf = klass(alpha=0.01, max_iter=20) clf.fit(X2, Y2, coef_init=np.zeros((3, 2)), intercept_init=np.zeros(3)) - assert_equal(clf.coef_.shape, (3, 2)) + assert clf.coef_.shape == (3, 2) assert clf.intercept_.shape, (3,) pred = clf.predict(T2) assert_array_equal(pred, true_result2) @@ -631,9 +631,9 @@ def test_sgd_multiclass_with_init_coef(klass): def test_sgd_multiclass_njobs(klass): # Multi-class test case with multi-core support clf = klass(alpha=0.01, max_iter=20, n_jobs=2).fit(X2, Y2) - assert_equal(clf.coef_.shape, (3, 2)) - assert_equal(clf.intercept_.shape, (3,)) - assert_equal(clf.decision_function([[0, 0]]).shape, (1, 3)) + assert clf.coef_.shape == (3, 2) + assert clf.intercept_.shape == (3,) + assert clf.decision_function([[0, 0]]).shape == (1, 3) pred = clf.predict(T2) assert_array_equal(pred, true_result2) @@ -739,9 +739,9 @@ def test_sgd_proba(klass): d = clf.decision_function([[3, 2]]) p = clf.predict_proba([[3, 2]]) if klass != SparseSGDClassifier: - assert_equal(np.argmax(d, axis=1), np.argmax(p, axis=1)) + assert np.argmax(d, axis=1) == np.argmax(p, axis=1) else: # XXX the sparse test gets a different X2 (?) 
- assert_equal(np.argmin(d, axis=1), np.argmin(p, axis=1)) + assert np.argmin(d, axis=1) == np.argmin(p, axis=1) # the following sample produces decision_function values < -1, # which would cause naive normalization to fail (see comment @@ -896,14 +896,14 @@ def test_balanced_weight(klass): clf = klass(max_iter=1000, class_weight=None, shuffle=False) clf.fit(X_imbalanced, y_imbalanced) y_pred = clf.predict(X) - assert_less(metrics.f1_score(y, y_pred, average='weighted'), 0.96) + assert metrics.f1_score(y, y_pred, average='weighted') < 0.96 # fit a model with balanced class_weight enabled clf = klass(max_iter=1000, class_weight="balanced", shuffle=False) clf.fit(X_imbalanced, y_imbalanced) y_pred = clf.predict(X) - assert_greater(metrics.f1_score(y, y_pred, average='weighted'), 0.96) + assert metrics.f1_score(y, y_pred, average='weighted') > 0.96 @pytest.mark.parametrize('klass', [SGDClassifier, SparseSGDClassifier]) @@ -947,9 +947,9 @@ def test_partial_fit_binary(klass): classes = np.unique(Y) clf.partial_fit(X[:third], Y[:third], classes=classes) - assert_equal(clf.coef_.shape, (1, X.shape[1])) - assert_equal(clf.intercept_.shape, (1,)) - assert_equal(clf.decision_function([[0, 0]]).shape, (1, )) + assert clf.coef_.shape == (1, X.shape[1]) + assert clf.intercept_.shape == (1,) + assert clf.decision_function([[0, 0]]).shape == (1, ) id1 = id(clf.coef_.data) clf.partial_fit(X[third:], Y[third:]) @@ -968,9 +968,9 @@ def test_partial_fit_multiclass(klass): classes = np.unique(Y2) clf.partial_fit(X2[:third], Y2[:third], classes=classes) - assert_equal(clf.coef_.shape, (3, X2.shape[1])) - assert_equal(clf.intercept_.shape, (3,)) - assert_equal(clf.decision_function([[0, 0]]).shape, (1, 3)) + assert clf.coef_.shape == (3, X2.shape[1]) + assert clf.intercept_.shape == (3,) + assert clf.decision_function([[0, 0]]).shape == (1, 3) id1 = id(clf.coef_.data) clf.partial_fit(X2[third:], Y2[third:]) @@ -986,12 +986,12 @@ def test_partial_fit_multiclass_average(klass): classes = np.unique(Y2) clf.partial_fit(X2[:third], Y2[:third], classes=classes) - assert_equal(clf.coef_.shape, (3, X2.shape[1])) - assert_equal(clf.intercept_.shape, (3,)) + assert clf.coef_.shape == (3, X2.shape[1]) + assert clf.intercept_.shape == (3,) clf.partial_fit(X2[third:], Y2[third:]) - assert_equal(clf.coef_.shape, (3, X2.shape[1])) - assert_equal(clf.intercept_.shape, (3,)) + assert clf.coef_.shape == (3, X2.shape[1]) + assert clf.intercept_.shape == (3,) @pytest.mark.parametrize('klass', [SGDClassifier, SparseSGDClassifier]) @@ -1022,7 +1022,7 @@ def test_partial_fit_equal_fit_classif(klass, lr): clf.partial_fit(X_, Y_, classes=classes) y_pred2 = clf.decision_function(T_) - assert_equal(clf.t_, t) + assert clf.t_ == t assert_array_almost_equal(y_pred, y_pred2, decimal=2) @@ -1033,22 +1033,22 @@ def test_regression_losses(klass): eta0=0.1, loss="epsilon_insensitive", random_state=random_state) clf.fit(X, Y) - assert_equal(1.0, np.mean(clf.predict(X) == Y)) + assert 1.0 == np.mean(clf.predict(X) == Y) clf = klass(alpha=0.01, learning_rate="constant", eta0=0.1, loss="squared_epsilon_insensitive", random_state=random_state) clf.fit(X, Y) - assert_equal(1.0, np.mean(clf.predict(X) == Y)) + assert 1.0 == np.mean(clf.predict(X) == Y) clf = klass(alpha=0.01, loss="huber", random_state=random_state) clf.fit(X, Y) - assert_equal(1.0, np.mean(clf.predict(X) == Y)) + assert 1.0 == np.mean(clf.predict(X) == Y) clf = klass(alpha=0.01, learning_rate="constant", eta0=0.01, loss="squared_loss", random_state=random_state) clf.fit(X, Y) - 
assert_equal(1.0, np.mean(clf.predict(X) == Y)) + assert 1.0 == np.mean(clf.predict(X) == Y) @pytest.mark.parametrize('klass', [SGDClassifier, SparseSGDClassifier]) @@ -1076,7 +1076,7 @@ def test_sgd_reg(klass): # Check that SGD gives any results. clf = klass(alpha=0.1, max_iter=2, fit_intercept=False) clf.fit([[0, 0], [1, 1], [2, 2]], [0, 1, 2]) - assert_equal(clf.coef_[0], clf.coef_[1]) + assert clf.coef_[0] == clf.coef_[1] @pytest.mark.parametrize('klass', [SGDRegressor, SparseSGDRegressor]) @@ -1177,7 +1177,7 @@ def test_sgd_least_squares_fit(klass): fit_intercept=False) clf.fit(X, y) score = clf.score(X, y) - assert_greater(score, 0.99) + assert score > 0.99 # simple linear function with noise y = 0.5 * X.ravel() + rng.randn(n_samples, 1).ravel() @@ -1186,7 +1186,7 @@ def test_sgd_least_squares_fit(klass): fit_intercept=False) clf.fit(X, y) score = clf.score(X, y) - assert_greater(score, 0.5) + assert score > 0.5 @pytest.mark.parametrize('klass', [SGDRegressor, SparseSGDRegressor]) @@ -1231,7 +1231,7 @@ def test_sgd_huber_fit(klass): fit_intercept=False) clf.fit(X, y) score = clf.score(X, y) - assert_greater(score, 0.99) + assert score > 0.99 # simple linear function with noise y = 0.5 * X.ravel() + rng.randn(n_samples, 1).ravel() @@ -1240,7 +1240,7 @@ def test_sgd_huber_fit(klass): fit_intercept=False) clf.fit(X, y) score = clf.score(X, y) - assert_greater(score, 0.5) + assert score > 0.5 @pytest.mark.parametrize('klass', [SGDRegressor, SparseSGDRegressor]) @@ -1279,9 +1279,9 @@ def test_partial_fit(klass): clf = klass(alpha=0.01) clf.partial_fit(X[:third], Y[:third]) - assert_equal(clf.coef_.shape, (X.shape[1], )) - assert_equal(clf.intercept_.shape, (1,)) - assert_equal(clf.predict([[0, 0]]).shape, (1, )) + assert clf.coef_.shape == (X.shape[1], ) + assert clf.intercept_.shape == (1,) + assert clf.predict([[0, 0]]).shape == (1, ) id1 = id(clf.coef_.data) clf.partial_fit(X[third:], Y[third:]) @@ -1306,7 +1306,7 @@ def test_partial_fit_equal_fit(klass, lr): clf.partial_fit(X, Y) y_pred2 = clf.predict(T) - assert_equal(clf.t_, t) + assert clf.t_ == t assert_array_almost_equal(y_pred, y_pred2, decimal=2) @@ -1407,25 +1407,25 @@ def test_tol_parameter(): max_iter = 42 model_0 = SGDClassifier(tol=None, random_state=0, max_iter=max_iter) model_0.fit(X, y) - assert_equal(max_iter, model_0.n_iter_) + assert max_iter == model_0.n_iter_ # If tol is not None, the number of iteration should be less than max_iter max_iter = 2000 model_1 = SGDClassifier(tol=0, random_state=0, max_iter=max_iter) model_1.fit(X, y) - assert_greater(max_iter, model_1.n_iter_) - assert_greater(model_1.n_iter_, 5) + assert max_iter > model_1.n_iter_ + assert model_1.n_iter_ > 5 # A larger tol should yield a smaller number of iteration model_2 = SGDClassifier(tol=0.1, random_state=0, max_iter=max_iter) model_2.fit(X, y) - assert_greater(model_1.n_iter_, model_2.n_iter_) - assert_greater(model_2.n_iter_, 3) + assert model_1.n_iter_ > model_2.n_iter_ + assert model_2.n_iter_ > 3 # Strict tolerance and small max_iter should trigger a warning model_3 = SGDClassifier(max_iter=3, tol=1e-3, random_state=0) model_3 = assert_warns(ConvergenceWarning, model_3.fit, X, y) - assert_equal(model_3.n_iter_, 3) + assert model_3.n_iter_ == 3 def _test_gradient_common(loss_function, cases): diff --git a/sklearn/linear_model/tests/test_sparse_coordinate_descent.py b/sklearn/linear_model/tests/test_sparse_coordinate_descent.py index d637ae1385052..326bcc94433bc 100644 --- a/sklearn/linear_model/tests/test_sparse_coordinate_descent.py +++ 
b/sklearn/linear_model/tests/test_sparse_coordinate_descent.py @@ -21,7 +21,7 @@ def test_sparse_coef(): clf.coef_ = [1, 2, 3] assert sp.isspmatrix(clf.sparse_coef_) - assert_equal(clf.sparse_coef_.toarray().tolist()[0], clf.coef_) + assert clf.sparse_coef_.toarray().tolist()[0] == clf.coef_ def test_normalize_option(): @@ -162,7 +162,7 @@ def _test_sparse_enet_not_as_toy_dataset(alpha, fit_intercept, positive): s_clf.fit(X_train, y_train) assert_almost_equal(s_clf.dual_gap_, 0, 4) - assert_greater(s_clf.score(X_test, y_test), 0.85) + assert s_clf.score(X_test, y_test) > 0.85 # check the convergence is the same as the dense version d_clf = ElasticNet(alpha=alpha, l1_ratio=0.8, fit_intercept=fit_intercept, @@ -171,13 +171,13 @@ def _test_sparse_enet_not_as_toy_dataset(alpha, fit_intercept, positive): d_clf.fit(X_train.toarray(), y_train) assert_almost_equal(d_clf.dual_gap_, 0, 4) - assert_greater(d_clf.score(X_test, y_test), 0.85) + assert d_clf.score(X_test, y_test) > 0.85 assert_almost_equal(s_clf.coef_, d_clf.coef_, 5) assert_almost_equal(s_clf.intercept_, d_clf.intercept_, 5) # check that the coefs are sparse - assert_less(np.sum(s_clf.coef_ != 0.0), 2 * n_informative) + assert np.sum(s_clf.coef_ != 0.0) < 2 * n_informative def test_sparse_enet_not_as_toy_dataset(): @@ -203,16 +203,16 @@ def test_sparse_lasso_not_as_toy_dataset(): s_clf = Lasso(alpha=0.1, fit_intercept=False, max_iter=max_iter, tol=1e-7) s_clf.fit(X_train, y_train) assert_almost_equal(s_clf.dual_gap_, 0, 4) - assert_greater(s_clf.score(X_test, y_test), 0.85) + assert s_clf.score(X_test, y_test) > 0.85 # check the convergence is the same as the dense version d_clf = Lasso(alpha=0.1, fit_intercept=False, max_iter=max_iter, tol=1e-7) d_clf.fit(X_train.toarray(), y_train) assert_almost_equal(d_clf.dual_gap_, 0, 4) - assert_greater(d_clf.score(X_test, y_test), 0.85) + assert d_clf.score(X_test, y_test) > 0.85 # check that the coefs are sparse - assert_equal(np.sum(s_clf.coef_ != 0.0), n_informative) + assert np.sum(s_clf.coef_ != 0.0) == n_informative def test_enet_multitarget(): @@ -241,8 +241,8 @@ def test_path_parameters(): l1_ratio=0.5, fit_intercept=False) ignore_warnings(clf.fit)(X, y) # new params assert_almost_equal(0.5, clf.l1_ratio) - assert_equal(n_alphas, clf.n_alphas) - assert_equal(n_alphas, len(clf.alphas_)) + assert n_alphas == clf.n_alphas + assert n_alphas == len(clf.alphas_) sparse_mse_path = clf.mse_path_ ignore_warnings(clf.fit)(X.toarray(), y) # compare with dense data assert_almost_equal(clf.mse_path_, sparse_mse_path) diff --git a/sklearn/linear_model/tests/test_theil_sen.py b/sklearn/linear_model/tests/test_theil_sen.py index 57277a68abd88..70468b4bb8464 100644 --- a/sklearn/linear_model/tests/test_theil_sen.py +++ b/sklearn/linear_model/tests/test_theil_sen.py @@ -163,7 +163,7 @@ def test_theil_sen_1d(): X, y, w, c = gen_toy_problem_1d() # Check that Least Squares fails lstq = LinearRegression().fit(X, y) - assert_greater(np.abs(lstq.coef_ - w), 0.9) + assert np.abs(lstq.coef_ - w) > 0.9 # Check that Theil-Sen works theil_sen = TheilSenRegressor(random_state=0).fit(X, y) assert_array_almost_equal(theil_sen.coef_, w, 1) @@ -174,7 +174,7 @@ def test_theil_sen_1d_no_intercept(): X, y, w, c = gen_toy_problem_1d(intercept=False) # Check that Least Squares fails lstq = LinearRegression(fit_intercept=False).fit(X, y) - assert_greater(np.abs(lstq.coef_ - w - c), 0.5) + assert np.abs(lstq.coef_ - w - c) > 0.5 # Check that Theil-Sen works theil_sen = TheilSenRegressor(fit_intercept=False, 
random_state=0).fit(X, y) @@ -186,7 +186,7 @@ def test_theil_sen_2d(): X, y, w, c = gen_toy_problem_2d() # Check that Least Squares fails lstq = LinearRegression().fit(X, y) - assert_greater(norm(lstq.coef_ - w), 1.0) + assert norm(lstq.coef_ - w) > 1.0 # Check that Theil-Sen works theil_sen = TheilSenRegressor(max_subpopulation=1e3, random_state=0).fit(X, y) @@ -196,7 +196,7 @@ def test_theil_sen_2d(): def test_calc_breakdown_point(): bp = _breakdown_point(1e10, 2) - assert_less(np.abs(bp - 1 + 1 / (np.sqrt(2))), 1.e-6) + assert np.abs(bp - 1 + 1 / (np.sqrt(2))) < 1.e-6 def test_checksubparams_negative_subpopulation(): @@ -257,7 +257,7 @@ def test_theil_sen_parallel(): X, y, w, c = gen_toy_problem_2d() # Check that Least Squares fails lstq = LinearRegression().fit(X, y) - assert_greater(norm(lstq.coef_ - w), 1.0) + assert norm(lstq.coef_ - w) > 1.0 # Check that Theil-Sen works theil_sen = TheilSenRegressor(n_jobs=2, random_state=0, From d9ee2911ff8717a81cabd24a3e0e3bc618e17fee Mon Sep 17 00:00:00 2001 From: adrinjalali Date: Sun, 30 Jun 2019 15:50:32 +0200 Subject: [PATCH 10/22] fix manifold --- sklearn/manifold/tests/test_isomap.py | 6 ++-- sklearn/manifold/tests/test_locally_linear.py | 20 ++++++------ .../manifold/tests/test_spectral_embedding.py | 18 +++++------ sklearn/manifold/tests/test_t_sne.py | 32 +++++++++---------- 4 files changed, 38 insertions(+), 38 deletions(-) diff --git a/sklearn/manifold/tests/test_isomap.py b/sklearn/manifold/tests/test_isomap.py index d1a68164ee45c..da8607a31b916 100644 --- a/sklearn/manifold/tests/test_isomap.py +++ b/sklearn/manifold/tests/test_isomap.py @@ -101,7 +101,7 @@ def test_transform(): X_iso2 = iso.transform(X + noise) # Make sure the rms error on re-embedding is comparable to noise_scale - assert_less(np.sqrt(np.mean((X_iso - X_iso2) ** 2)), 2 * noise_scale) + assert np.sqrt(np.mean((X_iso - X_iso2) ** 2)) < 2 * noise_scale def test_pipeline(): @@ -113,7 +113,7 @@ def test_pipeline(): [('isomap', manifold.Isomap()), ('clf', neighbors.KNeighborsClassifier())]) clf.fit(X, y) - assert_less(.9, clf.score(X, y)) + assert .9 < clf.score(X, y) def test_isomap_clone_bug(): @@ -122,7 +122,7 @@ def test_isomap_clone_bug(): for n_neighbors in [10, 15, 20]: model.set_params(n_neighbors=n_neighbors) model.fit(np.random.rand(50, 2)) - assert_equal(model.nbrs_.n_neighbors, + assert (model.nbrs_.n_neighbors == n_neighbors) diff --git a/sklearn/manifold/tests/test_locally_linear.py b/sklearn/manifold/tests/test_locally_linear.py index a1d48fd49e999..2e05710b79b9a 100644 --- a/sklearn/manifold/tests/test_locally_linear.py +++ b/sklearn/manifold/tests/test_locally_linear.py @@ -30,7 +30,7 @@ def test_barycenter_kneighbors_graph(): # check that columns sum to one assert_array_almost_equal(np.sum(A.toarray(), 1), np.ones(3)) pred = np.dot(A.toarray(), X) - assert_less(linalg.norm(pred - X) / X.shape[0], 1) + assert linalg.norm(pred - X) / X.shape[0] < 1 # ---------------------------------------------------------------------- @@ -52,7 +52,7 @@ def test_lle_simple_grid(): N = barycenter_kneighbors_graph(X, clf.n_neighbors).toarray() reconstruction_error = linalg.norm(np.dot(N, X) - X, 'fro') - assert_less(reconstruction_error, tol) + assert reconstruction_error < tol for solver in eigen_solvers: clf.set_params(eigen_solver=solver) @@ -61,14 +61,14 @@ def test_lle_simple_grid(): reconstruction_error = linalg.norm( np.dot(N, clf.embedding_) - clf.embedding_, 'fro') ** 2 - assert_less(reconstruction_error, tol) + assert reconstruction_error < tol 
assert_almost_equal(clf.reconstruction_error_, reconstruction_error, decimal=1) # re-embed a noisy version of X using the transform method noise = rng.randn(*X.shape) / 100 X_reembedded = clf.transform(X + noise) - assert_less(linalg.norm(X_reembedded - clf.embedding_), tol) + assert linalg.norm(X_reembedded - clf.embedding_) < tol def test_lle_manifold(): @@ -86,7 +86,7 @@ def test_lle_manifold(): N = barycenter_kneighbors_graph(X, clf.n_neighbors).toarray() reconstruction_error = linalg.norm(np.dot(N, X) - X) - assert_less(reconstruction_error, tol) + assert reconstruction_error < tol for solver in eigen_solvers: clf.set_params(eigen_solver=solver) @@ -95,10 +95,10 @@ def test_lle_manifold(): reconstruction_error = linalg.norm( np.dot(N, clf.embedding_) - clf.embedding_, 'fro') ** 2 details = ("solver: %s, method: %s" % (solver, method)) - assert_less(reconstruction_error, tol, msg=details) - assert_less(np.abs(clf.reconstruction_error_ - - reconstruction_error), - tol * reconstruction_error, msg=details) + assert reconstruction_error < tol, details + assert (np.abs(clf.reconstruction_error_ - + reconstruction_error) < + tol * reconstruction_error), details # Test the error raised when parameter passed to lle is invalid @@ -124,7 +124,7 @@ def test_pipeline(): [('filter', manifold.LocallyLinearEmbedding(random_state=0)), ('clf', neighbors.KNeighborsClassifier())]) clf.fit(X, y) - assert_less(.9, clf.score(X, y)) + assert .9 < clf.score(X, y) # Test the error raised when the weight matrix is singular diff --git a/sklearn/manifold/tests/test_spectral_embedding.py b/sklearn/manifold/tests/test_spectral_embedding.py index 3d7e643d5697f..b6e3879b8beab 100644 --- a/sklearn/manifold/tests/test_spectral_embedding.py +++ b/sklearn/manifold/tests/test_spectral_embedding.py @@ -77,12 +77,12 @@ def test_sparse_graph_connected_component(): for start, stop in zip(boundaries[:-1], boundaries[1:]): component_1 = _graph_connected_component(affinity, p[start]) component_size = stop - start - assert_equal(component_1.sum(), component_size) + assert component_1.sum() == component_size # We should retrieve the same component mask by starting by both ends # of the group component_2 = _graph_connected_component(affinity, p[stop - 1]) - assert_equal(component_2.sum(), component_size) + assert component_2.sum() == component_size assert_array_equal(component_1, component_2) @@ -123,7 +123,7 @@ def test_spectral_embedding_two_components(seed=36): se_precomp.fit_transform(affinity.astype(np.float32)) # thresholding on the first components using 0. 
label_ = np.array(embedded_coordinate.ravel() < 0, dtype="float") - assert_equal(normalized_mutual_info_score(true_label, label_), 1.0) + assert normalized_mutual_info_score(true_label, label_) == 1.0 def test_spectral_embedding_precomputed_affinity(seed=36): @@ -220,17 +220,17 @@ def test_connectivity(seed=36): [0, 1, 1, 1, 0], [0, 0, 1, 1, 1], [0, 0, 0, 1, 1]]) - assert_equal(_graph_is_connected(graph), False) - assert_equal(_graph_is_connected(sparse.csr_matrix(graph)), False) - assert_equal(_graph_is_connected(sparse.csc_matrix(graph)), False) + assert not _graph_is_connected(graph) + assert not _graph_is_connected(sparse.csr_matrix(graph)) + assert not _graph_is_connected(sparse.csc_matrix(graph)) graph = np.array([[1, 1, 0, 0, 0], [1, 1, 1, 0, 0], [0, 1, 1, 1, 0], [0, 0, 1, 1, 1], [0, 0, 0, 1, 1]]) - assert_equal(_graph_is_connected(graph), True) - assert_equal(_graph_is_connected(sparse.csr_matrix(graph)), True) - assert_equal(_graph_is_connected(sparse.csc_matrix(graph)), True) + assert _graph_is_connected(graph) + assert _graph_is_connected(sparse.csr_matrix(graph)) + assert _graph_is_connected(sparse.csc_matrix(graph)) def test_spectral_embedding_deterministic(): diff --git a/sklearn/manifold/tests/test_t_sne.py b/sklearn/manifold/tests/test_t_sne.py index 59a2a71660c32..ceb569026489d 100644 --- a/sklearn/manifold/tests/test_t_sne.py +++ b/sklearn/manifold/tests/test_t_sne.py @@ -70,8 +70,8 @@ def flat_function(_, compute_error=True): out = sys.stdout.getvalue() sys.stdout.close() sys.stdout = old_stdout - assert_equal(error, 1.0) - assert_equal(it, 0) + assert error == 1.0 + assert it == 0 assert("gradient norm" in out) # Maximum number of iterations without improvement @@ -86,8 +86,8 @@ def flat_function(_, compute_error=True): out = sys.stdout.getvalue() sys.stdout.close() sys.stdout = old_stdout - assert_equal(error, 0.0) - assert_equal(it, 11) + assert error == 0.0 + assert it == 11 assert("did not make any progress" in out) # Maximum number of iterations @@ -102,8 +102,8 @@ def flat_function(_, compute_error=True): out = sys.stdout.getvalue() sys.stdout.close() sys.stdout = old_stdout - assert_equal(error, 0.0) - assert_equal(it, 10) + assert error == 0.0 + assert it == 10 assert("Iteration 10" in out) @@ -226,13 +226,13 @@ def test_trustworthiness(): # Affine transformation X = random_state.randn(100, 2) - assert_equal(trustworthiness(X, 5.0 + X / 10.0), 1.0) + assert trustworthiness(X, 5.0 + X / 10.0) == 1.0 # Randomly shuffled X = np.arange(100).reshape(-1, 1) X_embedded = X.copy() random_state.shuffle(X_embedded) - assert_less(trustworthiness(X, X_embedded), 0.6) + assert trustworthiness(X, X_embedded) < 0.6 # Completely different X = np.arange(5).reshape(-1, 1) @@ -264,8 +264,8 @@ def test_optimization_minimizes_kl_divergence(): n_iter=n_iter, random_state=0) tsne.fit_transform(X) kl_divergences.append(tsne.kl_divergence_) - assert_less_equal(kl_divergences[1], kl_divergences[0]) - assert_less_equal(kl_divergences[2], kl_divergences[1]) + assert kl_divergences[1] <= kl_divergences[0] + assert kl_divergences[2] <= kl_divergences[1] def test_fit_csr_matrix(): @@ -300,7 +300,7 @@ def test_trustworthiness_not_euclidean_metric(): # 'precomputed' random_state = check_random_state(0) X = random_state.randn(100, 2) - assert_equal(trustworthiness(X, X, metric='cosine'), + assert (trustworthiness(X, X, metric='cosine') == trustworthiness(pairwise_distances(X, metric='cosine'), X, metric='precomputed')) @@ -671,8 +671,8 @@ def
test_n_iter_without_progress(): sys.stdout = old_stdout # The output needs to contain the value of n_iter_without_progress - assert_in("did not make any progress during the " - "last -1 episodes. Finished.", out) + assert ("did not make any progress during the " + "last -1 episodes. Finished." in out) def test_min_grad_norm(): @@ -715,7 +715,7 @@ def test_min_grad_norm(): # The gradient norm can be smaller than min_grad_norm at most once, # because in the moment it becomes smaller the optimization stops - assert_less_equal(n_smaller_gradient_norms, 1) + assert n_smaller_gradient_norms <= 1 def test_accessible_kl_divergence(): @@ -790,8 +790,8 @@ def assert_uniform_grid(Y, try_name=None): smallest_to_mean = dist_to_nn.min() / np.mean(dist_to_nn) largest_to_mean = dist_to_nn.max() / np.mean(dist_to_nn) - assert_greater(smallest_to_mean, .5, msg=try_name) - assert_less(largest_to_mean, 2, msg=try_name) + assert smallest_to_mean > .5, try_name + assert largest_to_mean < 2, try_name def test_bh_match_exact(): From c86851c71f5bd759bef5a8c6fb6fcb60d4d36adb Mon Sep 17 00:00:00 2001 From: adrinjalali Date: Sun, 30 Jun 2019 15:54:31 +0200 Subject: [PATCH 11/22] fix metrics --- .../metrics/cluster/tests/test_bicluster.py | 24 +-- .../metrics/cluster/tests/test_supervised.py | 24 +-- .../cluster/tests/test_unsupervised.py | 22 +- sklearn/metrics/tests/test_classification.py | 198 +++++++++--------- sklearn/metrics/tests/test_common.py | 8 +- sklearn/metrics/tests/test_pairwise.py | 52 ++--- sklearn/metrics/tests/test_ranking.py | 78 +++---- sklearn/metrics/tests/test_regression.py | 12 +- sklearn/metrics/tests/test_score_objects.py | 14 +- 9 files changed, 216 insertions(+), 216 deletions(-) diff --git a/sklearn/metrics/cluster/tests/test_bicluster.py b/sklearn/metrics/cluster/tests/test_bicluster.py index 1ca98b744a7b1..d98ee1fc86b20 100644 --- a/sklearn/metrics/cluster/tests/test_bicluster.py +++ b/sklearn/metrics/cluster/tests/test_bicluster.py @@ -14,10 +14,10 @@ def test_jaccard(): a3 = np.array([False, True, True, False]) a4 = np.array([False, False, True, True]) - assert_equal(_jaccard(a1, a1, a1, a1), 1) - assert_equal(_jaccard(a1, a1, a2, a2), 0.25) - assert_equal(_jaccard(a1, a1, a3, a3), 1.0 / 7) - assert_equal(_jaccard(a1, a1, a4, a4), 0) + assert _jaccard(a1, a1, a1, a1) == 1 + assert _jaccard(a1, a1, a2, a2) == 0.25 + assert _jaccard(a1, a1, a3, a3) == 1.0 / 7 + assert _jaccard(a1, a1, a4, a4) == 0 def test_consensus_score(): @@ -25,15 +25,15 @@ def test_consensus_score(): [False, False, True, True]] b = a[::-1] - assert_equal(consensus_score((a, a), (a, a)), 1) - assert_equal(consensus_score((a, a), (b, b)), 1) - assert_equal(consensus_score((a, b), (a, b)), 1) - assert_equal(consensus_score((a, b), (b, a)), 1) + assert consensus_score((a, a), (a, a)) == 1 + assert consensus_score((a, a), (b, b)) == 1 + assert consensus_score((a, b), (a, b)) == 1 + assert consensus_score((a, b), (b, a)) == 1 - assert_equal(consensus_score((a, a), (b, a)), 0) - assert_equal(consensus_score((a, a), (a, b)), 0) - assert_equal(consensus_score((b, b), (a, b)), 0) - assert_equal(consensus_score((b, b), (b, a)), 0) + assert consensus_score((a, a), (b, a)) == 0 + assert consensus_score((a, a), (a, b)) == 0 + assert consensus_score((b, b), (a, b)) == 0 + assert consensus_score((b, b), (b, a)) == 0 def test_consensus_score_issue2445(): diff --git a/sklearn/metrics/cluster/tests/test_supervised.py b/sklearn/metrics/cluster/tests/test_supervised.py index eef3a7f46482d..a91ef7ef4ec4c 100644 --- 
a/sklearn/metrics/cluster/tests/test_supervised.py +++ b/sklearn/metrics/cluster/tests/test_supervised.py @@ -62,13 +62,13 @@ def test_generalized_average(): @ignore_warnings(category=FutureWarning) def test_perfect_matches(): for score_func in score_funcs: - assert_equal(score_func([], []), 1.0) - assert_equal(score_func([0], [1]), 1.0) - assert_equal(score_func([0, 0, 0], [0, 0, 0]), 1.0) - assert_equal(score_func([0, 1, 0], [42, 7, 42]), 1.0) - assert_equal(score_func([0., 1., 0.], [42., 7., 42.]), 1.0) - assert_equal(score_func([0., 1., 2.], [42., 7., 2.]), 1.0) - assert_equal(score_func([0, 1, 2], [42, 7, 2]), 1.0) + assert score_func([], []) == 1.0 + assert score_func([0], [1]) == 1.0 + assert score_func([0, 0, 0], [0, 0, 0]) == 1.0 + assert score_func([0, 1, 0], [42, 7, 42]) == 1.0 + assert score_func([0., 1., 0.], [42., 7., 42.]) == 1.0 + assert score_func([0., 1., 2.], [42., 7., 2.]) == 1.0 + assert score_func([0, 1, 2], [42, 7, 2]) == 1.0 score_funcs_with_changing_means = [ normalized_mutual_info_score, adjusted_mutual_info_score, @@ -213,7 +213,7 @@ def test_adjusted_mutual_info_score(): ami = adjusted_mutual_info_score(labels_a, labels_b) assert_almost_equal(ami, 0.27821, 5) ami = adjusted_mutual_info_score([1, 1, 2, 2], [2, 2, 3, 3]) - assert_equal(ami, 1.0) + assert ami == 1.0 # Test with a very large array a110 = np.array([list(labels_a) * 110]).flatten() b110 = np.array([list(labels_b) * 110]).flatten() @@ -275,10 +275,10 @@ def test_exactly_zero_info_score(): for i in np.logspace(1, 4, 4).astype(np.int): labels_a, labels_b = (np.ones(i, dtype=np.int), np.arange(i, dtype=np.int)) - assert_equal(normalized_mutual_info_score(labels_a, labels_b), 0.0) - assert_equal(v_measure_score(labels_a, labels_b), 0.0) - assert_equal(adjusted_mutual_info_score(labels_a, labels_b), 0.0) - assert_equal(normalized_mutual_info_score(labels_a, labels_b), 0.0) + assert normalized_mutual_info_score(labels_a, labels_b) == 0.0 + assert v_measure_score(labels_a, labels_b) == 0.0 + assert adjusted_mutual_info_score(labels_a, labels_b) == 0.0 + assert normalized_mutual_info_score(labels_a, labels_b) == 0.0 for method in ["min", "geometric", "arithmetic", "max"]: assert adjusted_mutual_info_score(labels_a, labels_b, method) == 0.0 diff --git a/sklearn/metrics/cluster/tests/test_unsupervised.py b/sklearn/metrics/cluster/tests/test_unsupervised.py index 4b061313794e0..29dfd930b0702 100644 --- a/sklearn/metrics/cluster/tests/test_unsupervised.py +++ b/sklearn/metrics/cluster/tests/test_unsupervised.py @@ -32,7 +32,7 @@ def test_silhouette(): # Given that the actual labels are used, we can assume that S would be # positive. 
score_precomputed = silhouette_score(D, y, metric='precomputed') - assert_greater(score_precomputed, 0) + assert score_precomputed > 0 # Test without calculating D score_euclidean = silhouette_score(X, y, metric='euclidean') pytest.approx(score_precomputed, score_euclidean) @@ -50,8 +50,8 @@ def test_silhouette(): score_euclidean = silhouette_score(X, y, metric='euclidean', sample_size=int(X.shape[0] / 2), random_state=0) - assert_greater(score_precomputed, 0) - assert_greater(score_euclidean, 0) + assert score_precomputed > 0 + assert score_euclidean > 0 pytest.approx(score_euclidean, score_precomputed) if X is X_dense: @@ -156,8 +156,8 @@ def test_non_encoded_labels(): dataset = datasets.load_iris() X = dataset.data labels = dataset.target - assert_equal( - silhouette_score(X, labels * 2 + 10), silhouette_score(X, labels)) + assert ( + silhouette_score(X, labels * 2 + 10) == silhouette_score(X, labels)) assert_array_equal( silhouette_samples(X, labels * 2 + 10), silhouette_samples(X, labels)) @@ -166,8 +166,8 @@ def test_non_numpy_labels(): dataset = datasets.load_iris() X = dataset.data y = dataset.target - assert_equal( - silhouette_score(list(X), list(y)), silhouette_score(X, y)) + assert ( + silhouette_score(list(X), list(y)) == silhouette_score(X, y)) def assert_raises_on_only_one_label(func): @@ -192,12 +192,12 @@ def test_calinski_harabasz_score(): assert_raises_on_all_points_same_cluster(calinski_harabasz_score) # Assert the value is 1. when all samples are equals - assert_equal(1., calinski_harabasz_score(np.ones((10, 2)), - [0] * 5 + [1] * 5)) + assert 1. == calinski_harabasz_score(np.ones((10, 2)), + [0] * 5 + [1] * 5) # Assert the value is 0. when all the mean cluster are equal - assert_equal(0., calinski_harabasz_score([[-1, -1], [1, 1]] * 10, - [0] * 10 + [1] * 10)) + assert 0. == calinski_harabasz_score([[-1, -1], [1, 1]] * 10, + [0] * 10 + [1] * 10) # General case (with non numpy arrays) X = ([[0, 0], [1, 1]] * 5 + [[3, 3], [4, 4]] * 5 + diff --git a/sklearn/metrics/tests/test_classification.py b/sklearn/metrics/tests/test_classification.py index ed194b3c7e995..d51631a5ff6f9 100644 --- a/sklearn/metrics/tests/test_classification.py +++ b/sklearn/metrics/tests/test_classification.py @@ -160,13 +160,13 @@ def test_multilabel_accuracy_score_subset_accuracy(): y1 = np.array([[0, 1, 1], [1, 0, 1]]) y2 = np.array([[0, 0, 1], [1, 0, 1]]) - assert_equal(accuracy_score(y1, y2), 0.5) - assert_equal(accuracy_score(y1, y1), 1) - assert_equal(accuracy_score(y2, y2), 1) - assert_equal(accuracy_score(y2, np.logical_not(y2)), 0) - assert_equal(accuracy_score(y1, np.logical_not(y1)), 0) - assert_equal(accuracy_score(y1, np.zeros(y1.shape)), 0) - assert_equal(accuracy_score(y2, np.zeros(y1.shape)), 0) + assert accuracy_score(y1, y2) == 0.5 + assert accuracy_score(y1, y1) == 1 + assert accuracy_score(y2, y2) == 1 + assert accuracy_score(y2, np.logical_not(y2)) == 0 + assert accuracy_score(y1, np.logical_not(y1)) == 0 + assert accuracy_score(y1, np.zeros(y1.shape)) == 0 + assert accuracy_score(y2, np.zeros(y1.shape)) == 0 def test_precision_recall_f1_score_binary(): @@ -204,13 +204,13 @@ def test_precision_recall_f_binary_single_class(): # Test precision, recall and F1 score behave with a single positive or # negative class # Such a case may occur with non-stratified cross-validation - assert_equal(1., precision_score([1, 1], [1, 1])) - assert_equal(1., recall_score([1, 1], [1, 1])) - assert_equal(1., f1_score([1, 1], [1, 1])) + assert 1. == precision_score([1, 1], [1, 1]) + assert 1. 
== recall_score([1, 1], [1, 1]) + assert 1. == f1_score([1, 1], [1, 1]) - assert_equal(0., precision_score([-1, -1], [-1, -1])) - assert_equal(0., recall_score([-1, -1], [-1, -1])) - assert_equal(0., f1_score([-1, -1], [-1, -1])) + assert 0. == precision_score([-1, -1], [-1, -1]) + assert 0. == recall_score([-1, -1], [-1, -1]) + assert 0. == f1_score([-1, -1], [-1, -1]) @ignore_warnings @@ -283,7 +283,7 @@ def test_precision_recall_f_ignored_labels(): # ensure the above were meaningful tests: for average in ['macro', 'weighted', 'micro']: - assert_not_equal(recall_13(average=average), + assert (recall_13(average=average) != recall_all(average=average)) @@ -307,7 +307,7 @@ def test_average_precision_score_duplicate_values(): # test statistic, the average_precision_score should be 1 y_true = [0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1] y_score = [0, .1, .1, .4, .5, .6, .6, .9, .9, 1, 1] - assert_equal(average_precision_score(y_true, y_score), 1) + assert average_precision_score(y_true, y_score) == 1 def test_average_precision_score_tied_values(): @@ -320,7 +320,7 @@ def test_average_precision_score_tied_values(): # than one. y_true = [0, 1, 1] y_score = [.5, .5, .6] - assert_not_equal(average_precision_score(y_true, y_score), 1.) + assert average_precision_score(y_true, y_score) != 1. @ignore_warnings @@ -512,12 +512,12 @@ def test_cohen_kappa(): y2 = np.array([0] * 20 + [1] * 20 + [0] * 10 + [1] * 50) kappa = cohen_kappa_score(y1, y2) assert_almost_equal(kappa, .348, decimal=3) - assert_equal(kappa, cohen_kappa_score(y2, y1)) + assert kappa == cohen_kappa_score(y2, y1) # Add spurious labels and ignore them. y1 = np.append(y1, [2] * 4) y2 = np.append(y2, [2] * 4) - assert_equal(cohen_kappa_score(y1, y2, labels=[0, 1]), kappa) + assert cohen_kappa_score(y1, y2, labels=[0, 1]) == kappa assert_almost_equal(cohen_kappa_score(y1, y1), 1.) 
@@ -538,8 +538,8 @@ def test_cohen_kappa(): @ignore_warnings def test_matthews_corrcoef_nan(): - assert_equal(matthews_corrcoef([0], [1]), 0.0) - assert_equal(matthews_corrcoef([0, 0], [0, 1]), 0.0) + assert matthews_corrcoef([0], [1]) == 0.0 + assert matthews_corrcoef([0, 0], [0, 1]) == 0.0 def test_matthews_corrcoef_against_numpy_corrcoef(): @@ -792,15 +792,15 @@ def test_precision_recall_f1_score_binary_averaged(): average=None) p, r, f, _ = precision_recall_fscore_support(y_true, y_pred, average='macro') - assert_equal(p, np.mean(ps)) - assert_equal(r, np.mean(rs)) - assert_equal(f, np.mean(fs)) + assert p == np.mean(ps) + assert r == np.mean(rs) + assert f == np.mean(fs) p, r, f, _ = precision_recall_fscore_support(y_true, y_pred, average='weighted') support = np.bincount(y_true) - assert_equal(p, np.average(ps, weights=support)) - assert_equal(r, np.average(rs, weights=support)) - assert_equal(f, np.average(fs, weights=support)) + assert p == np.average(ps, weights=support) + assert r == np.average(rs, weights=support) + assert f == np.average(fs, weights=support) def test_zero_precision_recall(): @@ -854,28 +854,28 @@ def test_confusion_matrix_dtype(): weight = np.ones(len(y)) # confusion_matrix returns int64 by default cm = confusion_matrix(y, y) - assert_equal(cm.dtype, np.int64) + assert cm.dtype == np.int64 # The dtype of confusion_matrix is always 64 bit for dtype in [np.bool_, np.int32, np.uint64]: cm = confusion_matrix(y, y, sample_weight=weight.astype(dtype, copy=False)) - assert_equal(cm.dtype, np.int64) + assert cm.dtype == np.int64 for dtype in [np.float32, np.float64, None, object]: cm = confusion_matrix(y, y, sample_weight=weight.astype(dtype, copy=False)) - assert_equal(cm.dtype, np.float64) + assert cm.dtype == np.float64 # np.iinfo(np.uint32).max should be accumulated correctly weight = np.full(len(y), 4294967295, dtype=np.uint32) cm = confusion_matrix(y, y, sample_weight=weight) - assert_equal(cm[0, 0], 4294967295) - assert_equal(cm[1, 1], 8589934590) + assert cm[0, 0] == 4294967295 + assert cm[1, 1] == 8589934590 # np.iinfo(np.int64).max should cause an overflow weight = np.full(len(y), 9223372036854775807, dtype=np.int64) cm = confusion_matrix(y, y, sample_weight=weight) - assert_equal(cm[0, 0], 9223372036854775807) - assert_equal(cm[1, 1], -2) + assert cm[0, 0] == 9223372036854775807 + assert cm[1, 1] == -2 def test_classification_report_multiclass(): @@ -898,7 +898,7 @@ def test_classification_report_multiclass(): report = classification_report( y_true, y_pred, labels=np.arange(len(iris.target_names)), target_names=iris.target_names) - assert_equal(report, expected_report) + assert report == expected_report def test_classification_report_multiclass_balanced(): @@ -916,7 +916,7 @@ def test_classification_report_multiclass_balanced(): weighted avg 0.33 0.33 0.33 9 """ report = classification_report(y_true, y_pred) - assert_equal(report, expected_report) + assert report == expected_report def test_classification_report_multiclass_with_label_detection(): @@ -936,7 +936,7 @@ def test_classification_report_multiclass_with_label_detection(): weighted avg 0.51 0.53 0.47 75 """ report = classification_report(y_true, y_pred) - assert_equal(report, expected_report) + assert report == expected_report def test_classification_report_multiclass_with_digits(): @@ -959,7 +959,7 @@ def test_classification_report_multiclass_with_digits(): report = classification_report( y_true, y_pred, labels=np.arange(len(iris.target_names)), target_names=iris.target_names, digits=5) - 
assert_equal(report, expected_report) + assert report == expected_report def test_classification_report_multiclass_with_string_label(): @@ -980,7 +980,7 @@ def test_classification_report_multiclass_with_string_label(): weighted avg 0.51 0.53 0.47 75 """ report = classification_report(y_true, y_pred) - assert_equal(report, expected_report) + assert report == expected_report expected_report = """\ precision recall f1-score support @@ -995,7 +995,7 @@ def test_classification_report_multiclass_with_string_label(): """ report = classification_report(y_true, y_pred, target_names=["a", "b", "c"]) - assert_equal(report, expected_report) + assert report == expected_report def test_classification_report_multiclass_with_unicode_label(): @@ -1017,7 +1017,7 @@ def test_classification_report_multiclass_with_unicode_label(): weighted avg 0.51 0.53 0.47 75 """ report = classification_report(y_true, y_pred) - assert_equal(report, expected_report) + assert report == expected_report def test_classification_report_multiclass_with_long_string_label(): @@ -1040,7 +1040,7 @@ def test_classification_report_multiclass_with_long_string_label(): """ report = classification_report(y_true, y_pred) - assert_equal(report, expected_report) + assert report == expected_report def test_classification_report_labels_target_names_unequal_length(): @@ -1099,7 +1099,7 @@ def test_multilabel_classification_report(): """ report = classification_report(y_true, y_pred) - assert_equal(report, expected_report) + assert report == expected_report def test_multilabel_zero_one_loss_subset(): @@ -1107,13 +1107,13 @@ def test_multilabel_zero_one_loss_subset(): y1 = np.array([[0, 1, 1], [1, 0, 1]]) y2 = np.array([[0, 0, 1], [1, 0, 1]]) - assert_equal(zero_one_loss(y1, y2), 0.5) - assert_equal(zero_one_loss(y1, y1), 0) - assert_equal(zero_one_loss(y2, y2), 0) - assert_equal(zero_one_loss(y2, np.logical_not(y2)), 1) - assert_equal(zero_one_loss(y1, np.logical_not(y1)), 1) - assert_equal(zero_one_loss(y1, np.zeros(y1.shape)), 1) - assert_equal(zero_one_loss(y2, np.zeros(y1.shape)), 1) + assert zero_one_loss(y1, y2) == 0.5 + assert zero_one_loss(y1, y1) == 0 + assert zero_one_loss(y2, y2) == 0 + assert zero_one_loss(y2, np.logical_not(y2)) == 1 + assert zero_one_loss(y1, np.logical_not(y1)) == 1 + assert zero_one_loss(y1, np.zeros(y1.shape)) == 1 + assert zero_one_loss(y2, np.zeros(y1.shape)) == 1 def test_multilabel_hamming_loss(): @@ -1122,18 +1122,18 @@ def test_multilabel_hamming_loss(): y2 = np.array([[0, 0, 1], [1, 0, 1]]) w = np.array([1, 3]) - assert_equal(hamming_loss(y1, y2), 1 / 6) - assert_equal(hamming_loss(y1, y1), 0) - assert_equal(hamming_loss(y2, y2), 0) - assert_equal(hamming_loss(y2, 1 - y2), 1) - assert_equal(hamming_loss(y1, 1 - y1), 1) - assert_equal(hamming_loss(y1, np.zeros(y1.shape)), 4 / 6) - assert_equal(hamming_loss(y2, np.zeros(y1.shape)), 0.5) - assert_equal(hamming_loss(y1, y2, sample_weight=w), 1. / 12) - assert_equal(hamming_loss(y1, 1-y2, sample_weight=w), 11. / 12) - assert_equal(hamming_loss(y1, np.zeros_like(y1), sample_weight=w), 2. / 3) + assert hamming_loss(y1, y2) == 1 / 6 + assert hamming_loss(y1, y1) == 0 + assert hamming_loss(y2, y2) == 0 + assert hamming_loss(y2, 1 - y2) == 1 + assert hamming_loss(y1, 1 - y1) == 1 + assert hamming_loss(y1, np.zeros(y1.shape)) == 4 / 6 + assert hamming_loss(y2, np.zeros(y1.shape)) == 0.5 + assert hamming_loss(y1, y2, sample_weight=w) == 1. / 12 + assert hamming_loss(y1, 1-y2, sample_weight=w) == 11. 
/ 12 + assert hamming_loss(y1, np.zeros_like(y1), sample_weight=w) == 2. / 3 # sp_hamming only works with 1-D arrays - assert_equal(hamming_loss(y1[0], y2[0]), sp_hamming(y1[0], y2[0])) + assert hamming_loss(y1[0], y2[0]) == sp_hamming(y1[0], y2[0]) assert_warns_message(DeprecationWarning, "The labels parameter is unused. It was" " deprecated in version 0.21 and" @@ -1335,7 +1335,7 @@ def test_precision_recall_f1_score_multilabel_1(): assert_almost_equal(p, 1.5 / 4) assert_almost_equal(r, 0.5) assert_almost_equal(f, 2.5 / 1.5 * 0.25) - assert_equal(s, None) + assert s == None assert_almost_equal(fbeta_score(y_true, y_pred, beta=2, average="macro"), np.mean(f2)) @@ -1345,7 +1345,7 @@ def test_precision_recall_f1_score_multilabel_1(): assert_almost_equal(p, 0.5) assert_almost_equal(r, 0.5) assert_almost_equal(f, 0.5) - assert_equal(s, None) + assert s == None assert_almost_equal(fbeta_score(y_true, y_pred, beta=2, average="micro"), (1 + 4) * p * r / (4 * p + r)) @@ -1356,7 +1356,7 @@ def test_precision_recall_f1_score_multilabel_1(): assert_almost_equal(p, 1.5 / 4) assert_almost_equal(r, 0.5) assert_almost_equal(f, 2.5 / 1.5 * 0.25) - assert_equal(s, None) + assert s == None assert_almost_equal(fbeta_score(y_true, y_pred, beta=2, average="weighted"), np.average(f2, weights=support)) @@ -1369,7 +1369,7 @@ def test_precision_recall_f1_score_multilabel_1(): assert_almost_equal(p, 0.5) assert_almost_equal(r, 0.5) assert_almost_equal(f, 0.5) - assert_equal(s, None) + assert s == None assert_almost_equal(fbeta_score(y_true, y_pred, beta=2, average="samples"), 0.5) @@ -1401,7 +1401,7 @@ def test_precision_recall_f1_score_multilabel_2(): assert_almost_equal(p, 0.25) assert_almost_equal(r, 0.25) assert_almost_equal(f, 2 * 0.25 * 0.25 / 0.5) - assert_equal(s, None) + assert s == None assert_almost_equal(fbeta_score(y_true, y_pred, beta=2, average="micro"), (1 + 4) * p * r / (4 * p + r)) @@ -1411,7 +1411,7 @@ def test_precision_recall_f1_score_multilabel_2(): assert_almost_equal(p, 0.25) assert_almost_equal(r, 0.125) assert_almost_equal(f, 2 / 12) - assert_equal(s, None) + assert s == None assert_almost_equal(fbeta_score(y_true, y_pred, beta=2, average="macro"), np.mean(f2)) @@ -1421,7 +1421,7 @@ def test_precision_recall_f1_score_multilabel_2(): assert_almost_equal(p, 2 / 4) assert_almost_equal(r, 1 / 4) assert_almost_equal(f, 2 / 3 * 2 / 4) - assert_equal(s, None) + assert s == None assert_almost_equal(fbeta_score(y_true, y_pred, beta=2, average="weighted"), np.average(f2, weights=support)) @@ -1436,7 +1436,7 @@ def test_precision_recall_f1_score_multilabel_2(): assert_almost_equal(p, 1 / 6) assert_almost_equal(r, 1 / 6) assert_almost_equal(f, 2 / 4 * 1 / 3) - assert_equal(s, None) + assert s == None assert_almost_equal(fbeta_score(y_true, y_pred, beta=2, average="samples"), 0.1666, 2) @@ -1466,7 +1466,7 @@ def test_precision_recall_f1_score_with_an_empty_prediction(): assert_almost_equal(p, 0.5) assert_almost_equal(r, 1.5 / 4) assert_almost_equal(f, 2.5 / (4 * 1.5)) - assert_equal(s, None) + assert s == None assert_almost_equal(fbeta_score(y_true, y_pred, beta=2, average="macro"), np.mean(f2)) @@ -1476,7 +1476,7 @@ def test_precision_recall_f1_score_with_an_empty_prediction(): assert_almost_equal(p, 2 / 3) assert_almost_equal(r, 0.5) assert_almost_equal(f, 2 / 3 / (2 / 3 + 0.5)) - assert_equal(s, None) + assert s == None assert_almost_equal(fbeta_score(y_true, y_pred, beta=2, average="micro"), (1 + 4) * p * r / (4 * p + r)) @@ -1486,7 +1486,7 @@ def 
test_precision_recall_f1_score_with_an_empty_prediction(): assert_almost_equal(p, 3 / 4) assert_almost_equal(r, 0.5) assert_almost_equal(f, (2 / 1.5 + 1) / 4) - assert_equal(s, None) + assert s == None assert_almost_equal(fbeta_score(y_true, y_pred, beta=2, average="weighted"), np.average(f2, weights=support)) @@ -1499,7 +1499,7 @@ def test_precision_recall_f1_score_with_an_empty_prediction(): assert_almost_equal(p, 1 / 3) assert_almost_equal(r, 1 / 3) assert_almost_equal(f, 1 / 3) - assert_equal(s, None) + assert s == None assert_almost_equal(fbeta_score(y_true, y_pred, beta=2, average="samples"), 0.333, 2) @@ -1518,7 +1518,7 @@ def test_precision_recall_f1_no_labels(beta, average): assert_almost_equal(p, 0) assert_almost_equal(r, 0) assert_almost_equal(f, 0) - assert_equal(s, None) + assert s == None fbeta = assert_warns(UndefinedMetricWarning, fbeta_score, y_true, y_pred, @@ -1603,10 +1603,10 @@ def test_prf_warnings(): precision_recall_fscore_support([0, 0], [0, 0], average="binary") msg = ('Recall and F-score are ill-defined and ' 'being set to 0.0 due to no true samples.') - assert_equal(str(record.pop().message), msg) + assert str(record.pop().message) == msg msg = ('Precision and F-score are ill-defined and ' 'being set to 0.0 due to no predicted samples.') - assert_equal(str(record.pop().message), msg) + assert str(record.pop().message) == msg def test_recall_warnings(): @@ -1619,11 +1619,11 @@ def test_recall_warnings(): recall_score(np.array([[0, 0], [0, 0]]), np.array([[1, 1], [1, 1]]), average='micro') - assert_equal(str(record.pop().message), + assert (str(record.pop().message) == 'Recall is ill-defined and ' 'being set to 0.0 due to no true samples.') recall_score([0, 0], [0, 0]) - assert_equal(str(record.pop().message), + assert (str(record.pop().message) == 'Recall is ill-defined and ' 'being set to 0.0 due to no true samples.') @@ -1634,11 +1634,11 @@ def test_precision_warnings(): precision_score(np.array([[1, 1], [1, 1]]), np.array([[0, 0], [0, 0]]), average='micro') - assert_equal(str(record.pop().message), + assert (str(record.pop().message) == 'Precision is ill-defined and ' 'being set to 0.0 due to no predicted samples.') precision_score([0, 0], [0, 0]) - assert_equal(str(record.pop().message), + assert (str(record.pop().message) == 'Precision is ill-defined and ' 'being set to 0.0 due to no predicted samples.') @@ -1656,20 +1656,20 @@ def test_fscore_warnings(): score(np.array([[1, 1], [1, 1]]), np.array([[0, 0], [0, 0]]), average='micro') - assert_equal(str(record.pop().message), + assert (str(record.pop().message) == 'F-score is ill-defined and ' 'being set to 0.0 due to no predicted samples.') score(np.array([[0, 0], [0, 0]]), np.array([[1, 1], [1, 1]]), average='micro') - assert_equal(str(record.pop().message), + assert (str(record.pop().message) == 'F-score is ill-defined and ' 'being set to 0.0 due to no true samples.') score([0, 0], [0, 0]) - assert_equal(str(record.pop().message), + assert (str(record.pop().message) == 'F-score is ill-defined and ' 'being set to 0.0 due to no true samples.') - assert_equal(str(record.pop().message), + assert (str(record.pop().message) == 'F-score is ill-defined and ' 'being set to 0.0 due to no predicted samples.') @@ -1772,10 +1772,10 @@ def test__check_targets(): else: merged_type, y1out, y2out = _check_targets(y1, y2) - assert_equal(merged_type, expected) + assert merged_type == expected if merged_type.startswith('multilabel'): - assert_equal(y1out.format, 'csr') - assert_equal(y2out.format, 'csr') + assert y1out.format 
== 'csr' + assert y2out.format == 'csr' else: assert_array_equal(y1out, np.squeeze(y1)) assert_array_equal(y2out, np.squeeze(y2)) @@ -1795,17 +1795,17 @@ def test__check_targets_multiclass_with_both_y_true_and_y_pred_binary(): # https://github.com/scikit-learn/scikit-learn/issues/8098 y_true = [0, 1] y_pred = [0, -1] - assert_equal(_check_targets(y_true, y_pred)[0], 'multiclass') + assert _check_targets(y_true, y_pred)[0] == 'multiclass' def test_hinge_loss_binary(): y_true = np.array([-1, 1, 1, -1]) pred_decision = np.array([-8.5, 0.5, 1.5, -0.3]) - assert_equal(hinge_loss(y_true, pred_decision), 1.2 / 4) + assert hinge_loss(y_true, pred_decision) == 1.2 / 4 y_true = np.array([0, 2, 2, 0]) pred_decision = np.array([-8.5, 0.5, 1.5, -0.3]) - assert_equal(hinge_loss(y_true, pred_decision), 1.2 / 4) + assert hinge_loss(y_true, pred_decision) == 1.2 / 4 def test_hinge_loss_multiclass(): @@ -1828,7 +1828,7 @@ def test_hinge_loss_multiclass(): ]) np.clip(dummy_losses, 0, None, out=dummy_losses) dummy_hinge_loss = np.mean(dummy_losses) - assert_equal(hinge_loss(y_true, pred_decision), + assert (hinge_loss(y_true, pred_decision) == dummy_hinge_loss) @@ -1866,7 +1866,7 @@ def test_hinge_loss_multiclass_with_missing_labels(): ]) np.clip(dummy_losses, 0, None, out=dummy_losses) dummy_hinge_loss = np.mean(dummy_losses) - assert_equal(hinge_loss(y_true, pred_decision, labels=labels), + assert (hinge_loss(y_true, pred_decision, labels=labels) == dummy_hinge_loss) @@ -1893,7 +1893,7 @@ def test_hinge_loss_multiclass_invariance_lists(): ]) np.clip(dummy_losses, 0, None, out=dummy_losses) dummy_hinge_loss = np.mean(dummy_losses) - assert_equal(hinge_loss(y_true, pred_decision), + assert (hinge_loss(y_true, pred_decision) == dummy_hinge_loss) @@ -2044,10 +2044,10 @@ def test_multilabel_jaccard_similarity_score_deprecation(): # size(y1 \union y2) = [2, 2] jss = partial(assert_warns, DeprecationWarning, jaccard_similarity_score) - assert_equal(jss(y1, y2), 0.75) - assert_equal(jss(y1, y1), 1) - assert_equal(jss(y2, y2), 1) - assert_equal(jss(y2, np.logical_not(y2)), 0) - assert_equal(jss(y1, np.logical_not(y1)), 0) - assert_equal(jss(y1, np.zeros(y1.shape)), 0) - assert_equal(jss(y2, np.zeros(y1.shape)), 0) + assert jss(y1, y2) == 0.75 + assert jss(y1, y1) == 1 + assert jss(y2, y2) == 1 + assert jss(y2, np.logical_not(y2)) == 0 + assert jss(y1, np.logical_not(y1)) == 0 + assert jss(y1, np.zeros(y1.shape)) == 0 + assert jss(y2, np.zeros(y1.shape)) == 0 diff --git a/sklearn/metrics/tests/test_common.py b/sklearn/metrics/tests/test_common.py index 86ab2f6281678..13ab6ecd3d804 100644 --- a/sklearn/metrics/tests/test_common.py +++ b/sklearn/metrics/tests/test_common.py @@ -473,13 +473,13 @@ def test_symmetry(): y_pred_bin = random_state.randint(0, 2, size=(20, 25)) # We shouldn't forget any metrics - assert_equal(SYMMETRIC_METRICS.union( + assert (SYMMETRIC_METRICS.union( NOT_SYMMETRIC_METRICS, set(THRESHOLDED_METRICS), - METRIC_UNDEFINED_BINARY_MULTICLASS), + METRIC_UNDEFINED_BINARY_MULTICLASS) == set(ALL_METRICS)) - assert_equal( - SYMMETRIC_METRICS.intersection(NOT_SYMMETRIC_METRICS), + assert ( + SYMMETRIC_METRICS.intersection(NOT_SYMMETRIC_METRICS) == set()) # Symmetric metric diff --git a/sklearn/metrics/tests/test_pairwise.py b/sklearn/metrics/tests/test_pairwise.py index 6fd39333fb7a7..ecf943a4c0bcc 100644 --- a/sklearn/metrics/tests/test_pairwise.py +++ b/sklearn/metrics/tests/test_pairwise.py @@ -95,15 +95,15 @@ def test_pairwise_distances(): # scipy.spatial. 
S = pairwise_distances(X, metric="cityblock") S2 = pairwise_distances(X, metric=cityblock) - assert_equal(S.shape[0], S.shape[1]) - assert_equal(S.shape[0], X.shape[0]) + assert S.shape[0] == S.shape[1] + assert S.shape[0] == X.shape[0] assert_array_almost_equal(S, S2) # The manhattan metric should be equivalent to cityblock. S = pairwise_distances(X, Y, metric="manhattan") S2 = pairwise_distances(X, Y, metric=cityblock) - assert_equal(S.shape[0], X.shape[0]) - assert_equal(S.shape[1], Y.shape[0]) + assert S.shape[0] == X.shape[0] + assert S.shape[1] == Y.shape[0] assert_array_almost_equal(S, S2) # Test cosine as a string metric versus cosine callable @@ -111,8 +111,8 @@ def test_pairwise_distances(): # while the function cosine is scipy.spatial S = pairwise_distances(X, Y, metric="cosine") S2 = pairwise_distances(X, Y, metric=cosine) - assert_equal(S.shape[0], X.shape[0]) - assert_equal(S.shape[1], Y.shape[0]) + assert S.shape[0] == X.shape[0] + assert S.shape[1] == Y.shape[0] assert_array_almost_equal(S, S2) # Test with sparse X and Y, @@ -217,7 +217,7 @@ def test_pairwise_precomputed(func): # Test always returns float dtype S = func(np.array([[1]], dtype='int'), metric='precomputed') - assert_equal('f', S.dtype.kind) + assert 'f' == S.dtype.kind # Test converts list to array-like S = func([[1.]], metric='precomputed') @@ -277,7 +277,7 @@ def test_pairwise_callable_nonstrict_metric(): # paired_distances should allow callable metric where metric(x, x) != 0 # Knowing that the callable is a strict metric would allow the diagonal to # be left uncalculated and set to 0. - assert_equal(pairwise_distances([[1.]], metric=lambda x, y: 5)[0, 0], 5) + assert pairwise_distances([[1.]], metric=lambda x, y: 5)[0, 0] == 5 # Test with all metrics that should be in PAIRWISE_KERNEL_FUNCTIONS. @@ -413,8 +413,8 @@ def test_pairwise_distances_argmin_min(): assert_array_almost_equal(idxsp, expected_idx) assert_array_almost_equal(valssp, expected_vals) # We don't want np.matrix here - assert_equal(type(idxsp), np.ndarray) - assert_equal(type(valssp), np.ndarray) + assert type(idxsp) == np.ndarray + assert type(valssp) == np.ndarray # euclidean metric squared idx, vals = pairwise_distances_argmin_min(X, Y, metric="euclidean", @@ -830,7 +830,7 @@ def test_chi_square_kernel(): K_add = additive_chi2_kernel(X, Y) gamma = 0.1 K = chi2_kernel(X, Y, gamma=gamma) - assert_equal(K.dtype, np.float) + assert K.dtype == np.float for i, x in enumerate(X): for j, y in enumerate(Y): chi2 = -np.sum((x - y) ** 2 / (x + y)) @@ -848,21 +848,21 @@ def test_chi_square_kernel(): X = rng.random_sample((5, 4)).astype(np.float32) Y = rng.random_sample((10, 4)).astype(np.float32) K = chi2_kernel(X, Y) - assert_equal(K.dtype, np.float32) + assert K.dtype == np.float32 # check integer type gets converted, # check that zeros are handled X = rng.random_sample((10, 4)).astype(np.int32) K = chi2_kernel(X, X) assert np.isfinite(K).all() - assert_equal(K.dtype, np.float) + assert K.dtype == np.float # check that kernel of similar things is greater than dissimilar ones X = [[.3, .7], [1., 0]] Y = [[0, 1], [.9, .1]] K = chi2_kernel(X, Y) - assert_greater(K[0, 0], K[0, 1]) - assert_greater(K[1, 1], K[1, 0]) + assert K[0, 0] > K[0, 1] + assert K[1, 1] > K[1, 0] # test negative input assert_raises(ValueError, chi2_kernel, [[0, -1]]) @@ -1034,15 +1034,15 @@ def test_check_sparse_arrays(): # compare their difference because testing csr matrices for # equality with '==' does not work as expected. 
assert issparse(XA_checked) - assert_equal(abs(XA_sparse - XA_checked).sum(), 0) + assert abs(XA_sparse - XA_checked).sum() == 0 assert issparse(XB_checked) - assert_equal(abs(XB_sparse - XB_checked).sum(), 0) + assert abs(XB_sparse - XB_checked).sum() == 0 XA_checked, XA_2_checked = check_pairwise_arrays(XA_sparse, XA_sparse) assert issparse(XA_checked) - assert_equal(abs(XA_sparse - XA_checked).sum(), 0) + assert abs(XA_sparse - XA_checked).sum() == 0 assert issparse(XA_2_checked) - assert_equal(abs(XA_2_checked - XA_checked).sum(), 0) + assert abs(XA_2_checked - XA_checked).sum() == 0 def tuplify(X): @@ -1074,24 +1074,24 @@ def test_check_preserve_type(): XB = np.resize(np.arange(40), (5, 8)).astype(np.float32) XA_checked, XB_checked = check_pairwise_arrays(XA, None) - assert_equal(XA_checked.dtype, np.float32) + assert XA_checked.dtype == np.float32 # both float32 XA_checked, XB_checked = check_pairwise_arrays(XA, XB) - assert_equal(XA_checked.dtype, np.float32) - assert_equal(XB_checked.dtype, np.float32) + assert XA_checked.dtype == np.float32 + assert XB_checked.dtype == np.float32 # mismatched A XA_checked, XB_checked = check_pairwise_arrays(XA.astype(np.float), XB) - assert_equal(XA_checked.dtype, np.float) - assert_equal(XB_checked.dtype, np.float) + assert XA_checked.dtype == np.float + assert XB_checked.dtype == np.float # mismatched B XA_checked, XB_checked = check_pairwise_arrays(XA, XB.astype(np.float)) - assert_equal(XA_checked.dtype, np.float) - assert_equal(XB_checked.dtype, np.float) + assert XA_checked.dtype == np.float + assert XB_checked.dtype == np.float @pytest.mark.parametrize("n_jobs", [1, 2]) diff --git a/sklearn/metrics/tests/test_ranking.py b/sklearn/metrics/tests/test_ranking.py index d634bd59e0fe0..78e2d2a69a48c 100644 --- a/sklearn/metrics/tests/test_ranking.py +++ b/sklearn/metrics/tests/test_ranking.py @@ -185,8 +185,8 @@ def test_roc_curve(drop): roc_auc = auc(fpr, tpr) assert_array_almost_equal(roc_auc, expected_auc, decimal=2) assert_almost_equal(roc_auc, roc_auc_score(y_true, probas_pred)) - assert_equal(fpr.shape, tpr.shape) - assert_equal(fpr.shape, thresholds.shape) + assert fpr.shape == tpr.shape + assert fpr.shape == thresholds.shape def test_roc_curve_end_points(): @@ -196,10 +196,10 @@ def test_roc_curve_end_points(): y_true = np.array([0] * 50 + [1] * 50) y_pred = rng.randint(3, size=100) fpr, tpr, thr = roc_curve(y_true, y_pred, drop_intermediate=True) - assert_equal(fpr[0], 0) - assert_equal(fpr[-1], 1) - assert_equal(fpr.shape, tpr.shape) - assert_equal(fpr.shape, thr.shape) + assert fpr[0] == 0 + assert fpr[-1] == 1 + assert fpr.shape == tpr.shape + assert fpr.shape == thr.shape def test_roc_returns_consistency(): @@ -217,8 +217,8 @@ def test_roc_returns_consistency(): # compare tpr and tpr_correct to see if the thresholds' order was correct assert_array_almost_equal(tpr, tpr_correct, decimal=2) - assert_equal(fpr.shape, tpr.shape) - assert_equal(fpr.shape, thresholds.shape) + assert fpr.shape == tpr.shape + assert fpr.shape == thresholds.shape def test_roc_curve_multi(): @@ -235,8 +235,8 @@ def test_roc_curve_confidence(): fpr, tpr, thresholds = roc_curve(y_true, probas_pred - 0.5) roc_auc = auc(fpr, tpr) assert_array_almost_equal(roc_auc, 0.90, decimal=2) - assert_equal(fpr.shape, tpr.shape) - assert_equal(fpr.shape, thresholds.shape) + assert fpr.shape == tpr.shape + assert fpr.shape == thresholds.shape def test_roc_curve_hard(): @@ -248,23 +248,23 @@ def test_roc_curve_hard(): fpr, tpr, thresholds = roc_curve(y_true, trivial_pred) 
roc_auc = auc(fpr, tpr) assert_array_almost_equal(roc_auc, 0.50, decimal=2) - assert_equal(fpr.shape, tpr.shape) - assert_equal(fpr.shape, thresholds.shape) + assert fpr.shape == tpr.shape + assert fpr.shape == thresholds.shape # always predict zero trivial_pred = np.zeros(y_true.shape) fpr, tpr, thresholds = roc_curve(y_true, trivial_pred) roc_auc = auc(fpr, tpr) assert_array_almost_equal(roc_auc, 0.50, decimal=2) - assert_equal(fpr.shape, tpr.shape) - assert_equal(fpr.shape, thresholds.shape) + assert fpr.shape == tpr.shape + assert fpr.shape == thresholds.shape # hard decisions fpr, tpr, thresholds = roc_curve(y_true, pred) roc_auc = auc(fpr, tpr) assert_array_almost_equal(roc_auc, 0.78, decimal=2) - assert_equal(fpr.shape, tpr.shape) - assert_equal(fpr.shape, thresholds.shape) + assert fpr.shape == tpr.shape + assert fpr.shape == thresholds.shape def test_roc_curve_one_label(): @@ -275,8 +275,8 @@ def test_roc_curve_one_label(): fpr, tpr, thresholds = assert_warns(w, roc_curve, y_true, y_pred) # all true labels, all fpr should be nan assert_array_equal(fpr, np.full(len(thresholds), np.nan)) - assert_equal(fpr.shape, tpr.shape) - assert_equal(fpr.shape, thresholds.shape) + assert fpr.shape == tpr.shape + assert fpr.shape == thresholds.shape # assert there are warnings fpr, tpr, thresholds = assert_warns(w, roc_curve, @@ -284,8 +284,8 @@ def test_roc_curve_one_label(): y_pred) # all negative labels, all tpr should be nan assert_array_equal(tpr, np.full(len(thresholds), np.nan)) - assert_equal(fpr.shape, tpr.shape) - assert_equal(fpr.shape, thresholds.shape) + assert fpr.shape == tpr.shape + assert fpr.shape == thresholds.shape def test_roc_curve_toydata(): @@ -403,8 +403,8 @@ def test_roc_curve_fpr_tpr_increasing(): y_score = [0.1, 0.7, 0.3, 0.4, 0.5] sample_weight = np.repeat(0.2, 5) fpr, tpr, _ = roc_curve(y_true, y_score, sample_weight=sample_weight) - assert_equal((np.diff(fpr) < 0).sum(), 0) - assert_equal((np.diff(tpr) < 0).sum(), 0) + assert (np.diff(fpr) < 0).sum() == 0 + assert (np.diff(tpr) < 0).sum() == 0 def test_auc(): @@ -505,8 +505,8 @@ def test_precision_recall_curve(): assert_array_almost_equal(p, np.array([0.5, 0.33333333, 0.5, 1., 1.])) assert_array_almost_equal(r, np.array([1., 0.5, 0.5, 0.5, 0.])) assert_array_almost_equal(t, np.array([1, 2, 3, 4])) - assert_equal(p.size, r.size) - assert_equal(p.size, t.size + 1) + assert p.size == r.size + assert p.size == t.size + 1 def _test_precision_recall_curve(y_true, probas_pred): @@ -518,13 +518,13 @@ def _test_precision_recall_curve(y_true, probas_pred): average_precision_score(y_true, probas_pred)) assert_almost_equal(_average_precision(y_true, probas_pred), precision_recall_auc, decimal=3) - assert_equal(p.size, r.size) - assert_equal(p.size, thresholds.size + 1) + assert p.size == r.size + assert p.size == thresholds.size + 1 # Smoke test in the case of proba having only one value p, r, thresholds = precision_recall_curve(y_true, np.zeros_like(probas_pred)) - assert_equal(p.size, r.size) - assert_equal(p.size, thresholds.size + 1) + assert p.size == r.size + assert p.size == thresholds.size + 1 def test_precision_recall_curve_errors(): @@ -648,7 +648,7 @@ def test_average_precision_constant_values(): y_score = np.ones(100) # The precision is then the fraction of positive whatever the recall # is, as there is only one threshold: - assert_equal(average_precision_score(y_true, y_score), .25) + assert average_precision_score(y_true, y_score) == .25 def test_average_precision_score_pos_label_errors(): @@ -681,17 +681,17 @@ 
def test_score_scale_invariance(): roc_auc_scaled_up = roc_auc_score(y_true, 100 * probas_pred) roc_auc_scaled_down = roc_auc_score(y_true, 1e-6 * probas_pred) roc_auc_shifted = roc_auc_score(y_true, probas_pred - 10) - assert_equal(roc_auc, roc_auc_scaled_up) - assert_equal(roc_auc, roc_auc_scaled_down) - assert_equal(roc_auc, roc_auc_shifted) + assert roc_auc == roc_auc_scaled_up + assert roc_auc == roc_auc_scaled_down + assert roc_auc == roc_auc_shifted pr_auc = average_precision_score(y_true, probas_pred) pr_auc_scaled_up = average_precision_score(y_true, 100 * probas_pred) pr_auc_scaled_down = average_precision_score(y_true, 1e-6 * probas_pred) pr_auc_shifted = average_precision_score(y_true, probas_pred - 10) - assert_equal(pr_auc, pr_auc_scaled_up) - assert_equal(pr_auc, pr_auc_scaled_down) - assert_equal(pr_auc, pr_auc_shifted) + assert pr_auc == pr_auc_scaled_up + assert pr_auc == pr_auc_scaled_down + assert pr_auc == pr_auc_shifted def check_lrap_toy(lrap_score): @@ -759,13 +759,13 @@ def check_zero_or_all_relevant_labels(lrap_score): # No relevant labels y_true = np.zeros((1, n_labels)) - assert_equal(lrap_score(y_true, y_score), 1.) - assert_equal(lrap_score(y_true, y_score_ties), 1.) + assert lrap_score(y_true, y_score) == 1. + assert lrap_score(y_true, y_score_ties) == 1. # Only relevant labels y_true = np.ones((1, n_labels)) - assert_equal(lrap_score(y_true, y_score), 1.) - assert_equal(lrap_score(y_true, y_score_ties), 1.) + assert lrap_score(y_true, y_score) == 1. + assert lrap_score(y_true, y_score_ties) == 1. # Degenerate case: only one label assert_almost_equal(lrap_score([[1], [0], [1], [0]], diff --git a/sklearn/metrics/tests/test_regression.py b/sklearn/metrics/tests/test_regression.py index b9b7ade63f68e..a40ec3856f201 100644 --- a/sklearn/metrics/tests/test_regression.py +++ b/sklearn/metrics/tests/test_regression.py @@ -93,7 +93,7 @@ def test__check_reg_targets(): if type1 == type2 and n_out1 == n_out2: y_type, y_check1, y_check2, multioutput = _check_reg_targets( y1, y2, None) - assert_equal(type1, y_type) + assert type1 == y_type if type1 == 'continuous': assert_array_equal(y_check1, np.reshape(y1, (-1, 1))) assert_array_equal(y_check2, np.reshape(y2, (-1, 1))) @@ -143,8 +143,8 @@ def test_regression_multioutput_array(): r = r2_score([[0, -1], [0, 1]], [[2, 2], [1, 1]], multioutput='raw_values') assert_array_almost_equal(r, [0, -3.5], decimal=2) - assert_equal(np.mean(r), r2_score([[0, -1], [0, 1]], [[2, 2], [1, 1]], - multioutput='uniform_average')) + assert np.mean(r) == r2_score([[0, -1], [0, 1]], [[2, 2], [1, 1]], + multioutput='uniform_average') evs = explained_variance_score([[0, -1], [0, 1]], [[2, 2], [1, 1]], multioutput='raw_values') assert_array_almost_equal(evs, [0, -1.25], decimal=2) @@ -155,11 +155,11 @@ def test_regression_multioutput_array(): y_pred = [[1, 4], [-1, 1]] r2 = r2_score(y_true, y_pred, multioutput='raw_values') assert_array_almost_equal(r2, [1., -3.], decimal=2) - assert_equal(np.mean(r2), r2_score(y_true, y_pred, - multioutput='uniform_average')) + assert np.mean(r2) == r2_score(y_true, y_pred, + multioutput='uniform_average') evs = explained_variance_score(y_true, y_pred, multioutput='raw_values') assert_array_almost_equal(evs, [1., -3.], decimal=2) - assert_equal(np.mean(evs), explained_variance_score(y_true, y_pred)) + assert np.mean(evs) == explained_variance_score(y_true, y_pred) # Handling msle separately as it does not accept negative inputs. 
y_true = np.array([[0.5, 1], [1, 2], [7, 6]]) diff --git a/sklearn/metrics/tests/test_score_objects.py b/sklearn/metrics/tests/test_score_objects.py index 61ef471af3cc1..8ce7fd6389271 100644 --- a/sklearn/metrics/tests/test_score_objects.py +++ b/sklearn/metrics/tests/test_score_objects.py @@ -198,8 +198,8 @@ def check_multimetric_scoring_single_metric_wrapper(*args, **kwargs): if args[0] is not None: assert scorers is not None names, scorers = zip(*scorers.items()) - assert_equal(len(scorers), 1) - assert_equal(names[0], 'score') + assert len(scorers) == 1 + assert names[0] == 'score' scorers = scorers[0] return scorers @@ -224,7 +224,7 @@ def test_check_scoring_and_check_multimetric_scoring(): scorers, is_multi = _check_multimetric_scoring(estimator, scoring) assert is_multi assert isinstance(scorers, dict) - assert_equal(sorted(scorers.keys()), sorted(list(scoring))) + assert sorted(scorers.keys()) == sorted(list(scoring)) assert all([isinstance(scorer, _PredictScorer) for scorer in list(scorers.values())]) @@ -484,10 +484,10 @@ def test_scorer_sample_weight(): sample_weight=sample_weight) ignored = scorer(estimator[name], X_test[10:], target[10:]) unweighted = scorer(estimator[name], X_test, target) - assert_not_equal(weighted, unweighted, - msg="scorer {0} behaves identically when " - "called with sample weights: {1} vs " - "{2}".format(name, weighted, unweighted)) + assert weighted != unweighted, ( + "scorer {0} behaves identically when " + "called with sample weights: {1} vs " + "{2}".format(name, weighted, unweighted)) assert_almost_equal(weighted, ignored, err_msg="scorer {0} behaves differently when " "ignoring samples and setting sample_weight to" From bcbfe65e847ba4ed797c16105807181c92823a8e Mon Sep 17 00:00:00 2001 From: adrinjalali Date: Sun, 30 Jun 2019 15:57:46 +0200 Subject: [PATCH 12/22] cleanup assert_message --- sklearn/datasets/tests/test_samples_generator.py | 9 ++++----- sklearn/ensemble/tests/test_forest.py | 8 +++----- sklearn/tests/test_base.py | 6 +++--- 3 files changed, 10 insertions(+), 13 deletions(-) diff --git a/sklearn/datasets/tests/test_samples_generator.py b/sklearn/datasets/tests/test_samples_generator.py index f3e0e20b7dea8..092fcc1290dea 100644 --- a/sklearn/datasets/tests/test_samples_generator.py +++ b/sklearn/datasets/tests/test_samples_generator.py @@ -103,16 +103,15 @@ def test_make_classification_informative_features(): unique_signs, cluster_index = np.unique(signs, return_inverse=True) - assert_message = ("Wrong number of clusters, or not in distinct " - "quadrants") - assert len(unique_signs) == n_clusters, assert_message + assert len(unique_signs) == n_clusters, ( + "Wrong number of clusters, or not in distinct quadrants") clusters_by_class = defaultdict(set) for cluster, cls in zip(cluster_index, y): clusters_by_class[cls].add(cluster) for clusters in clusters_by_class.values(): - assert_message = "Wrong number of clusters per class" - assert len(clusters) == n_clusters_per_class, assert_message + assert len(clusters) == n_clusters_per_class, ( + "Wrong number of clusters per class") assert (len(clusters_by_class) == n_classes), "Wrong number of classes" diff --git a/sklearn/ensemble/tests/test_forest.py b/sklearn/ensemble/tests/test_forest.py index 93b3309ba5a1a..3ce35a4a2ccea 100644 --- a/sklearn/ensemble/tests/test_forest.py +++ b/sklearn/ensemble/tests/test_forest.py @@ -838,13 +838,11 @@ def check_min_weight_fraction_leaf(name): node_weights = np.bincount(out, weights=weights) # drop inner nodes leaf_weights = node_weights[node_weights 
!= 0] - assert_message = ("Failed with {0} " - "min_weight_fraction_leaf={1}".format( - name, est.min_weight_fraction_leaf)) assert ( np.min(leaf_weights) >= - total_weight * est.min_weight_fraction_leaf), assert_message - + total_weight * est.min_weight_fraction_leaf), ( + "Failed with {0} min_weight_fraction_leaf={1}".format( + name, est.min_weight_fraction_leaf)) @pytest.mark.parametrize('name', FOREST_ESTIMATORS) def test_min_weight_fraction_leaf(name): diff --git a/sklearn/tests/test_base.py b/sklearn/tests/test_base.py index 1ed90ecca42ce..190af5a8f6800 100644 --- a/sklearn/tests/test_base.py +++ b/sklearn/tests/test_base.py @@ -287,11 +287,11 @@ def test_score_sample_weight(): # generate random sample weights sample_weight = rng.randint(1, 10, size=len(ds.target)) # check that the score with and without sample weights are different - assert_message = ("Unweighted and weighted scores " - "are unexpectedly equal") assert (est.score(ds.data, ds.target) != est.score(ds.data, ds.target, - sample_weight=sample_weight)), assert_message + sample_weight=sample_weight)), ( + "Unweighted and weighted scores " + "are unexpectedly equal") def test_clone_pandas_dataframe(): From 1862c828c6328adeba47f5eddc8faffd9fdaed9d Mon Sep 17 00:00:00 2001 From: adrinjalali Date: Sun, 30 Jun 2019 15:59:09 +0200 Subject: [PATCH 13/22] fix mixture, model_selection --- .../mixture/tests/test_bayesian_mixture.py | 4 +- .../mixture/tests/test_gaussian_mixture.py | 32 ++-- sklearn/model_selection/tests/test_search.py | 80 ++++----- sklearn/model_selection/tests/test_split.py | 152 +++++++++--------- .../model_selection/tests/test_validation.py | 62 +++---- 5 files changed, 165 insertions(+), 165 deletions(-) diff --git a/sklearn/mixture/tests/test_bayesian_mixture.py b/sklearn/mixture/tests/test_bayesian_mixture.py index f4503dfc7d70e..d62920af454e5 100644 --- a/sklearn/mixture/tests/test_bayesian_mixture.py +++ b/sklearn/mixture/tests/test_bayesian_mixture.py @@ -303,7 +303,7 @@ def test_monotonic_likelihood(): for _ in range(600): prev_lower_bound = current_lower_bound current_lower_bound = bgmm.fit(X).lower_bound_ - assert_greater_equal(current_lower_bound, prev_lower_bound) + assert current_lower_bound >= prev_lower_bound if bgmm.converged_: break @@ -485,4 +485,4 @@ def test_bayesian_mixture_predict_predict_proba(): Y_pred = bgmm.predict(X) Y_pred_proba = bgmm.predict_proba(X).argmax(axis=1) assert_array_equal(Y_pred, Y_pred_proba) - assert_greater_equal(adjusted_rand_score(Y, Y_pred), .95) + assert adjusted_rand_score(Y, Y_pred) >= .95 diff --git a/sklearn/mixture/tests/test_gaussian_mixture.py b/sklearn/mixture/tests/test_gaussian_mixture.py index b40f9a46baf5b..c1f451c7d8495 100644 --- a/sklearn/mixture/tests/test_gaussian_mixture.py +++ b/sklearn/mixture/tests/test_gaussian_mixture.py @@ -165,13 +165,13 @@ def test_gaussian_mixture_attributes(): covariance_type=covariance_type, init_params=init_params).fit(X) - assert_equal(gmm.n_components, n_components) - assert_equal(gmm.covariance_type, covariance_type) - assert_equal(gmm.tol, tol) - assert_equal(gmm.reg_covar, reg_covar) - assert_equal(gmm.max_iter, max_iter) - assert_equal(gmm.n_init, n_init) - assert_equal(gmm.init_params, init_params) + assert gmm.n_components == n_components + assert gmm.covariance_type == covariance_type + assert gmm.tol == tol + assert gmm.reg_covar == reg_covar + assert gmm.max_iter == max_iter + assert gmm.n_init == n_init + assert gmm.init_params == init_params def test_check_X(): @@ -567,7 +567,7 @@ def 
test_gaussian_mixture_predict_predict_proba(): Y_pred = g.predict(X) Y_pred_proba = g.predict_proba(X).argmax(axis=1) assert_array_equal(Y_pred, Y_pred_proba) - assert_greater(adjusted_rand_score(Y, Y_pred), .95) + assert adjusted_rand_score(Y, Y_pred) > .95 @pytest.mark.filterwarnings("ignore:.*did not converge.*") @@ -595,7 +595,7 @@ def test_gaussian_mixture_fit_predict(seed, max_iter, tol): Y_pred1 = f.fit(X).predict(X) Y_pred2 = g.fit_predict(X) assert_array_equal(Y_pred1, Y_pred2) - assert_greater(adjusted_rand_score(Y, Y_pred2), .95) + assert adjusted_rand_score(Y, Y_pred2) > .95 def test_gaussian_mixture_fit_predict_n_init(): @@ -706,7 +706,7 @@ def test_multiple_init(): train2 = GaussianMixture(n_components=n_components, covariance_type=cv_type, random_state=0, n_init=5).fit(X).score(X) - assert_greater_equal(train2, train1) + assert train2 >= train1 def test_gaussian_mixture_n_parameters(): @@ -719,7 +719,7 @@ def test_gaussian_mixture_n_parameters(): g = GaussianMixture( n_components=n_components, covariance_type=cv_type, random_state=rng).fit(X) - assert_equal(g._n_parameters(), n_params[cv_type]) + assert g._n_parameters() == n_params[cv_type] def test_bic_1d_1component(): @@ -874,7 +874,7 @@ def test_score(): gmm2 = GaussianMixture(n_components=n_components, n_init=1, reg_covar=0, random_state=rng, covariance_type=covar_type).fit(X) - assert_greater(gmm2.score(X), gmm1.score(X)) + assert gmm2.score(X) > gmm1.score(X) def test_score_samples(): @@ -893,7 +893,7 @@ def test_score_samples(): "before using this method.", gmm.score_samples, X) gmm_score_samples = gmm.fit(X).score_samples(X) - assert_equal(gmm_score_samples.shape[0], rand_data.n_samples) + assert gmm_score_samples.shape[0] == rand_data.n_samples def test_monotonic_likelihood(): @@ -920,7 +920,7 @@ def test_monotonic_likelihood(): current_log_likelihood = gmm.fit(X).score(X) except ConvergenceWarning: pass - assert_greater_equal(current_log_likelihood, + assert (current_log_likelihood >= prev_log_likelihood) if gmm.converged_: @@ -1021,11 +1021,11 @@ def test_sample(): # Check shapes of sampled data, see # https://github.com/scikit-learn/scikit-learn/issues/7701 - assert_equal(X_s.shape, (n_samples, n_features)) + assert X_s.shape == (n_samples, n_features) for sample_size in range(1, 100): X_s, _ = gmm.sample(sample_size) - assert_equal(X_s.shape, (sample_size, n_features)) + assert X_s.shape == (sample_size, n_features) @ignore_warnings(category=ConvergenceWarning) diff --git a/sklearn/model_selection/tests/test_search.py b/sklearn/model_selection/tests/test_search.py index d05d284d7aceb..695e68aae7dfe 100644 --- a/sklearn/model_selection/tests/test_search.py +++ b/sklearn/model_selection/tests/test_search.py @@ -122,7 +122,7 @@ def score(self): def assert_grid_iter_equals_getitem(grid): - assert_equal(list(grid), [grid[i] for i in range(len(grid))]) + assert list(grid) == [grid[i] for i in range(len(grid))] @pytest.mark.parametrize( @@ -144,33 +144,33 @@ def test_parameter_grid(): grid1 = ParameterGrid(params1) assert isinstance(grid1, Iterable) assert isinstance(grid1, Sized) - assert_equal(len(grid1), 3) + assert len(grid1) == 3 assert_grid_iter_equals_getitem(grid1) params2 = {"foo": [4, 2], "bar": ["ham", "spam", "eggs"]} grid2 = ParameterGrid(params2) - assert_equal(len(grid2), 6) + assert len(grid2) == 6 # loop to assert we can iterate over the grid multiple times for i in range(2): # tuple + chain transforms {"a": 1, "b": 2} to ("a", 1, "b", 2) points = set(tuple(chain(*(sorted(p.items())))) for p in grid2) 
- assert_equal(points, + assert (points == set(("bar", x, "foo", y) for x, y in product(params2["bar"], params2["foo"]))) assert_grid_iter_equals_getitem(grid2) # Special case: empty grid (useful to get default estimator settings) empty = ParameterGrid({}) - assert_equal(len(empty), 1) - assert_equal(list(empty), [{}]) + assert len(empty) == 1 + assert list(empty) == [{}] assert_grid_iter_equals_getitem(empty) assert_raises(IndexError, lambda: empty[1]) has_empty = ParameterGrid([{'C': [1, 10]}, {}, {'C': [.5]}]) - assert_equal(len(has_empty), 4) - assert_equal(list(has_empty), [{'C': 1}, {'C': 10}, {}, {'C': .5}]) + assert len(has_empty) == 4 + assert list(has_empty) == [{'C': 1}, {'C': 10}, {}, {'C': .5}] assert_grid_iter_equals_getitem(has_empty) @@ -183,7 +183,7 @@ def test_grid_search(): sys.stdout = StringIO() grid_search.fit(X, y) sys.stdout = old_stdout - assert_equal(grid_search.best_estimator_.foo_param, 2) + assert grid_search.best_estimator_.foo_param == 2 assert_array_equal(grid_search.cv_results_["param_foo_param"].data, [1, 2, 3]) @@ -243,9 +243,9 @@ def test_grid_search_no_score(): grid_search_no_score.fit(X, y) # check that best params are equal - assert_equal(grid_search_no_score.best_params_, grid_search.best_params_) + assert grid_search_no_score.best_params_ == grid_search.best_params_ # check that we can call score and that it gives the correct result - assert_equal(grid_search.score(X, y), grid_search_no_score.score(X, y)) + assert grid_search.score(X, y) == grid_search_no_score.score(X, y) # giving no scoring function raises an error grid_search_no_score = GridSearchCV(clf_no_score, {'C': Cs}) @@ -276,7 +276,7 @@ def test_grid_search_score_method(): # ensure the test is sane assert score_auc < 1.0 assert score_accuracy < 1.0 - assert_not_equal(score_auc, score_accuracy) + assert score_auc != score_accuracy assert_almost_equal(score_accuracy, score_no_scoring) assert_almost_equal(score_auc, score_no_score_auc) @@ -409,7 +409,7 @@ def test_grid_search_when_param_grid_includes_range(): grid_search = None grid_search = GridSearchCV(clf, {'foo_param': range(1, 4)}, cv=3) grid_search.fit(X, y) - assert_equal(grid_search.best_estimator_.foo_param, 2) + assert grid_search.best_estimator_.foo_param == 2 def test_grid_search_bad_param_grid(): @@ -459,7 +459,7 @@ def test_grid_search_sparse(): C2 = cv.best_estimator_.C assert np.mean(y_pred == y_pred2) >= .9 - assert_equal(C, C2) + assert C == C2 def test_grid_search_sparse_scoring(): @@ -479,7 +479,7 @@ def test_grid_search_sparse_scoring(): C2 = cv.best_estimator_.C assert_array_equal(y_pred, y_pred2) - assert_equal(C, C2) + assert C == C2 # Smoke test the score # np.testing.assert_allclose(f1_score(cv.predict(X_[:180]), y[:180]), # cv.score(X_[:180], y[:180])) @@ -493,7 +493,7 @@ def f1_loss(y_true_, y_pred_): y_pred3 = cv.predict(X_[180:]) C3 = cv.best_estimator_.C - assert_equal(C, C3) + assert C == C3 assert_array_equal(y_pred, y_pred3) @@ -747,18 +747,18 @@ def test_unsupervised_grid_search(): scoring=scoring, refit=refit) grid_search.fit(X, y) # Both ARI and FMS can find the right number :) - assert_equal(grid_search.best_params_["n_clusters"], 3) + assert grid_search.best_params_["n_clusters"] == 3 # Single metric evaluation unsupervised grid_search = GridSearchCV(km, param_grid=dict(n_clusters=[2, 3, 4]), scoring='fowlkes_mallows_score') grid_search.fit(X, y) - assert_equal(grid_search.best_params_["n_clusters"], 3) + assert grid_search.best_params_["n_clusters"] == 3 # Now without a score, and without y grid_search 
= GridSearchCV(km, param_grid=dict(n_clusters=[2, 3, 4])) grid_search.fit(X) - assert_equal(grid_search.best_params_["n_clusters"], 4) + assert grid_search.best_params_["n_clusters"] == 4 def test_gridsearch_no_predict(): @@ -772,8 +772,8 @@ def custom_scoring(estimator, X): param_grid=dict(bandwidth=[.01, .1, 1]), scoring=custom_scoring) search.fit(X) - assert_equal(search.best_params_['bandwidth'], .1) - assert_equal(search.best_score_, 42) + assert search.best_params_['bandwidth'] == .1 + assert search.best_score_ == 42 def test_param_sampler(): @@ -783,7 +783,7 @@ def test_param_sampler(): sampler = ParameterSampler(param_distributions=param_distributions, n_iter=10, random_state=0) samples = [x for x in sampler] - assert_equal(len(samples), 10) + assert len(samples) == 10 for sample in samples: assert sample["kernel"] in ["rbf", "linear"] assert 0 <= sample["C"] <= 1 @@ -792,13 +792,13 @@ def test_param_sampler(): param_distributions = {"C": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]} sampler = ParameterSampler(param_distributions=param_distributions, n_iter=3, random_state=0) - assert_equal([x for x in sampler], [x for x in sampler]) + assert [x for x in sampler] == [x for x in sampler] if sp_version >= (0, 16): param_distributions = {"C": uniform(0, 1)} sampler = ParameterSampler(param_distributions=param_distributions, n_iter=10, random_state=0) - assert_equal([x for x in sampler], [x for x in sampler]) + assert [x for x in sampler] == [x for x in sampler] def check_cv_results_array_types(search, param_keys, score_keys): @@ -852,7 +852,7 @@ def test_grid_search_cv_results(): search = GridSearchCV(SVC(), cv=n_splits, iid=iid, param_grid=params, return_train_score=True) search.fit(X, y) - assert_equal(iid, search.iid) + assert iid == search.iid cv_results = search.cv_results_ # Check if score and timing are reasonable assert all(cv_results['rank_test_score'] >= 1) @@ -905,7 +905,7 @@ def test_random_search_cv_results(): param_distributions=params, return_train_score=True) search.fit(X, y) - assert_equal(iid, search.iid) + assert iid == search.iid cv_results = search.cv_results_ # Check results structure check_cv_results_array_types(search, param_keys, score_keys) @@ -1008,7 +1008,7 @@ def test_search_iid_param(): train_std = search.cv_results_['std_train_score'][0] # Test the first candidate - assert_equal(search.cv_results_['param_C'][0], 1) + assert search.cv_results_['param_C'][0] == 1 assert_array_almost_equal(test_cv_scores, [1, 1. / 3.]) assert_array_almost_equal(train_cv_scores, [1, 1]) @@ -1054,7 +1054,7 @@ def test_search_iid_param(): train_mean = search.cv_results_['mean_train_score'][0] train_std = search.cv_results_['std_train_score'][0] - assert_equal(search.cv_results_['param_C'][0], 1) + assert search.cv_results_['param_C'][0] == 1 # scores are the same as above assert_array_almost_equal(test_cv_scores, [1, 1. 
/ 3.]) # Unweighted mean/std is used @@ -1084,7 +1084,7 @@ def test_grid_search_cv_results_multimetric(): iid=iid, param_grid=params, scoring=scoring, refit=False) grid_search.fit(X, y) - assert_equal(grid_search.iid, iid) + assert grid_search.iid == iid grid_searches.append(grid_search) compare_cv_results_multimetric_with_single(*grid_searches, iid=iid) @@ -1133,7 +1133,7 @@ def compare_cv_results_multimetric_with_single( """Compare multi-metric cv_results with the ensemble of multiple single metric cv_results from single metric grid/random search""" - assert_equal(search_multi.iid, iid) + assert search_multi.iid == iid assert search_multi.multimetric_ assert_array_equal(sorted(search_multi.scorer_), ('accuracy', 'recall')) @@ -1162,10 +1162,10 @@ def compare_cv_results_multimetric_with_single( def compare_refit_methods_when_refit_with_acc(search_multi, search_acc, refit): """Compare refit multi-metric search methods with single metric methods""" if refit: - assert_equal(search_multi.refit, 'accuracy') + assert search_multi.refit == 'accuracy' else: assert not search_multi.refit - assert_equal(search_acc.refit, refit) + assert search_acc.refit == refit X, y = make_blobs(n_samples=100, n_features=4, random_state=42) for method in ('predict', 'predict_proba', 'predict_log_proba'): @@ -1173,7 +1173,7 @@ def compare_refit_methods_when_refit_with_acc(search_multi, search_acc, refit): getattr(search_acc, method)(X)) assert_almost_equal(search_multi.score(X, y), search_acc.score(X, y)) for key in ('best_index_', 'best_score_', 'best_params_'): - assert_equal(getattr(search_multi, key), getattr(search_acc, key)) + assert getattr(search_multi, key) == getattr(search_acc, key) def test_search_cv_results_rank_tie_breaking(): @@ -1245,7 +1245,7 @@ def test_search_cv_timing(): assert hasattr(search, "refit_time_") assert isinstance(search.refit_time_, float) - assert_greater_equal(search.refit_time_, 0) + assert search.refit_time_ >= 0 def test_grid_search_correct_score_results(): @@ -1301,8 +1301,8 @@ def test_fit_grid_point(): # Test the return values of fit_grid_point assert_almost_equal(this_scores, expected_score) - assert_equal(params, this_params) - assert_equal(n_test_samples, test.size) + assert params == this_params + assert n_test_samples == test.size # Should raise an error upon multimetric scorer assert_raise_message(ValueError, "For evaluating multiple scores, use " @@ -1492,7 +1492,7 @@ def test_parameters_sampler_replacement(): # degenerates to GridSearchCV if n_iter the same as grid_size sampler = ParameterSampler(params, n_iter=6) samples = list(sampler) - assert_equal(len(samples), 6) + assert len(samples) == 6 for values in ParameterGrid(params): assert values in samples @@ -1500,16 +1500,16 @@ def test_parameters_sampler_replacement(): params = {'a': range(10), 'b': range(10), 'c': range(10)} sampler = ParameterSampler(params, n_iter=99, random_state=42) samples = list(sampler) - assert_equal(len(samples), 99) + assert len(samples) == 99 hashable_samples = ["a%db%dc%d" % (p['a'], p['b'], p['c']) for p in samples] - assert_equal(len(set(hashable_samples)), 99) + assert len(set(hashable_samples)) == 99 # doesn't go into infinite loops params_distribution = {'first': bernoulli(.5), 'second': ['a', 'b', 'c']} sampler = ParameterSampler(params_distribution, n_iter=7) samples = list(sampler) - assert_equal(len(samples), 7) + assert len(samples) == 7 def test_stochastic_gradient_loss_param(): diff --git a/sklearn/model_selection/tests/test_split.py 
b/sklearn/model_selection/tests/test_split.py index 1f47e85ab3c43..583000e2000bc 100644 --- a/sklearn/model_selection/tests/test_split.py +++ b/sklearn/model_selection/tests/test_split.py @@ -169,7 +169,7 @@ def test_cross_validator_with_default_params(): [loo_repr, lpo_repr, kf_repr, skf_repr, lolo_repr, lopo_repr, ss_repr, ps_repr])): # Test if get_n_splits works correctly - assert_equal(n_splits_expected[i], cv.get_n_splits(X, y, groups)) + assert n_splits_expected[i] == cv.get_n_splits(X, y, groups) # Test if the cross-validator works as expected even if # the data is 1d @@ -177,11 +177,11 @@ def test_cross_validator_with_default_params(): list(cv.split(X_1d, y, groups))) # Test that train, test indices returned are integers for train, test in cv.split(X, y, groups): - assert_equal(np.asarray(train).dtype.kind, 'i') - assert_equal(np.asarray(train).dtype.kind, 'i') + assert np.asarray(train).dtype.kind == 'i' + assert np.asarray(train).dtype.kind == 'i' # Test if the repr works without any errors - assert_equal(cv_repr, repr(cv)) + assert cv_repr == repr(cv) # ValueError for get_n_splits methods msg = "The 'X' parameter should not be None." @@ -223,18 +223,18 @@ def check_valid_split(train, test, n_samples=None): train, test = set(train), set(test) # Train and test split should not overlap - assert_equal(train.intersection(test), set()) + assert train.intersection(test) == set() if n_samples is not None: # Check that the union of train an test split cover all the indices - assert_equal(train.union(test), set(range(n_samples))) + assert train.union(test) == set(range(n_samples)) def check_cv_coverage(cv, X, y, groups, expected_n_splits=None): n_samples = _num_samples(X) # Check that a all the samples appear at least once in a test fold if expected_n_splits is not None: - assert_equal(cv.get_n_splits(X, y, groups), expected_n_splits) + assert cv.get_n_splits(X, y, groups) == expected_n_splits else: expected_n_splits = cv.get_n_splits(X, y, groups) @@ -246,9 +246,9 @@ def check_cv_coverage(cv, X, y, groups, expected_n_splits=None): collected_test_samples.update(test) # Check that the accumulated test samples cover the whole dataset - assert_equal(iterations, expected_n_splits) + assert iterations == expected_n_splits if n_samples is not None: - assert_equal(collected_test_samples, set(range(n_samples))) + assert collected_test_samples == set(range(n_samples)) def test_kfold_valueerrors(): @@ -311,7 +311,7 @@ def test_kfold_indices(): check_cv_coverage(kf, X2, y=None, groups=None, expected_n_splits=3) # Check if get_n_splits returns the number of folds - assert_equal(5, KFold(5).get_n_splits(X2)) + assert 5 == KFold(5).get_n_splits(X2) def test_kfold_no_shuffle(): @@ -362,7 +362,7 @@ def test_stratified_kfold_no_shuffle(): assert_array_equal(train, [0, 1, 3, 4]) # Check if get_n_splits returns the number of folds - assert_equal(5, StratifiedKFold(5).get_n_splits(X, y)) + assert 5 == StratifiedKFold(5).get_n_splits(X, y) # Make sure string labels are also supported X = np.ones(7) @@ -399,7 +399,7 @@ def test_kfold_balance(): sizes = [len(test) for _, test in kf] assert (np.max(sizes) - np.min(sizes)) <= 1 - assert_equal(np.sum(sizes), i) + assert np.sum(sizes) == i def test_stratifiedkfold_balance(): @@ -416,7 +416,7 @@ def test_stratifiedkfold_balance(): sizes = [len(test) for _, test in skf] assert (np.max(sizes) - np.min(sizes)) <= 1 - assert_equal(np.sum(sizes), i) + assert np.sum(sizes) == i def test_shuffle_kfold(): @@ -432,13 +432,13 @@ def test_shuffle_kfold(): kf.split(X), 
kf2.split(X), kf3.split(X)): for tr_a, tr_b in combinations((tr1, tr2, tr3), 2): # Assert that there is no complete overlap - assert_not_equal(len(np.intersect1d(tr_a, tr_b)), len(tr1)) + assert len(np.intersect1d(tr_a, tr_b)) != len(tr1) # Set all test indices in successive iterations of kf2 to 1 all_folds[te2] = 1 # Check that all indices are returned in the different test folds - assert_equal(sum(all_folds), 300) + assert sum(all_folds) == 300 def test_shuffle_kfold_stratifiedkfold_reproducibility(): @@ -485,7 +485,7 @@ def test_shuffle_stratifiedkfold(): kf1 = StratifiedKFold(5, shuffle=True, random_state=1) for (_, test0), (_, test1) in zip(kf0.split(X_40, y), kf1.split(X_40, y)): - assert_not_equal(set(test0), set(test1)) + assert set(test0) != set(test1) check_cv_coverage(kf0, X_40, y, groups=None, expected_n_splits=5) # Ensure that we shuffle each class's samples with different @@ -516,8 +516,8 @@ def test_kfold_can_detect_dependent_samples_on_digits(): # see #2372 cv = KFold(n_splits=n_splits, shuffle=False) mean_score = cross_val_score(model, X, y, cv=cv).mean() - assert_greater(0.92, mean_score) - assert_greater(mean_score, 0.80) + assert 0.92 > mean_score + assert mean_score > 0.80 # Shuffling the data artificially breaks the dependency and hides the # overfitting of the model with regards to the writing style of the authors @@ -525,11 +525,11 @@ def test_kfold_can_detect_dependent_samples_on_digits(): # see #2372 cv = KFold(n_splits, shuffle=True, random_state=0) mean_score = cross_val_score(model, X, y, cv=cv).mean() - assert_greater(mean_score, 0.92) + assert mean_score > 0.92 cv = KFold(n_splits, shuffle=True, random_state=1) mean_score = cross_val_score(model, X, y, cv=cv).mean() - assert_greater(mean_score, 0.92) + assert mean_score > 0.92 # Similarly, StratifiedKFold should try to shuffle the data as little # as possible (while respecting the balanced class constraints) @@ -540,8 +540,8 @@ def test_kfold_can_detect_dependent_samples_on_digits(): # see #2372 cv = StratifiedKFold(n_splits) mean_score = cross_val_score(model, X, y, cv=cv).mean() - assert_greater(0.93, mean_score) - assert_greater(mean_score, 0.80) + assert 0.93 > mean_score + assert mean_score > 0.80 def test_shuffle_split(): @@ -628,8 +628,8 @@ def test_stratified_shuffle_split_respects_test_size(): sss = StratifiedShuffleSplit(6, test_size=test_size, train_size=train_size, random_state=0).split(np.ones(len(y)), y) for train, test in sss: - assert_equal(len(train), train_size) - assert_equal(len(test), test_size) + assert len(train) == train_size + assert len(test) == test_size def test_stratified_shuffle_split_iter(): @@ -661,9 +661,9 @@ def test_stratified_shuffle_split_iter(): return_inverse=True)[1]) / float(len(y[test]))) assert_array_almost_equal(p_train, p_test, 1) - assert_equal(len(train) + len(test), y.size) - assert_equal(len(train), train_size) - assert_equal(len(test), test_size) + assert len(train) + len(test) == y.size + assert len(train) == train_size + assert len(test) == test_size assert_array_equal(np.lib.arraysetops.intersect1d(train, test), []) @@ -697,19 +697,19 @@ def assert_counts_are_ok(idx_counts, p): for counter, ids in [(train_counts, train), (test_counts, test)]: for id in ids: counter[id] += 1 - assert_equal(n_splits_actual, n_splits) + assert n_splits_actual == n_splits n_train, n_test = _validate_shuffle_split( n_samples, test_size=1. / n_folds, train_size=1. - (1. 
/ n_folds)) - assert_equal(len(train), n_train) - assert_equal(len(test), n_test) - assert_equal(len(set(train).intersection(test)), 0) + assert len(train) == n_train + assert len(test) == n_test + assert len(set(train).intersection(test)) == 0 group_counts = np.unique(groups) - assert_equal(splits.test_size, 1.0 / n_folds) - assert_equal(n_train + n_test, len(groups)) - assert_equal(len(group_counts), 2) + assert splits.test_size == 1.0 / n_folds + assert n_train + n_test == len(groups) + assert len(group_counts) == 2 ex_test_p = float(n_test) / n_samples ex_train_p = float(n_train) / n_samples @@ -754,8 +754,8 @@ def test_stratified_shuffle_split_multilabel(): # correct stratification of entire rows # (by design, here y[:, 0] uniquely determines the entire row of y) expected_ratio = np.mean(y[:, 0]) - assert_equal(expected_ratio, np.mean(y_train[:, 0])) - assert_equal(expected_ratio, np.mean(y_test[:, 0])) + assert expected_ratio == np.mean(y_train[:, 0]) + assert expected_ratio == np.mean(y_test[:, 0]) def test_stratified_shuffle_split_multilabel_many_labels(): @@ -777,8 +777,8 @@ def test_stratified_shuffle_split_multilabel_many_labels(): # correct stratification of entire rows # (by design, here y[:, 4] uniquely determines the entire row of y) expected_ratio = np.mean(y[:, 4]) - assert_equal(expected_ratio, np.mean(y_train[:, 4])) - assert_equal(expected_ratio, np.mean(y_test[:, 4])) + assert expected_ratio == np.mean(y_train[:, 4]) + assert expected_ratio == np.mean(y_test[:, 4]) def test_predefinedsplit_with_kfold_split(): @@ -792,7 +792,7 @@ def test_predefinedsplit_with_kfold_split(): folds[test_ind] = i ps = PredefinedSplit(folds) # n_splits is simply the no of unique folds - assert_equal(len(np.unique(folds)), ps.get_n_splits()) + assert len(np.unique(folds)) == ps.get_n_splits() ps_train, ps_test = zip(*ps.split()) assert_array_equal(ps_train, kf_train) assert_array_equal(ps_test, kf_test) @@ -809,7 +809,7 @@ def test_group_shuffle_split(): repr(slo) # Test that the length is correct - assert_equal(slo.get_n_splits(X, y, groups=groups_i), n_splits) + assert slo.get_n_splits(X, y, groups=groups_i) == n_splits l_unique = np.unique(groups_i) l = np.asarray(groups_i) @@ -822,7 +822,7 @@ def test_group_shuffle_split(): assert not np.any(np.in1d(l[test], l_train_unique)) # Second test: train and test add up to all the data - assert_equal(l[train].size + l[test].size, l.size) + assert l[train].size + l[test].size == l.size # Third test: train and test are disjoint assert_array_equal(np.intersect1d(train, test), []) @@ -841,10 +841,10 @@ def test_leave_one_p_group_out(): lpgo_2 = LeavePGroupsOut(n_groups=2) # Make sure the repr works - assert_equal(repr(logo), 'LeaveOneGroupOut()') - assert_equal(repr(lpgo_1), 'LeavePGroupsOut(n_groups=1)') - assert_equal(repr(lpgo_2), 'LeavePGroupsOut(n_groups=2)') - assert_equal(repr(LeavePGroupsOut(n_groups=3)), + assert repr(logo) == 'LeaveOneGroupOut()' + assert repr(lpgo_1) == 'LeavePGroupsOut(n_groups=1)' + assert repr(lpgo_2) == 'LeavePGroupsOut(n_groups=2)' + assert (repr(LeavePGroupsOut(n_groups=3)) == 'LeavePGroupsOut(n_groups=3)') for j, (cv, p_groups_out) in enumerate(((logo, 1), (lpgo_1, 1), @@ -856,7 +856,7 @@ def test_leave_one_p_group_out(): X = y = np.ones(len(groups_i)) # Test that the length is correct - assert_equal(cv.get_n_splits(X, y, groups=groups_i), n_splits) + assert cv.get_n_splits(X, y, groups=groups_i) == n_splits groups_arr = np.asarray(groups_i) @@ -868,17 +868,17 @@ def test_leave_one_p_group_out(): []) # Second test: 
train and test add up to all the data - assert_equal(len(train) + len(test), len(groups_i)) + assert len(train) + len(test) == len(groups_i) # Third test: # The number of groups in test must be equal to p_groups_out assert np.unique(groups_arr[test]).shape[0], p_groups_out # check get_n_splits() with dummy parameters - assert_equal(logo.get_n_splits(None, None, ['a', 'b', 'c', 'b', 'c']), 3) - assert_equal(logo.get_n_splits(groups=[1.0, 1.1, 1.0, 1.2]), 3) - assert_equal(lpgo_2.get_n_splits(None, None, np.arange(4)), 6) - assert_equal(lpgo_1.get_n_splits(groups=np.arange(4)), 4) + assert logo.get_n_splits(None, None, ['a', 'b', 'c', 'b', 'c']) == 3 + assert logo.get_n_splits(groups=[1.0, 1.1, 1.0, 1.2]) == 3 + assert lpgo_2.get_n_splits(None, None, np.arange(4)) == 6 + assert lpgo_1.get_n_splits(groups=np.arange(4)) == 4 # raise ValueError if a `groups` parameter is illegal with assert_raises(ValueError): @@ -910,12 +910,12 @@ def test_leave_group_out_changing_groups(): assert_array_equal(test, test_chan) # n_splits = no of 2 (p) group combinations of the unique groups = 3C2 = 3 - assert_equal( - 3, LeavePGroupsOut(n_groups=2).get_n_splits(X, y=X, + assert ( + 3 == LeavePGroupsOut(n_groups=2).get_n_splits(X, y=X, groups=groups)) # n_splits = no of unique groups (C(uniq_lbls, 1) = n_unique_groups) - assert_equal(3, LeaveOneGroupOut().get_n_splits(X, y=X, - groups=groups)) + assert 3 == LeaveOneGroupOut().get_n_splits(X, y=X, + groups=groups) def test_leave_one_p_group_out_error_on_fewer_number_of_groups(): @@ -987,7 +987,7 @@ def test_get_n_splits_for_repeated_kfold(): n_repeats = 4 rkf = RepeatedKFold(n_splits, n_repeats) expected_n_splits = n_splits * n_repeats - assert_equal(expected_n_splits, rkf.get_n_splits()) + assert expected_n_splits == rkf.get_n_splits() def test_get_n_splits_for_repeated_stratified_kfold(): @@ -995,7 +995,7 @@ def test_get_n_splits_for_repeated_stratified_kfold(): n_repeats = 4 rskf = RepeatedStratifiedKFold(n_splits, n_repeats) expected_n_splits = n_splits * n_repeats - assert_equal(expected_n_splits, rskf.get_n_splits()) + assert expected_n_splits == rskf.get_n_splits() def test_repeated_stratified_kfold_determinstic_split(): @@ -1105,7 +1105,7 @@ def test_train_test_split(): # simple test split = train_test_split(X, y, test_size=None, train_size=.5) X_train, X_test, y_train, y_test = split - assert_equal(len(y_test), len(y_train)) + assert len(y_test) == len(y_train) # test correspondence of X and y assert_array_equal(X_train[:, 0], y_train * 10) assert_array_equal(X_test[:, 0], y_test * 10) @@ -1120,10 +1120,10 @@ def test_train_test_split(): X_4d = np.arange(10 * 5 * 3 * 2).reshape(10, 5, 3, 2) y_3d = np.arange(10 * 7 * 11).reshape(10, 7, 11) split = train_test_split(X_4d, y_3d) - assert_equal(split[0].shape, (7, 5, 3, 2)) - assert_equal(split[1].shape, (3, 5, 3, 2)) - assert_equal(split[2].shape, (7, 7, 11)) - assert_equal(split[3].shape, (3, 7, 11)) + assert split[0].shape == (7, 5, 3, 2) + assert split[1].shape == (3, 5, 3, 2) + assert split[2].shape == (7, 7, 11) + assert split[3].shape == (3, 7, 11) # test stratification option y = np.array([1, 1, 1, 1, 2, 2, 2, 2]) @@ -1132,10 +1132,10 @@ def test_train_test_split(): train, test = train_test_split(y, test_size=test_size, stratify=y, random_state=0) - assert_equal(len(test), exp_test_size) - assert_equal(len(test) + len(train), len(y)) + assert len(test) == exp_test_size + assert len(test) + len(train) == len(y) # check the 1:1 ratio of ones and twos in the data is preserved - assert_equal(np.sum(train 
== 1), np.sum(train == 2)) + assert np.sum(train == 1) == np.sum(train == 2) # test unshuffled split y = np.arange(10) @@ -1337,19 +1337,19 @@ def test_group_kfold(): folds[test] = i # Check that folds have approximately the same size - assert_equal(len(folds), len(groups)) + assert len(folds) == len(groups) for i in np.unique(folds): - assert_greater_equal(tolerance, + assert (tolerance >= abs(sum(folds == i) - ideal_n_groups_per_fold)) # Check that each group appears only in 1 fold for group in np.unique(groups): - assert_equal(len(np.unique(folds[groups == group])), 1) + assert len(np.unique(folds[groups == group])) == 1 # Check that no group is on both sides of the split groups = np.asarray(groups, dtype=object) for train, test in lkf.split(X, y, groups): - assert_equal(len(np.intersect1d(groups[train], groups[test])), 0) + assert len(np.intersect1d(groups[train], groups[test])) == 0 # Construct the test data groups = np.array(['Albert', 'Jean', 'Bertrand', 'Michel', 'Jean', @@ -1374,21 +1374,21 @@ def test_group_kfold(): folds[test] = i # Check that folds have approximately the same size - assert_equal(len(folds), len(groups)) + assert len(folds) == len(groups) for i in np.unique(folds): - assert_greater_equal(tolerance, + assert (tolerance >= abs(sum(folds == i) - ideal_n_groups_per_fold)) # Check that each group appears only in 1 fold with warnings.catch_warnings(): warnings.simplefilter("ignore", DeprecationWarning) for group in np.unique(groups): - assert_equal(len(np.unique(folds[groups == group])), 1) + assert len(np.unique(folds[groups == group])) == 1 # Check that no group is on both sides of the split groups = np.asarray(groups, dtype=object) for train, test in lkf.split(X, y, groups): - assert_equal(len(np.intersect1d(groups[train], groups[test])), 0) + assert len(np.intersect1d(groups[train], groups[test])) == 0 # groups can also be a list cv_iter = list(lkf.split(X, y, groups.tolist())) @@ -1438,8 +1438,8 @@ def test_time_series_cv(): # Check get_n_splits returns the correct number of splits splits = TimeSeriesSplit(2).split(X) n_splits_actual = len(list(splits)) - assert_equal(n_splits_actual, tscv.get_n_splits()) - assert_equal(n_splits_actual, 2) + assert n_splits_actual == tscv.get_n_splits() + assert n_splits_actual == 2 def _check_time_series_max_train_size(splits, check_splits, max_train_size): @@ -1493,7 +1493,7 @@ def __init__(self, a, b=0, c=None): def __repr__(self): return _build_repr(self) - assert_equal(repr(MockSplitter(5, 6)), "MockSplitter(a=5, b=6, c=None)") + assert repr(MockSplitter(5, 6)) == "MockSplitter(a=5, b=6, c=None)" @pytest.mark.parametrize('CVSplitter', (ShuffleSplit, GroupShuffleSplit, diff --git a/sklearn/model_selection/tests/test_validation.py b/sklearn/model_selection/tests/test_validation.py index ca43e244fa7df..1d0f1cb1be8d0 100644 --- a/sklearn/model_selection/tests/test_validation.py +++ b/sklearn/model_selection/tests/test_validation.py @@ -415,7 +415,7 @@ def check_cross_validate_single_metric(clf, X, y, scores): scoring='neg_mean_squared_error', return_train_score=False) assert isinstance(mse_scores_dict, dict) - assert_equal(len(mse_scores_dict), dict_len) + assert len(mse_scores_dict) == dict_len assert_array_almost_equal(mse_scores_dict['test_score'], test_mse_scores) @@ -430,7 +430,7 @@ def check_cross_validate_single_metric(clf, X, y, scores): r2_scores_dict = cross_validate(clf, X, y, scoring=['r2'], return_train_score=False) assert isinstance(r2_scores_dict, dict) - assert_equal(len(r2_scores_dict), dict_len) + assert 
len(r2_scores_dict) == dict_len assert_array_almost_equal(r2_scores_dict['test_r2'], test_r2_scores) # Test return_estimator option @@ -470,9 +470,9 @@ def check_cross_validate_multi_metric(clf, X, y, scores): cv_results = cross_validate(clf, X, y, scoring=scoring, return_train_score=False) assert isinstance(cv_results, dict) - assert_equal(set(cv_results.keys()), - keys_with_train if return_train_score - else keys_sans_train) + assert (set(cv_results.keys()) == + (keys_with_train if return_train_score + else keys_sans_train)) assert_array_almost_equal(cv_results['test_r2'], test_r2_scores) assert_array_almost_equal( cv_results['test_neg_mean_squared_error'], test_mse_scores) @@ -591,9 +591,9 @@ def assert_fit_params(clf): # Function to test that the values are passed correctly to the # classifier arguments for non-array type - assert_equal(clf.dummy_int, DUMMY_INT) - assert_equal(clf.dummy_str, DUMMY_STR) - assert_equal(clf.dummy_obj, DUMMY_OBJ) + assert clf.dummy_int == DUMMY_INT + assert clf.dummy_str == DUMMY_STR + assert clf.dummy_obj == DUMMY_OBJ fit_params = {'sample_weight': np.ones(n_samples), 'class_prior': np.full(n_classes, 1. / n_classes), @@ -686,7 +686,7 @@ def test_permutation_score(): score, scores, pvalue = permutation_test_score( svm, X, y, n_permutations=30, cv=cv, scoring="accuracy") - assert_greater(score, 0.9) + assert score > 0.9 assert_almost_equal(pvalue, 0.0, 1) score_group, _, pvalue_group = permutation_test_score( @@ -722,8 +722,8 @@ def custom_score(y_true, y_pred): score, scores, pvalue = permutation_test_score( svm, X, y, n_permutations=30, cv=cv, scoring="accuracy") - assert_less(score, 0.5) - assert_greater(pvalue, 0.2) + assert score < 0.5 + assert pvalue > 0.2 def test_permutation_test_score_allow_nans(): @@ -784,11 +784,11 @@ def test_cross_val_predict(): assert_array_almost_equal(preds, preds2) preds = cross_val_predict(est, X, y) - assert_equal(len(preds), len(y)) + assert len(preds) == len(y) cv = LeaveOneOut() preds = cross_val_predict(est, X, y, cv=cv) - assert_equal(len(preds), len(y)) + assert len(preds) == len(y) Xsp = X.copy() Xsp *= (Xsp > np.median(Xsp)) @@ -797,7 +797,7 @@ def test_cross_val_predict(): assert_array_almost_equal(len(preds), len(y)) preds = cross_val_predict(KMeans(), X) - assert_equal(len(preds), len(y)) + assert len(preds) == len(y) class BadCV(): def split(self, X, y=None, groups=None): @@ -822,13 +822,13 @@ def test_cross_val_predict_decision_function_shape(): preds = cross_val_predict(LogisticRegression(solver="liblinear"), X, y, method='decision_function') - assert_equal(preds.shape, (50,)) + assert preds.shape == (50,) X, y = load_iris(return_X_y=True) preds = cross_val_predict(LogisticRegression(solver="liblinear"), X, y, method='decision_function') - assert_equal(preds.shape, (150, 3)) + assert preds.shape == (150, 3) # This specifically tests imbalanced splits for binary # classification with decision_function. 
This is only @@ -852,7 +852,7 @@ def test_cross_val_predict_decision_function_shape(): preds = cross_val_predict(est, X, y, method='decision_function') - assert_equal(preds.shape, (1797, 45)) + assert preds.shape == (1797, 45) ind = np.argsort(y) X, y = X[ind], y[ind] @@ -869,13 +869,13 @@ def test_cross_val_predict_predict_proba_shape(): preds = cross_val_predict(LogisticRegression(solver="liblinear"), X, y, method='predict_proba') - assert_equal(preds.shape, (50, 2)) + assert preds.shape == (50, 2) X, y = load_iris(return_X_y=True) preds = cross_val_predict(LogisticRegression(solver="liblinear"), X, y, method='predict_proba') - assert_equal(preds.shape, (150, 3)) + assert preds.shape == (150, 3) def test_cross_val_predict_predict_log_proba_shape(): @@ -883,13 +883,13 @@ def test_cross_val_predict_predict_log_proba_shape(): preds = cross_val_predict(LogisticRegression(solver="liblinear"), X, y, method='predict_log_proba') - assert_equal(preds.shape, (50, 2)) + assert preds.shape == (50, 2) X, y = load_iris(return_X_y=True) preds = cross_val_predict(LogisticRegression(solver="liblinear"), X, y, method='predict_log_proba') - assert_equal(preds.shape, (150, 3)) + assert preds.shape == (150, 3) def test_cross_val_predict_input_types(): @@ -902,11 +902,11 @@ def test_cross_val_predict_input_types(): # 3 fold cv is used --> atleast 3 samples per class # Smoke test predictions = cross_val_predict(clf, X, y) - assert_equal(predictions.shape, (150,)) + assert predictions.shape == (150,) # test with multioutput y predictions = cross_val_predict(clf, X_sparse, multioutput_y) - assert_equal(predictions.shape, (150, 2)) + assert predictions.shape == (150, 2) predictions = cross_val_predict(clf, X_sparse, y) assert_array_equal(predictions.shape, (150,)) @@ -1000,10 +1000,10 @@ def test_learning_curve(): shuffle=shuffle_train, return_times=True) if len(w) > 0: raise RuntimeError("Unexpected warning: %r" % w[0].message) - assert_equal(train_scores.shape, (10, 3)) - assert_equal(test_scores.shape, (10, 3)) - assert_equal(fit_times.shape, (10, 3)) - assert_equal(score_times.shape, (10, 3)) + assert train_scores.shape == (10, 3) + assert test_scores.shape == (10, 3) + assert fit_times.shape == (10, 3) + assert score_times.shape == (10, 3) assert_array_equal(train_sizes, np.linspace(2, 20, 10)) assert_array_almost_equal(train_scores.mean(axis=1), np.linspace(1.9, 1.0, 10)) @@ -1012,8 +1012,8 @@ def test_learning_curve(): # Cannot use assert_array_almost_equal for fit and score times because # the values are hardware-dependant - assert_equal(fit_times.dtype, "float64") - assert_equal(score_times.dtype, "float64") + assert fit_times.dtype == "float64" + assert score_times.dtype == "float64" # Test a custom cv splitter that can iterate only once with warnings.catch_warnings(record=True) as w: @@ -1387,7 +1387,7 @@ def check_cross_val_predict_multilabel(est, X, y, method): # Check actual outputs for several representations of y for tg in [y, y + 1, y - 2, y.astype('str')]: cv_predict_output = cross_val_predict(est, X, tg, method=method, cv=cv) - assert_equal(len(cv_predict_output), len(expected_preds)) + assert len(cv_predict_output) == len(expected_preds) for i in range(len(cv_predict_output)): assert_allclose(cv_predict_output[i], expected_preds[i]) @@ -1670,7 +1670,7 @@ def test_fit_and_score_failing(): [FailingClassifier.FAILING_PARAMETER], cv=3, error_score='unvalid-string') - assert_equal(failing_clf.score(), 0.) # FailingClassifier coverage + assert failing_clf.score() == 0. 
# FailingClassifier coverage def test_fit_and_score_working(): From a51d25f9bff69c4a79a5168de961b6e3e4c9e126 Mon Sep 17 00:00:00 2001 From: adrinjalali Date: Sun, 30 Jun 2019 15:59:44 +0200 Subject: [PATCH 14/22] fix neighbors --- sklearn/neighbors/tests/test_kde.py | 8 ++--- sklearn/neighbors/tests/test_lof.py | 8 ++--- sklearn/neighbors/tests/test_nca.py | 2 +- .../neighbors/tests/test_nearest_centroid.py | 2 +- sklearn/neighbors/tests/test_neighbors.py | 34 +++++++++---------- 5 files changed, 27 insertions(+), 27 deletions(-) diff --git a/sklearn/neighbors/tests/test_kde.py b/sklearn/neighbors/tests/test_kde.py index 61dfa778194cf..3d41add23c2bc 100644 --- a/sklearn/neighbors/tests/test_kde.py +++ b/sklearn/neighbors/tests/test_kde.py @@ -75,7 +75,7 @@ def test_kernel_density_sampling(n_samples=100, n_features=3): # draw a tophat sample kde = KernelDensity(bandwidth, kernel=kernel).fit(X) samp = kde.sample(100) - assert_equal(X.shape, samp.shape) + assert X.shape == samp.shape # check that samples are in the right range nbrs = NearestNeighbors(n_neighbors=1).fit(X) @@ -96,7 +96,7 @@ def test_kernel_density_sampling(n_samples=100, n_features=3): # non-regression test: used to return a scalar X = rng.randn(4, 1) kde = KernelDensity(kernel="gaussian").fit(X) - assert_equal(kde.sample().shape, (1, 1)) + assert kde.sample().shape == (1, 1) @pytest.mark.parametrize('algorithm', ['auto', 'ball_tree', 'kd_tree']) @@ -116,7 +116,7 @@ def test_kde_algorithm_metric_choice(algorithm, metric): kde = KernelDensity(algorithm=algorithm, metric=metric) kde.fit(X) y_dens = kde.score_samples(Y) - assert_equal(y_dens.shape, Y.shape[:1]) + assert y_dens.shape == Y.shape[:1] def test_kde_score(n_samples=100, n_features=3): @@ -154,7 +154,7 @@ def test_kde_pipeline_gridsearch(): params = dict(kerneldensity__bandwidth=[0.001, 0.01, 0.1, 1, 10]) search = GridSearchCV(pipe1, param_grid=params) search.fit(X) - assert_equal(search.best_params_['kerneldensity__bandwidth'], .1) + assert search.best_params_['kerneldensity__bandwidth'] == .1 def test_kde_sample_weights(): diff --git a/sklearn/neighbors/tests/test_lof.py b/sklearn/neighbors/tests/test_lof.py index a28118dad53ce..a00017494e328 100644 --- a/sklearn/neighbors/tests/test_lof.py +++ b/sklearn/neighbors/tests/test_lof.py @@ -44,7 +44,7 @@ def test_lof(): assert_array_equal(clf._fit_X, X) # Assert largest outlier score is smaller than smallest inlier score: - assert_greater(np.min(score[:-2]), np.max(score[-2:])) + assert np.min(score[:-2]) > np.max(score[-2:]) # Assert predict() works: clf = neighbors.LocalOutlierFactor(contamination=0.25, @@ -71,7 +71,7 @@ def test_lof_performance(): y_pred = -clf.decision_function(X_test) # check that roc_auc is good - assert_greater(roc_auc_score(y_test, y_pred), .99) + assert roc_auc_score(y_test, y_pred) > .99 def test_lof_values(): @@ -123,13 +123,13 @@ def test_lof_precomputed(random_state=42): def test_n_neighbors_attribute(): X = iris.data clf = neighbors.LocalOutlierFactor(n_neighbors=500).fit(X) - assert_equal(clf.n_neighbors_, X.shape[0] - 1) + assert clf.n_neighbors_ == X.shape[0] - 1 clf = neighbors.LocalOutlierFactor(n_neighbors=500) assert_warns_message(UserWarning, "n_neighbors will be set to (n_samples - 1)", clf.fit, X) - assert_equal(clf.n_neighbors_, X.shape[0] - 1) + assert clf.n_neighbors_ == X.shape[0] - 1 def test_score_samples(): diff --git a/sklearn/neighbors/tests/test_nca.py b/sklearn/neighbors/tests/test_nca.py index 49f94bc4d56a5..c9b78f13886ed 100644 --- a/sklearn/neighbors/tests/test_nca.py 
+++ b/sklearn/neighbors/tests/test_nca.py @@ -509,7 +509,7 @@ def callback(self, transformation, n_iter): cb = transformation_storer.callback nca = NeighborhoodComponentsAnalysis(max_iter=5, callback=cb) nca.fit(X, y) - assert_equal(transformation_storer.transformation.size, X.shape[1]**2) + assert transformation_storer.transformation.size == X.shape[1]**2 def test_convergence_warning(): diff --git a/sklearn/neighbors/tests/test_nearest_centroid.py b/sklearn/neighbors/tests/test_nearest_centroid.py index 25fac197c3657..3b962372159eb 100644 --- a/sklearn/neighbors/tests/test_nearest_centroid.py +++ b/sklearn/neighbors/tests/test_nearest_centroid.py @@ -90,7 +90,7 @@ def test_pickle(): s = pickle.dumps(obj) obj2 = pickle.loads(s) - assert_equal(type(obj2), obj.__class__) + assert type(obj2) == obj.__class__ score2 = obj2.score(iris.data, iris.target) assert_array_equal(score, score2, "Failed to generate same score" diff --git a/sklearn/neighbors/tests/test_neighbors.py b/sklearn/neighbors/tests/test_neighbors.py index ff73b79493cb5..afa7159d3d61c 100644 --- a/sklearn/neighbors/tests/test_neighbors.py +++ b/sklearn/neighbors/tests/test_neighbors.py @@ -469,8 +469,8 @@ def test_radius_neighbors_boundary_handling(): nbrs = neighbors.NearestNeighbors(radius=radius, algorithm=algorithm).fit(X) results = nbrs.radius_neighbors([[0.0]], return_distance=False) - assert_equal(results.shape, (1,)) - assert_equal(results.dtype, object) + assert results.shape == (1,) + assert results.dtype == object assert_array_equal(results[0], [0, 1]) @@ -498,7 +498,7 @@ def test_RadiusNeighborsClassifier_multioutput(): y_pred_so.append(rnn.predict(X_test)) y_pred_so = np.vstack(y_pred_so).T - assert_equal(y_pred_so.shape, y_test.shape) + assert y_pred_so.shape == y_test.shape # Multioutput prediction rnn_mo = neighbors.RadiusNeighborsClassifier(weights=weights, @@ -506,7 +506,7 @@ def test_RadiusNeighborsClassifier_multioutput(): rnn_mo.fit(X_train, y_train) y_pred_mo = rnn_mo.predict(X_test) - assert_equal(y_pred_mo.shape, y_test.shape) + assert y_pred_mo.shape == y_test.shape assert_array_almost_equal(y_pred_mo, y_pred_so) @@ -559,8 +559,8 @@ def test_KNeighborsClassifier_multioutput(): y_pred_proba_so.append(knn.predict_proba(X_test)) y_pred_so = np.vstack(y_pred_so).T - assert_equal(y_pred_so.shape, y_test.shape) - assert_equal(len(y_pred_proba_so), n_output) + assert y_pred_so.shape == y_test.shape + assert len(y_pred_proba_so) == n_output # Multioutput prediction knn_mo = neighbors.KNeighborsClassifier(weights=weights, @@ -568,12 +568,12 @@ def test_KNeighborsClassifier_multioutput(): knn_mo.fit(X_train, y_train) y_pred_mo = knn_mo.predict(X_test) - assert_equal(y_pred_mo.shape, y_test.shape) + assert y_pred_mo.shape == y_test.shape assert_array_almost_equal(y_pred_mo, y_pred_so) # Check proba y_pred_proba_mo = knn_mo.predict_proba(X_test) - assert_equal(len(y_pred_proba_mo), n_output) + assert len(y_pred_proba_mo) == n_output for proba_mo, proba_so in zip(y_pred_proba_mo, y_pred_proba_so): assert_array_almost_equal(proba_mo, proba_so) @@ -627,8 +627,8 @@ def test_KNeighborsRegressor_multioutput_uniform_weight(): y_pred = knn.predict(X_test) - assert_equal(y_pred.shape, y_test.shape) - assert_equal(y_pred_idx.shape, y_test.shape) + assert y_pred.shape == y_test.shape + assert y_pred_idx.shape == y_test.shape assert_array_almost_equal(y_pred, y_pred_idx) @@ -654,7 +654,7 @@ def test_kneighbors_regressor_multioutput(n_samples=40, knn.fit(X, y) epsilon = 1E-5 * (2 * rng.rand(1, n_features) - 1) y_pred = 
knn.predict(X[:n_test_pts] + epsilon) - assert_equal(y_pred.shape, y_target.shape) + assert y_pred.shape == y_target.shape assert np.all(np.abs(y_pred - y_target) < 0.3) @@ -725,8 +725,8 @@ def test_RadiusNeighborsRegressor_multioutput_with_uniform_weight(): y_pred_idx = np.array(y_pred_idx) y_pred = rnn.predict(X_test) - assert_equal(y_pred_idx.shape, y_test.shape) - assert_equal(y_pred.shape, y_test.shape) + assert y_pred_idx.shape == y_test.shape + assert y_pred.shape == y_test.shape assert_array_almost_equal(y_pred, y_pred_idx) @@ -753,7 +753,7 @@ def test_RadiusNeighborsRegressor_multioutput(n_samples=40, epsilon = 1E-5 * (2 * rng.rand(1, n_features) - 1) y_pred = rnn.predict(X[:n_test_pts] + epsilon) - assert_equal(y_pred.shape, y_target.shape) + assert y_pred.shape == y_target.shape assert np.all(np.abs(y_pred - y_target) < 0.3) @@ -805,7 +805,7 @@ def test_neighbors_iris(): rgs = neighbors.KNeighborsRegressor(n_neighbors=5, algorithm=algorithm) rgs.fit(iris.data, iris.target) - assert_greater(np.mean(rgs.predict(iris.data).round() == iris.target), + assert (np.mean(rgs.predict(iris.data).round() == iris.target) > 0.95) @@ -826,7 +826,7 @@ def test_neighbors_digits(): score_uint8 = clf.fit(X_train, Y_train).score(X_test, Y_test) score_float = clf.fit(X_train.astype(float, copy=False), Y_train).score( X_test.astype(float, copy=False), Y_test) - assert_equal(score_uint8, score_float) + assert score_uint8 == score_float def test_kneighbors_graph(): @@ -1068,7 +1068,7 @@ def test_valid_brute_metric_for_auto_algorithm(): # check that there is a metric that is valid for brute # but not ball_tree (so we actually test something) - assert_in("cosine", VALID_METRICS['brute']) + assert "cosine" in VALID_METRICS['brute'] assert "cosine" not in VALID_METRICS['ball_tree'] # Metric which don't required any additional parameter From cfee0360b8dea58d0b658491df2790bffe31f1e6 Mon Sep 17 00:00:00 2001 From: adrinjalali Date: Sun, 30 Jun 2019 16:00:18 +0200 Subject: [PATCH 15/22] fix neural_networks --- sklearn/neural_network/tests/test_mlp.py | 56 +++++++++---------- .../tests/test_stochastic_optimizers.py | 2 +- 2 files changed, 29 insertions(+), 29 deletions(-) diff --git a/sklearn/neural_network/tests/test_mlp.py b/sklearn/neural_network/tests/test_mlp.py index 147eeee04b255..058dd1bde4239 100644 --- a/sklearn/neural_network/tests/test_mlp.py +++ b/sklearn/neural_network/tests/test_mlp.py @@ -246,8 +246,8 @@ def test_lbfgs_classification(): activation=activation) mlp.fit(X_train, y_train) y_predict = mlp.predict(X_test) - assert_greater(mlp.score(X_train, y_train), 0.95) - assert_equal((y_predict.shape[0], y_predict.dtype.kind), + assert mlp.score(X_train, y_train) > 0.95 + assert ((y_predict.shape[0], y_predict.dtype.kind) == expected_shape_dtype) @@ -261,10 +261,10 @@ def test_lbfgs_regression(): activation=activation) mlp.fit(X, y) if activation == 'identity': - assert_greater(mlp.score(X, y), 0.84) + assert mlp.score(X, y) > 0.84 else: # Non linear models perform much better than linear bottleneck: - assert_greater(mlp.score(X, y), 0.95) + assert mlp.score(X, y) > 0.95 def test_learning_rate_warmstart(): @@ -282,9 +282,9 @@ def test_learning_rate_warmstart(): post_eta = mlp._optimizer.learning_rate if learning_rate == 'constant': - assert_equal(prev_eta, post_eta) + assert prev_eta == post_eta elif learning_rate == 'invscaling': - assert_equal(mlp.learning_rate_init / pow(8 + 1, mlp.power_t), + assert (mlp.learning_rate_init / pow(8 + 1, mlp.power_t) == post_eta) @@ -297,7 +297,7 @@ def 
test_multilabel_classification(): max_iter=150, random_state=0, activation='logistic', learning_rate_init=0.2) mlp.fit(X, y) - assert_greater(mlp.score(X, y), 0.97) + assert mlp.score(X, y) > 0.97 # test partial fit method mlp = MLPClassifier(solver='sgd', hidden_layer_sizes=50, max_iter=150, @@ -305,7 +305,7 @@ def test_multilabel_classification(): learning_rate_init=0.2) for i in range(100): mlp.partial_fit(X, y, classes=[0, 1, 2, 3, 4]) - assert_greater(mlp.score(X, y), 0.9) + assert mlp.score(X, y) > 0.9 # Make sure early stopping still work now that spliting is stratified by # default (it is disabled for multilabel classification) @@ -320,7 +320,7 @@ def test_multioutput_regression(): mlp = MLPRegressor(solver='lbfgs', hidden_layer_sizes=50, max_iter=200, random_state=1) mlp.fit(X, y) - assert_greater(mlp.score(X, y), 0.9) + assert mlp.score(X, y) > 0.9 def test_partial_fit_classes_error(): @@ -351,7 +351,7 @@ def test_partial_fit_classification(): mlp.partial_fit(X, y, classes=np.unique(y)) pred2 = mlp.predict(X) assert_array_equal(pred1, pred2) - assert_greater(mlp.score(X, y), 0.95) + assert mlp.score(X, y) > 0.95 def test_partial_fit_unseen_classes(): @@ -362,7 +362,7 @@ def test_partial_fit_unseen_classes(): clf.partial_fit([[1], [2], [3]], ["a", "b", "c"], classes=["a", "b", "c", "d"]) clf.partial_fit([[4]], ["d"]) - assert_greater(clf.score([[1], [2], [3], [4]], ["a", "b", "c", "d"]), 0) + assert clf.score([[1], [2], [3], [4]], ["a", "b", "c", "d"]) > 0 def test_partial_fit_regression(): @@ -388,7 +388,7 @@ def test_partial_fit_regression(): pred2 = mlp.predict(X) assert_almost_equal(pred1, pred2, decimal=2) score = mlp.score(X, y) - assert_greater(score, 0.75) + assert score > 0.75 def test_partial_fit_errors(): @@ -450,11 +450,11 @@ def test_predict_proba_binary(): proba_max = y_proba.argmax(axis=1) proba_log_max = y_log_proba.argmax(axis=1) - assert_equal(y_proba.shape, (n_samples, n_classes)) + assert y_proba.shape == (n_samples, n_classes) assert_array_equal(proba_max, proba_log_max) assert_array_equal(y_log_proba, np.log(y_proba)) - assert_equal(roc_auc_score(y, y_proba[:, 1]), 1.0) + assert roc_auc_score(y, y_proba[:, 1]) == 1.0 def test_predict_proba_multiclass(): @@ -473,7 +473,7 @@ def test_predict_proba_multiclass(): proba_max = y_proba.argmax(axis=1) proba_log_max = y_log_proba.argmax(axis=1) - assert_equal(y_proba.shape, (n_samples, n_classes)) + assert y_proba.shape == (n_samples, n_classes) assert_array_equal(proba_max, proba_log_max) assert_array_equal(y_log_proba, np.log(y_proba)) @@ -490,14 +490,14 @@ def test_predict_proba_multilabel(): clf.fit(X, Y) y_proba = clf.predict_proba(X) - assert_equal(y_proba.shape, (n_samples, n_classes)) + assert y_proba.shape == (n_samples, n_classes) assert_array_equal(y_proba > 0.5, Y) y_log_proba = clf.predict_log_proba(X) proba_max = y_proba.argmax(axis=1) proba_log_max = y_log_proba.argmax(axis=1) - assert_greater((y_proba.sum(1) - 1).dot(y_proba.sum(1) - 1), 1e-10) + assert (y_proba.sum(1) - 1).dot(y_proba.sum(1) - 1) > 1e-10 assert_array_equal(proba_max, proba_log_max) assert_array_equal(y_log_proba, np.log(y_proba)) @@ -553,7 +553,7 @@ def test_tolerance(): y = [1, 0] clf = MLPClassifier(tol=0.5, max_iter=3000, solver='sgd') clf.fit(X, y) - assert_greater(clf.max_iter, clf.n_iter_) + assert clf.max_iter > clf.n_iter_ def test_verbose_sgd(): @@ -580,13 +580,13 @@ def test_early_stopping(): clf = MLPClassifier(tol=tol, max_iter=3000, solver='sgd', early_stopping=True) clf.fit(X, y) - assert_greater(clf.max_iter, 
clf.n_iter_) + assert clf.max_iter > clf.n_iter_ valid_scores = clf.validation_scores_ best_valid_score = clf.best_validation_score_ - assert_equal(max(valid_scores), best_valid_score) - assert_greater(best_valid_score + tol, valid_scores[-2]) - assert_greater(best_valid_score + tol, valid_scores[-1]) + assert max(valid_scores) == best_valid_score + assert best_valid_score + tol > valid_scores[-2] + assert best_valid_score + tol > valid_scores[-1] def test_adaptive_learning_rate(): @@ -595,8 +595,8 @@ def test_adaptive_learning_rate(): clf = MLPClassifier(tol=0.5, max_iter=3000, solver='sgd', learning_rate='adaptive') clf.fit(X, y) - assert_greater(clf.max_iter, clf.n_iter_) - assert_greater(1e-6, clf._optimizer.learning_rate) + assert clf.max_iter > clf.n_iter_ + assert 1e-6 > clf._optimizer.learning_rate @ignore_warnings(category=RuntimeWarning) @@ -640,8 +640,8 @@ def test_n_iter_no_change(): clf.fit(X, y) # validate n_iter_no_change - assert_equal(clf._no_improvement_count, n_iter_no_change + 1) - assert_greater(max_iter, clf.n_iter_) + assert clf._no_improvement_count == n_iter_no_change + 1 + assert max_iter > clf.n_iter_ @ignore_warnings(category=ConvergenceWarning) @@ -663,10 +663,10 @@ def test_n_iter_no_change_inf(): clf.fit(X, y) # validate n_iter_no_change doesn't cause early stopping - assert_equal(clf.n_iter_, max_iter) + assert clf.n_iter_ == max_iter # validate _update_no_improvement_count() was always triggered - assert_equal(clf._no_improvement_count, clf.n_iter_ - 1) + assert clf._no_improvement_count == clf.n_iter_ - 1 def test_early_stopping_stratified(): diff --git a/sklearn/neural_network/tests/test_stochastic_optimizers.py b/sklearn/neural_network/tests/test_stochastic_optimizers.py index 372e0bcfd9722..d01b91cbaaf3e 100644 --- a/sklearn/neural_network/tests/test_stochastic_optimizers.py +++ b/sklearn/neural_network/tests/test_stochastic_optimizers.py @@ -53,7 +53,7 @@ def test_sgd_optimizer_trigger_stopping(): lr = 2e-6 optimizer = SGDOptimizer(params, lr, lr_schedule='adaptive') assert not optimizer.trigger_stopping('', False) - assert_equal(lr / 5, optimizer.learning_rate) + assert lr / 5 == optimizer.learning_rate assert optimizer.trigger_stopping('', False) From a4bf61b3fb5b43c88f2a9363c173de7d5d0f4b96 Mon Sep 17 00:00:00 2001 From: adrinjalali Date: Sun, 30 Jun 2019 16:01:11 +0200 Subject: [PATCH 16/22] fix preprocessing --- sklearn/preprocessing/tests/test_data.py | 82 +++++++++++------------ sklearn/preprocessing/tests/test_label.py | 22 +++--- 2 files changed, 52 insertions(+), 52 deletions(-) diff --git a/sklearn/preprocessing/tests/test_data.py b/sklearn/preprocessing/tests/test_data.py index b49396c7c0253..ef3e4c4768c84 100644 --- a/sklearn/preprocessing/tests/test_data.py +++ b/sklearn/preprocessing/tests/test_data.py @@ -93,9 +93,9 @@ def _check_dim_1axis(a): def assert_correct_incr(i, batch_start, batch_stop, n, chunk_size, n_samples_seen): if batch_stop != n: - assert_equal((i + 1) * chunk_size, n_samples_seen) + assert (i + 1) * chunk_size == n_samples_seen else: - assert_equal(i * chunk_size + (batch_stop - batch_start), + assert (i * chunk_size + (batch_stop - batch_start) == n_samples_seen) @@ -128,8 +128,8 @@ def test_polynomial_features(): X_poly = interact.fit_transform(X) assert_array_almost_equal(X_poly, P2[:, [0, 1, 2, 4]]) - assert_equal(interact.powers_.shape, (interact.n_output_features_, - interact.n_input_features_)) + assert interact.powers_.shape == (interact.n_output_features_, + interact.n_input_features_) def 
test_polynomial_feature_names(): @@ -319,7 +319,7 @@ def test_standard_scaler_1d(): np.zeros_like(n_features)) assert_array_almost_equal(X_scaled.mean(axis=0), .0) assert_array_almost_equal(X_scaled.std(axis=0), 1.) - assert_equal(scaler.n_samples_seen_, X.shape[0]) + assert scaler.n_samples_seen_ == X.shape[0] # check inverse transform X_scaled_back = scaler.inverse_transform(X_scaled) @@ -333,7 +333,7 @@ def test_standard_scaler_1d(): assert_almost_equal(scaler.scale_, 1.) assert_array_almost_equal(X_scaled.mean(axis=0), .0) assert_array_almost_equal(X_scaled.std(axis=0), .0) - assert_equal(scaler.n_samples_seen_, X.shape[0]) + assert scaler.n_samples_seen_ == X.shape[0] def test_standard_scaler_dtype(): @@ -408,7 +408,7 @@ def test_scaler_2d_arrays(): scaler = StandardScaler() X_scaled = scaler.fit(X).transform(X, copy=True) assert not np.any(np.isnan(X_scaled)) - assert_equal(scaler.n_samples_seen_, n_samples) + assert scaler.n_samples_seen_ == n_samples assert_array_almost_equal(X_scaled.mean(axis=0), n_features * [0.0]) assert_array_almost_equal(X_scaled.std(axis=0), [0., 1., 1., 1., 1.]) @@ -501,7 +501,7 @@ def test_minmax_scaler_partial_fit(): scaler_incr.data_min_) assert_array_almost_equal(scaler_batch.data_max_, scaler_incr.data_max_) - assert_equal(scaler_batch.n_samples_seen_, scaler_incr.n_samples_seen_) + assert scaler_batch.n_samples_seen_ == scaler_incr.n_samples_seen_ assert_array_almost_equal(scaler_batch.data_range_, scaler_incr.data_range_) assert_array_almost_equal(scaler_batch.scale_, scaler_incr.scale_) @@ -516,7 +516,7 @@ def test_minmax_scaler_partial_fit(): scaler_incr.data_min_) assert_array_almost_equal(scaler_batch.data_max_, scaler_incr.data_max_) - assert_equal(scaler_batch.n_samples_seen_, scaler_incr.n_samples_seen_) + assert scaler_batch.n_samples_seen_ == scaler_incr.n_samples_seen_ assert_array_almost_equal(scaler_batch.data_range_, scaler_incr.data_range_) assert_array_almost_equal(scaler_batch.scale_, scaler_incr.scale_) @@ -548,8 +548,8 @@ def test_standard_scaler_partial_fit(): scaler_incr = scaler_incr.partial_fit(X[batch]) assert_array_almost_equal(scaler_batch.mean_, scaler_incr.mean_) - assert_equal(scaler_batch.var_, scaler_incr.var_) # Nones - assert_equal(scaler_batch.n_samples_seen_, scaler_incr.n_samples_seen_) + assert scaler_batch.var_ == scaler_incr.var_ # Nones + assert scaler_batch.n_samples_seen_ == scaler_incr.n_samples_seen_ # Test std after 1 step batch0 = slice(0, chunk_size) @@ -576,7 +576,7 @@ def test_standard_scaler_partial_fit(): n_samples_seen=scaler_incr.n_samples_seen_) assert_array_almost_equal(scaler_batch.var_, scaler_incr.var_) - assert_equal(scaler_batch.n_samples_seen_, scaler_incr.n_samples_seen_) + assert scaler_batch.n_samples_seen_ == scaler_incr.n_samples_seen_ def test_standard_scaler_partial_fit_numerical_stability(): @@ -665,7 +665,7 @@ def test_standard_scaler_trasform_with_partial_fit(): assert_array_less(zero, scaler_incr.var_ + epsilon) # as less or equal assert_array_less(zero, scaler_incr.scale_ + epsilon) # (i+1) because the Scaler has been already fitted - assert_equal((i + 1), scaler_incr.n_samples_seen_) + assert (i + 1) == scaler_incr.n_samples_seen_ def test_min_max_scaler_iris(): @@ -765,7 +765,7 @@ def test_min_max_scaler_1d(): else: assert_array_almost_equal(X_scaled.min(axis=0), .0) assert_array_almost_equal(X_scaled.max(axis=0), 1.) 
- assert_equal(scaler.n_samples_seen_, X.shape[0]) + assert scaler.n_samples_seen_ == X.shape[0] # check inverse transform X_scaled_back = scaler.inverse_transform(X_scaled) @@ -775,9 +775,9 @@ def test_min_max_scaler_1d(): X = np.ones((5, 1)) scaler = MinMaxScaler() X_scaled = scaler.fit(X).transform(X) - assert_greater_equal(X_scaled.min(), 0.) - assert_less_equal(X_scaled.max(), 1.) - assert_equal(scaler.n_samples_seen_, X.shape[0]) + assert X_scaled.min() >= 0. + assert X_scaled.max() <= 1. + assert scaler.n_samples_seen_ == X.shape[0] # Function interface X_1d = X_1row.ravel() @@ -1386,7 +1386,7 @@ def test_quantile_transform_subsampling(): inf_norm_arr.append(inf_norm) # each random subsampling yield a unique approximation to the expected # linspace CDF - assert_equal(len(np.unique(inf_norm_arr)), len(inf_norm_arr)) + assert len(np.unique(inf_norm_arr)) == len(inf_norm_arr) # sparse support @@ -1404,7 +1404,7 @@ def test_quantile_transform_subsampling(): inf_norm_arr.append(inf_norm) # each random subsampling yield a unique approximation to the expected # linspace CDF - assert_equal(len(np.unique(inf_norm_arr)), len(inf_norm_arr)) + assert len(np.unique(inf_norm_arr)) == len(inf_norm_arr) def test_quantile_transform_sparse_toy(): @@ -1486,14 +1486,14 @@ def test_quantile_transform_bounds(): X = np.random.random((1000, 1)) transformer = QuantileTransformer() transformer.fit(X) - assert_equal(transformer.transform([[-10]]), + assert (transformer.transform([[-10]]) == transformer.transform([[np.min(X)]])) - assert_equal(transformer.transform([[10]]), + assert (transformer.transform([[10]]) == transformer.transform([[np.max(X)]])) - assert_equal(transformer.inverse_transform([[-10]]), + assert (transformer.inverse_transform([[-10]]) == transformer.inverse_transform( [[np.min(transformer.references_)]])) - assert_equal(transformer.inverse_transform([[10]]), + assert (transformer.inverse_transform([[10]]) == transformer.inverse_transform( [[np.max(transformer.references_)]])) @@ -1725,7 +1725,7 @@ def test_maxabs_scaler_1d(): np.ones(n_features)) else: assert_array_almost_equal(np.abs(X_scaled.max(axis=0)), 1.) - assert_equal(scaler.n_samples_seen_, X.shape[0]) + assert scaler.n_samples_seen_ == X.shape[0] # check inverse transform X_scaled_back = scaler.inverse_transform(X_scaled) @@ -1736,7 +1736,7 @@ def test_maxabs_scaler_1d(): scaler = MaxAbsScaler() X_scaled = scaler.fit(X).transform(X) assert_array_almost_equal(np.abs(X_scaled.max(axis=0)), 1.) 
- assert_equal(scaler.n_samples_seen_, X.shape[0]) + assert scaler.n_samples_seen_ == X.shape[0] # function interface X_1d = X_1row.ravel() @@ -1769,10 +1769,10 @@ def test_maxabs_scaler_partial_fit(): scaler_incr_csr.max_abs_) assert_array_almost_equal(scaler_batch.max_abs_, scaler_incr_csc.max_abs_) - assert_equal(scaler_batch.n_samples_seen_, scaler_incr.n_samples_seen_) - assert_equal(scaler_batch.n_samples_seen_, + assert scaler_batch.n_samples_seen_ == scaler_incr.n_samples_seen_ + assert (scaler_batch.n_samples_seen_ == scaler_incr_csr.n_samples_seen_) - assert_equal(scaler_batch.n_samples_seen_, + assert (scaler_batch.n_samples_seen_ == scaler_incr_csc.n_samples_seen_) assert_array_almost_equal(scaler_batch.scale_, scaler_incr.scale_) assert_array_almost_equal(scaler_batch.scale_, scaler_incr_csr.scale_) @@ -1786,7 +1786,7 @@ def test_maxabs_scaler_partial_fit(): scaler_incr = MaxAbsScaler().partial_fit(X[batch0]) assert_array_almost_equal(scaler_batch.max_abs_, scaler_incr.max_abs_) - assert_equal(scaler_batch.n_samples_seen_, scaler_incr.n_samples_seen_) + assert scaler_batch.n_samples_seen_ == scaler_incr.n_samples_seen_ assert_array_almost_equal(scaler_batch.scale_, scaler_incr.scale_) assert_array_almost_equal(scaler_batch.transform(X), scaler_incr.transform(X)) @@ -1966,7 +1966,7 @@ def test_normalize(): for norm in ('l1', 'l2'): X = X.astype(dtype) X_norm = normalize(X, norm=norm) - assert_equal(X_norm.dtype, dtype) + assert X_norm.dtype == dtype X_norm = toarray(X_norm) if norm == 'l1': @@ -2005,23 +2005,23 @@ def test_binarizer(): binarizer = Binarizer(threshold=2.0, copy=True) X_bin = toarray(binarizer.transform(X)) - assert_equal(np.sum(X_bin == 0), 4) - assert_equal(np.sum(X_bin == 1), 2) + assert np.sum(X_bin == 0) == 4 + assert np.sum(X_bin == 1) == 2 X_bin = binarizer.transform(X) - assert_equal(sparse.issparse(X), sparse.issparse(X_bin)) + assert sparse.issparse(X) == sparse.issparse(X_bin) binarizer = Binarizer(copy=True).fit(X) X_bin = toarray(binarizer.transform(X)) assert X_bin is not X - assert_equal(np.sum(X_bin == 0), 2) - assert_equal(np.sum(X_bin == 1), 4) + assert np.sum(X_bin == 0) == 2 + assert np.sum(X_bin == 1) == 4 binarizer = Binarizer(copy=True) X_bin = binarizer.transform(X) assert X_bin is not X X_bin = toarray(X_bin) - assert_equal(np.sum(X_bin == 0), 2) - assert_equal(np.sum(X_bin == 1), 4) + assert np.sum(X_bin == 0) == 2 + assert np.sum(X_bin == 1) == 4 binarizer = Binarizer(copy=False) X_bin = binarizer.transform(X) @@ -2035,16 +2035,16 @@ def test_binarizer(): assert X_bin is X_float X_bin = toarray(X_bin) - assert_equal(np.sum(X_bin == 0), 2) - assert_equal(np.sum(X_bin == 1), 4) + assert np.sum(X_bin == 0) == 2 + assert np.sum(X_bin == 1) == 4 binarizer = Binarizer(threshold=-0.5, copy=True) for init in (np.array, list): X = init(X_.copy()) X_bin = toarray(binarizer.transform(X)) - assert_equal(np.sum(X_bin == 0), 1) - assert_equal(np.sum(X_bin == 1), 5) + assert np.sum(X_bin == 0) == 1 + assert np.sum(X_bin == 1) == 5 X_bin = binarizer.transform(X) # Cannot use threshold < 0 for sparse diff --git a/sklearn/preprocessing/tests/test_label.py b/sklearn/preprocessing/tests/test_label.py index bd853f0bba59d..ebb7d6b0138f8 100644 --- a/sklearn/preprocessing/tests/test_label.py +++ b/sklearn/preprocessing/tests/test_label.py @@ -268,26 +268,26 @@ def test_sparse_output_multilabel_binarizer(): # With fit_transform mlb = MultiLabelBinarizer(sparse_output=sparse_output) got = mlb.fit_transform(inp()) - assert_equal(issparse(got), sparse_output) + 
assert issparse(got) == sparse_output if sparse_output: # verify CSR assumption that indices and indptr have same dtype - assert_equal(got.indices.dtype, got.indptr.dtype) + assert got.indices.dtype == got.indptr.dtype got = got.toarray() assert_array_equal(indicator_mat, got) assert_array_equal([1, 2, 3], mlb.classes_) - assert_equal(mlb.inverse_transform(got), inverse) + assert mlb.inverse_transform(got) == inverse # With fit mlb = MultiLabelBinarizer(sparse_output=sparse_output) got = mlb.fit(inp()).transform(inp()) - assert_equal(issparse(got), sparse_output) + assert issparse(got) == sparse_output if sparse_output: # verify CSR assumption that indices and indptr have same dtype - assert_equal(got.indices.dtype, got.indptr.dtype) + assert got.indices.dtype == got.indptr.dtype got = got.toarray() assert_array_equal(indicator_mat, got) assert_array_equal([1, 2, 3], mlb.classes_) - assert_equal(mlb.inverse_transform(got), inverse) + assert mlb.inverse_transform(got) == inverse assert_raises(ValueError, mlb.inverse_transform, csr_matrix(np.array([[0, 1, 1], @@ -312,14 +312,14 @@ def test_multilabel_binarizer(): got = mlb.fit_transform(inp()) assert_array_equal(indicator_mat, got) assert_array_equal([1, 2, 3], mlb.classes_) - assert_equal(mlb.inverse_transform(got), inverse) + assert mlb.inverse_transform(got) == inverse # With fit mlb = MultiLabelBinarizer() got = mlb.fit(inp()).transform(inp()) assert_array_equal(indicator_mat, got) assert_array_equal([1, 2, 3], mlb.classes_) - assert_equal(mlb.inverse_transform(got), inverse) + assert mlb.inverse_transform(got) == inverse def test_multilabel_binarizer_empty_sample(): @@ -497,7 +497,7 @@ def check_binarized_results(y, classes, pos_label, neg_label, expected): pos_label=pos_label, sparse_output=sparse_output) assert_array_equal(toarray(binarized), expected) - assert_equal(issparse(binarized), sparse_output) + assert issparse(binarized) == sparse_output # check inverse y_type = type_of_target(y) @@ -519,10 +519,10 @@ def check_binarized_results(y, classes, pos_label, neg_label, expected): sparse_output=sparse_output) binarized = lb.fit_transform(y) assert_array_equal(toarray(binarized), expected) - assert_equal(issparse(binarized), sparse_output) + assert issparse(binarized) == sparse_output inverse_output = lb.inverse_transform(binarized) assert_array_equal(toarray(inverse_output), toarray(y)) - assert_equal(issparse(inverse_output), issparse(y)) + assert issparse(inverse_output) == issparse(y) def test_label_binarize_binary(): From 26216d08faacd4cb160f22bba38c400965762888 Mon Sep 17 00:00:00 2001 From: adrinjalali Date: Sun, 30 Jun 2019 16:02:08 +0200 Subject: [PATCH 17/22] fix semi_supervised, svm --- .../tests/test_label_propagation.py | 6 +-- sklearn/svm/tests/test_sparse.py | 2 +- sklearn/svm/tests/test_svm.py | 42 +++++++++---------- 3 files changed, 25 insertions(+), 25 deletions(-) diff --git a/sklearn/semi_supervised/tests/test_label_propagation.py b/sklearn/semi_supervised/tests/test_label_propagation.py index ef594fccb7076..d5a4449ee9cc5 100644 --- a/sklearn/semi_supervised/tests/test_label_propagation.py +++ b/sklearn/semi_supervised/tests/test_label_propagation.py @@ -32,7 +32,7 @@ def test_fit_transduction(): labels = [0, 1, -1] for estimator, parameters in ESTIMATORS: clf = estimator(**parameters).fit(samples, labels) - assert_equal(clf.transduction_[2], 1) + assert clf.transduction_[2] == 1 def test_distribution(): @@ -144,11 +144,11 @@ def test_convergence_warning(): y = np.array([0, 1, -1]) mdl = 
label_propagation.LabelSpreading(kernel='rbf', max_iter=1) assert_warns(ConvergenceWarning, mdl.fit, X, y) - assert_equal(mdl.n_iter_, mdl.max_iter) + assert mdl.n_iter_ == mdl.max_iter mdl = label_propagation.LabelPropagation(kernel='rbf', max_iter=1) assert_warns(ConvergenceWarning, mdl.fit, X, y) - assert_equal(mdl.n_iter_, mdl.max_iter) + assert mdl.n_iter_ == mdl.max_iter mdl = label_propagation.LabelSpreading(kernel='rbf', max_iter=500) assert_no_warnings(mdl.fit, X, y) diff --git a/sklearn/svm/tests/test_sparse.py b/sklearn/svm/tests/test_sparse.py index 1cf533f856113..3f7e254ba6379 100644 --- a/sklearn/svm/tests/test_sparse.py +++ b/sklearn/svm/tests/test_sparse.py @@ -229,7 +229,7 @@ def test_linearsvc_iris(): sp_clf = svm.LinearSVC(random_state=0).fit(iris.data, iris.target) clf = svm.LinearSVC(random_state=0).fit(iris.data.toarray(), iris.target) - assert_equal(clf.fit_intercept, sp_clf.fit_intercept) + assert clf.fit_intercept == sp_clf.fit_intercept assert_array_almost_equal(clf.coef_, sp_clf.coef_, decimal=1) assert_array_almost_equal(clf.intercept_, sp_clf.intercept_, decimal=1) diff --git a/sklearn/svm/tests/test_svm.py b/sklearn/svm/tests/test_svm.py index ad2328a84a61c..d7f7a1534f728 100644 --- a/sklearn/svm/tests/test_svm.py +++ b/sklearn/svm/tests/test_svm.py @@ -57,7 +57,7 @@ def test_libsvm_iris(): # shuffle the dataset so that labels are not ordered for k in ('linear', 'rbf'): clf = svm.SVC(kernel=k).fit(iris.data, iris.target) - assert_greater(np.mean(clf.predict(iris.data) == iris.target), 0.9) + assert np.mean(clf.predict(iris.data) == iris.target) > 0.9 assert hasattr(clf, "coef_") == (k == 'linear') assert_array_equal(clf.classes_, np.sort(clf.classes_)) @@ -65,18 +65,18 @@ def test_libsvm_iris(): # check also the low-level API model = svm.libsvm.fit(iris.data, iris.target.astype(np.float64)) pred = svm.libsvm.predict(iris.data, *model) - assert_greater(np.mean(pred == iris.target), .95) + assert np.mean(pred == iris.target) > .95 model = svm.libsvm.fit(iris.data, iris.target.astype(np.float64), kernel='linear') pred = svm.libsvm.predict(iris.data, *model, kernel='linear') - assert_greater(np.mean(pred == iris.target), .95) + assert np.mean(pred == iris.target) > .95 pred = svm.libsvm.cross_validation(iris.data, iris.target.astype(np.float64), 5, kernel='linear', random_seed=0) - assert_greater(np.mean(pred == iris.target), .95) + assert np.mean(pred == iris.target) > .95 # If random_seed >= 0, the libsvm rng is seeded (by calling `srand`), hence # we should get deterministic results (assuming that there is no other @@ -168,7 +168,7 @@ def test_svr(): svm.LinearSVR(C=10.), svm.LinearSVR(C=10.)): clf.fit(diabetes.data, diabetes.target) - assert_greater(clf.score(diabetes.data, diabetes.target), 0.02) + assert clf.score(diabetes.data, diabetes.target) > 0.02 # non-regression test; previously, BaseLibSVM would check that # len(np.unique(y)) < 2, which must only be done for SVC @@ -243,7 +243,7 @@ def test_oneclass(): pred = clf.predict(T) assert_array_equal(pred, [1, -1, -1]) - assert_equal(pred.dtype, np.dtype('intp')) + assert pred.dtype == np.dtype('intp') assert_array_almost_equal(clf.intercept_, [-1.218], decimal=3) assert_array_almost_equal(clf.dual_coef_, [[0.750, 0.750, 0.750, 0.750]], @@ -272,9 +272,9 @@ def test_oneclass_decision_function(): # predict things y_pred_test = clf.predict(X_test) - assert_greater(np.mean(y_pred_test == 1), .9) + assert np.mean(y_pred_test == 1) > .9 y_pred_outliers = clf.predict(X_outliers) - 
assert_greater(np.mean(y_pred_outliers == -1), .9) + assert np.mean(y_pred_outliers == -1) > .9 dec_func_test = clf.decision_function(X_test) assert_array_equal((dec_func_test > 0).ravel(), y_pred_test == 1) dec_func_outliers = clf.decision_function(X_outliers) @@ -360,7 +360,7 @@ def test_decision_function_shape(): clf = svm.SVC(kernel='linear', C=0.1, decision_function_shape='ovr').fit(iris.data, iris.target) dec = clf.decision_function(iris.data) - assert_equal(dec.shape, (len(iris.data), 3)) + assert dec.shape == (len(iris.data), 3) assert_array_equal(clf.predict(iris.data), np.argmax(dec, axis=1)) # with five classes: @@ -370,14 +370,14 @@ def test_decision_function_shape(): clf = svm.SVC(kernel='linear', C=0.1, decision_function_shape='ovr').fit(X_train, y_train) dec = clf.decision_function(X_test) - assert_equal(dec.shape, (len(X_test), 5)) + assert dec.shape == (len(X_test), 5) assert_array_equal(clf.predict(X_test), np.argmax(dec, axis=1)) # check shape of ovo_decition_function=True clf = svm.SVC(kernel='linear', C=0.1, decision_function_shape='ovo').fit(X_train, y_train) dec = clf.decision_function(X_train) - assert_equal(dec.shape, (len(X_train), 10)) + assert dec.shape == (len(X_train), 10) def test_svr_predict(): @@ -548,7 +548,7 @@ def test_sparse_precomputed(): clf.fit(sparse_gram, [0, 1]) assert not "reached" except TypeError as e: - assert_in("Sparse precomputed", str(e)) + assert "Sparse precomputed" in str(e) def test_linearsvc_parameters(): @@ -715,7 +715,7 @@ def test_crammer_singer_binary(): acc = svm.LinearSVC(fit_intercept=fit_intercept, multi_class="crammer_singer", random_state=0).fit(X, y).score(X, y) - assert_greater(acc, 0.9) + assert acc > 0.9 def test_linearsvc_iris(): @@ -723,8 +723,8 @@ def test_linearsvc_iris(): # Also, test symbolic class names (classes_). target = iris.target_names[iris.target] clf = svm.LinearSVC(random_state=0).fit(iris.data, target) - assert_equal(set(clf.classes_), set(iris.target_names)) - assert_greater(np.mean(clf.predict(iris.data) == target), 0.8) + assert set(clf.classes_) == set(iris.target_names) + assert np.mean(clf.predict(iris.data) == target) > 0.8 dec = clf.decision_function(iris.data) pred = iris.target_names[np.argmax(dec, 1)] @@ -754,7 +754,7 @@ def test_dense_liblinear_intercept_handling(classifier=svm.LinearSVC): clf.intercept_scaling = 100 clf.fit(X, y) intercept1 = clf.intercept_ - assert_less(intercept1, -1) + assert intercept1 < -1 # when intercept_scaling is sufficiently high, the intercept value # doesn't depend on intercept_scaling value @@ -883,11 +883,11 @@ def test_linear_svm_convergence_warnings(): lsvc = svm.LinearSVC(random_state=0, max_iter=2) assert_warns(ConvergenceWarning, lsvc.fit, X, Y) - assert_equal(lsvc.n_iter_, 2) + assert lsvc.n_iter_ == 2 lsvr = svm.LinearSVR(random_state=0, max_iter=2) assert_warns(ConvergenceWarning, lsvr.fit, iris.data, iris.target) - assert_equal(lsvr.n_iter_, 2) + assert lsvr.n_iter_ == 2 def test_svr_coef_sign(): @@ -919,7 +919,7 @@ def test_lsvc_intercept_scaling_zero(): lsvc = svm.LinearSVC(fit_intercept=False) lsvc.fit(X, Y) - assert_equal(lsvc.intercept_, 0.) + assert lsvc.intercept_ == 0. 
def test_hasattr_predict_proba(): @@ -950,7 +950,7 @@ def test_decision_function_shape_two_class(): for estimator in [svm.SVC, svm.NuSVC]: clf = OneVsRestClassifier( estimator(decision_function_shape="ovr")).fit(X, y) - assert_equal(len(clf.predict(X)), len(y)) + assert len(clf.predict(X)) == len(y) def test_ovr_decision_function(): @@ -988,7 +988,7 @@ def test_ovr_decision_function(): pred_class_deci_val = deci_val[range(8), y_pred].reshape((4, 2)) # Assert pred_class_deci_val > 0 here - assert_greater(np.min(pred_class_deci_val), 0.0) + assert np.min(pred_class_deci_val) > 0.0 # Test if the first point has lower decision value on every quadrant # compared to the second point From b46c28e827bd848411ae8445e2b1b4f7ea71a632 Mon Sep 17 00:00:00 2001 From: adrinjalali Date: Sun, 30 Jun 2019 16:17:43 +0200 Subject: [PATCH 18/22] fix semi_supervised, tree --- sklearn/tree/tests/test_export.py | 26 ++-- sklearn/tree/tests/test_tree.py | 248 ++++++++++++++---------------- 2 files changed, 130 insertions(+), 144 deletions(-) diff --git a/sklearn/tree/tests/test_export.py b/sklearn/tree/tests/test_export.py index 317a11ae25836..f41ef59f7b7a2 100644 --- a/sklearn/tree/tests/test_export.py +++ b/sklearn/tree/tests/test_export.py @@ -46,7 +46,7 @@ def test_graphviz_toy(): 'headlabel="False"] ;\n' \ '}' - assert_equal(contents1, contents2) + assert contents1 == contents2 # Test with feature_names contents1 = export_graphviz(clf, feature_names=["feature0", "feature1"], @@ -63,7 +63,7 @@ def test_graphviz_toy(): 'headlabel="False"] ;\n' \ '}' - assert_equal(contents1, contents2) + assert contents1 == contents2 # Test with class_names contents1 = export_graphviz(clf, class_names=["yes", "no"], out_file=None) @@ -81,7 +81,7 @@ def test_graphviz_toy(): 'headlabel="False"] ;\n' \ '}' - assert_equal(contents1, contents2) + assert contents1 == contents2 # Test plot_options contents1 = export_graphviz(clf, filled=True, impurity=False, @@ -103,7 +103,7 @@ def test_graphviz_toy(): 'headlabel="False"] ;\n' \ '}' - assert_equal(contents1, contents2) + assert contents1 == contents2 # Test max_depth contents1 = export_graphviz(clf, max_depth=0, @@ -118,7 +118,7 @@ def test_graphviz_toy(): '0 -> 2 ;\n' \ '}' - assert_equal(contents1, contents2) + assert contents1 == contents2 # Test max_depth with plot_options contents1 = export_graphviz(clf, max_depth=0, filled=True, @@ -133,7 +133,7 @@ def test_graphviz_toy(): '0 -> 2 ;\n' \ '}' - assert_equal(contents1, contents2) + assert contents1 == contents2 # Test multi-output with weighted samples clf = DecisionTreeClassifier(max_depth=2, @@ -166,7 +166,7 @@ def test_graphviz_toy(): '2 -> 4 ;\n' \ '}' - assert_equal(contents1, contents2) + assert contents1 == contents2 # Test regression output with plot_options clf = DecisionTreeRegressor(max_depth=3, @@ -197,7 +197,7 @@ def test_graphviz_toy(): '{rank=same ; 1; 2} ;\n' \ '}' - assert_equal(contents1, contents2) + assert contents1 == contents2 # Test classifier with degraded learning set clf = DecisionTreeClassifier(max_depth=3) @@ -262,7 +262,7 @@ def test_friedman_mse_in_graphviz(): export_graphviz(estimator[0], out_file=dot_data) for finding in finditer(r"\[.*?samples.*?\]", dot_data.getvalue()): - assert_in("friedman_mse", finding.group()) + assert "friedman_mse" in finding.group() def test_precision(): @@ -291,8 +291,8 @@ def test_precision(): # check value for finding in finditer(r"value = \d+\.\d+", dot_data): - assert_less_equal( - len(search(r"\.\d+", finding.group()).group()), + assert ( + len(search(r"\.\d+", 
finding.group()).group()) <= precision + 1) # check impurity if is_classifier(clf): @@ -302,11 +302,11 @@ def test_precision(): # check impurity for finding in finditer(pattern, dot_data): - assert_equal(len(search(r"\.\d+", finding.group()).group()), + assert (len(search(r"\.\d+", finding.group()).group()) == precision + 1) # check threshold for finding in finditer(r"<= \d+\.\d+", dot_data): - assert_equal(len(search(r"\.\d+", finding.group()).group()), + assert (len(search(r"\.\d+", finding.group()).group()) == precision + 1) diff --git a/sklearn/tree/tests/test_tree.py b/sklearn/tree/tests/test_tree.py index 8d64150cab8f5..89a1816fd852e 100644 --- a/sklearn/tree/tests/test_tree.py +++ b/sklearn/tree/tests/test_tree.py @@ -164,9 +164,9 @@ def assert_tree_equal(d, s, message): - assert_equal(s.node_count, d.node_count, - "{0}: inequal number of node ({1} != {2})" - "".format(message, s.node_count, d.node_count)) + assert s.node_count == d.node_count, ( + "{0}: inequal number of node ({1} != {2})" + "".format(message, s.node_count, d.node_count)) assert_array_equal(d.children_right, s.children_right, message + ": inequal children_right") @@ -248,13 +248,11 @@ def test_xor(): for name, Tree in CLF_TREES.items(): clf = Tree(random_state=0) clf.fit(X, y) - assert_equal(clf.score(X, y), 1.0, - "Failed with {0}".format(name)) + assert clf.score(X, y) == 1.0, "Failed with {0}".format(name) clf = Tree(random_state=0, max_features=1) clf.fit(X, y) - assert_equal(clf.score(X, y), 1.0, - "Failed with {0}".format(name)) + assert clf.score(X, y) == 1.0, "Failed with {0}".format(name) def test_iris(): @@ -263,16 +261,16 @@ def test_iris(): clf = Tree(criterion=criterion, random_state=0) clf.fit(iris.data, iris.target) score = accuracy_score(clf.predict(iris.data), iris.target) - assert_greater(score, 0.9, - "Failed with {0}, criterion = {1} and score = {2}" - "".format(name, criterion, score)) + assert score > 0.9, ( + "Failed with {0}, criterion = {1} and score = {2}" + "".format(name, criterion, score)) clf = Tree(criterion=criterion, max_features=2, random_state=0) clf.fit(iris.data, iris.target) score = accuracy_score(clf.predict(iris.data), iris.target) - assert_greater(score, 0.5, - "Failed with {0}, criterion = {1} and score = {2}" - "".format(name, criterion, score)) + assert score > 0.5, ( + "Failed with {0}, criterion = {1} and score = {2}" + "".format(name, criterion, score)) def test_boston(): @@ -282,18 +280,18 @@ def test_boston(): reg = Tree(criterion=criterion, random_state=0) reg.fit(boston.data, boston.target) score = mean_squared_error(boston.target, reg.predict(boston.data)) - assert_less(score, 1, - "Failed with {0}, criterion = {1} and score = {2}" - "".format(name, criterion, score)) + assert score < 1, ( + "Failed with {0}, criterion = {1} and score = {2}" + "".format(name, criterion, score)) # using fewer features reduces the learning ability of this tree, # but reduces training time. 
reg = Tree(criterion=criterion, max_features=6, random_state=0) reg.fit(boston.data, boston.target) score = mean_squared_error(boston.target, reg.predict(boston.data)) - assert_less(score, 2, - "Failed with {0}, criterion = {1} and score = {2}" - "".format(name, criterion, score)) + assert score < 2, ( + "Failed with {0}, criterion = {1} and score = {2}" + "".format(name, criterion, score)) def test_probability(): @@ -384,8 +382,8 @@ def test_importances(): importances = clf.feature_importances_ n_important = np.sum(importances > 0.1) - assert_equal(importances.shape[0], 10, "Failed with {0}".format(name)) - assert_equal(n_important, 3, "Failed with {0}".format(name)) + assert importances.shape[0] == 10, "Failed with {0}".format(name) + assert n_important == 3, "Failed with {0}".format(name) # Check on iris that importances are the same for all builders clf = DecisionTreeClassifier(random_state=0) @@ -435,48 +433,48 @@ def test_max_features(): for name, TreeRegressor in REG_TREES.items(): reg = TreeRegressor(max_features="auto") reg.fit(boston.data, boston.target) - assert_equal(reg.max_features_, boston.data.shape[1]) + assert reg.max_features_ == boston.data.shape[1] for name, TreeClassifier in CLF_TREES.items(): clf = TreeClassifier(max_features="auto") clf.fit(iris.data, iris.target) - assert_equal(clf.max_features_, 2) + assert clf.max_features_ == 2 for name, TreeEstimator in ALL_TREES.items(): est = TreeEstimator(max_features="sqrt") est.fit(iris.data, iris.target) - assert_equal(est.max_features_, + assert (est.max_features_ == int(np.sqrt(iris.data.shape[1]))) est = TreeEstimator(max_features="log2") est.fit(iris.data, iris.target) - assert_equal(est.max_features_, + assert (est.max_features_ == int(np.log2(iris.data.shape[1]))) est = TreeEstimator(max_features=1) est.fit(iris.data, iris.target) - assert_equal(est.max_features_, 1) + assert est.max_features_ == 1 est = TreeEstimator(max_features=3) est.fit(iris.data, iris.target) - assert_equal(est.max_features_, 3) + assert est.max_features_ == 3 est = TreeEstimator(max_features=0.01) est.fit(iris.data, iris.target) - assert_equal(est.max_features_, 1) + assert est.max_features_ == 1 est = TreeEstimator(max_features=0.5) est.fit(iris.data, iris.target) - assert_equal(est.max_features_, + assert (est.max_features_ == int(0.5 * iris.data.shape[1])) est = TreeEstimator(max_features=1.0) est.fit(iris.data, iris.target) - assert_equal(est.max_features_, iris.data.shape[1]) + assert est.max_features_ == iris.data.shape[1] est = TreeEstimator(max_features=None) est.fit(iris.data, iris.target) - assert_equal(est.max_features_, iris.data.shape[1]) + assert est.max_features_ == iris.data.shape[1] # use values of max_features that are invalid est = TreeEstimator(max_features=10) @@ -590,8 +588,7 @@ def test_min_samples_split(): # count samples on nodes, -1 means it is a leaf node_samples = est.tree_.n_node_samples[est.tree_.children_left != -1] - assert_greater(np.min(node_samples), 9, - "Failed with {0}".format(name)) + assert np.min(node_samples) > 9, "Failed with {0}".format(name) # test for float parameter est = TreeEstimator(min_samples_split=0.2, @@ -601,8 +598,7 @@ def test_min_samples_split(): # count samples on nodes, -1 means it is a leaf node_samples = est.tree_.n_node_samples[est.tree_.children_left != -1] - assert_greater(np.min(node_samples), 9, - "Failed with {0}".format(name)) + assert np.min(node_samples) > 9, "Failed with {0}".format(name) def test_min_samples_leaf(): @@ -624,8 +620,7 @@ def test_min_samples_leaf(): 
node_counts = np.bincount(out) # drop inner nodes leaf_count = node_counts[node_counts != 0] - assert_greater(np.min(leaf_count), 4, - "Failed with {0}".format(name)) + assert np.min(leaf_count) > 4, "Failed with {0}".format(name) # test float parameter est = TreeEstimator(min_samples_leaf=0.1, @@ -636,8 +631,7 @@ def test_min_samples_leaf(): node_counts = np.bincount(out) # drop inner nodes leaf_count = node_counts[node_counts != 0] - assert_greater(np.min(leaf_count), 4, - "Failed with {0}".format(name)) + assert np.min(leaf_count) > 4, "Failed with {0}".format(name) def check_min_weight_fraction_leaf(name, datasets, sparse=False): @@ -671,12 +665,11 @@ def check_min_weight_fraction_leaf(name, datasets, sparse=False): node_weights = np.bincount(out, weights=weights) # drop inner nodes leaf_weights = node_weights[node_weights != 0] - assert_greater_equal( - np.min(leaf_weights), - total_weight * est.min_weight_fraction_leaf, - "Failed with {0} " - "min_weight_fraction_leaf={1}".format( - name, est.min_weight_fraction_leaf)) + assert ( + np.min(leaf_weights) >= + total_weight * est.min_weight_fraction_leaf), ( + "Failed with {0} min_weight_fraction_leaf={1}".format( + name, est.min_weight_fraction_leaf)) # test case with no weights passed in total_weight = X.shape[0] @@ -695,12 +688,11 @@ def check_min_weight_fraction_leaf(name, datasets, sparse=False): node_weights = np.bincount(out) # drop inner nodes leaf_weights = node_weights[node_weights != 0] - assert_greater_equal( - np.min(leaf_weights), - total_weight * est.min_weight_fraction_leaf, - "Failed with {0} " - "min_weight_fraction_leaf={1}".format( - name, est.min_weight_fraction_leaf)) + assert ( + np.min(leaf_weights) >= + total_weight * est.min_weight_fraction_leaf), ( + "Failed with {0} min_weight_fraction_leaf={1}".format( + name, est.min_weight_fraction_leaf)) @pytest.mark.parametrize("name", ALL_TREES) @@ -741,15 +733,14 @@ def check_min_weight_fraction_leaf_with_min_samples_leaf(name, datasets, node_weights = np.bincount(out) # drop inner nodes leaf_weights = node_weights[node_weights != 0] - assert_greater_equal( - np.min(leaf_weights), + assert ( + np.min(leaf_weights) >= max((total_weight * - est.min_weight_fraction_leaf), 5), - "Failed with {0} " - "min_weight_fraction_leaf={1}, " - "min_samples_leaf={2}".format(name, - est.min_weight_fraction_leaf, - est.min_samples_leaf)) + est.min_weight_fraction_leaf), 5)), ( + "Failed with {0} min_weight_fraction_leaf={1}, " + "min_samples_leaf={2}".format( + name, est.min_weight_fraction_leaf, + est.min_samples_leaf)) for max_leaf_nodes, frac in product((None, 1000), np.linspace(0, 0.5, 3)): # test float min_samples_leaf est = TreeEstimator(min_weight_fraction_leaf=frac, @@ -766,15 +757,14 @@ def check_min_weight_fraction_leaf_with_min_samples_leaf(name, datasets, node_weights = np.bincount(out) # drop inner nodes leaf_weights = node_weights[node_weights != 0] - assert_greater_equal( - np.min(leaf_weights), + assert ( + np.min(leaf_weights) >= max((total_weight * est.min_weight_fraction_leaf), - (total_weight * est.min_samples_leaf)), - "Failed with {0} " - "min_weight_fraction_leaf={1}, " - "min_samples_leaf={2}".format(name, - est.min_weight_fraction_leaf, - est.min_samples_leaf)) + (total_weight * est.min_samples_leaf))), ( + "Failed with {0} min_weight_fraction_leaf={1}, " + "min_samples_leaf={2}".format(name, + est.min_weight_fraction_leaf, + est.min_samples_leaf)) @pytest.mark.parametrize("name", ALL_TREES) @@ -815,11 +805,10 @@ def test_min_impurity_split(): for node in 
range(est.tree_.node_count): if (est.tree_.children_left[node] == TREE_LEAF or est.tree_.children_right[node] == TREE_LEAF): - assert_equal(est.tree_.impurity[node], 0., - "Failed with {0} " - "min_impurity_split={1}".format( - est.tree_.impurity[node], - est.min_impurity_split)) + assert est.tree_.impurity[node] == 0., ( + "Failed with {0} min_impurity_split={1}".format( + est.tree_.impurity[node], + est.min_impurity_split)) # verify leaf nodes have impurity [0,min_impurity_split] when using # min_impurity_split @@ -832,16 +821,14 @@ def test_min_impurity_split(): for node in range(est.tree_.node_count): if (est.tree_.children_left[node] == TREE_LEAF or est.tree_.children_right[node] == TREE_LEAF): - assert_greater_equal(est.tree_.impurity[node], 0, - "Failed with {0}, " - "min_impurity_split={1}".format( - est.tree_.impurity[node], - est.min_impurity_split)) - assert_less_equal(est.tree_.impurity[node], min_impurity_split, - "Failed with {0}, " - "min_impurity_split={1}".format( - est.tree_.impurity[node], - est.min_impurity_split)) + assert est.tree_.impurity[node] >= 0, ( + "Failed with {0}, min_impurity_split={1}".format( + est.tree_.impurity[node], + est.min_impurity_split)) + assert est.tree_.impurity[node] <= min_impurity_split, ( + "Failed with {0}, min_impurity_split={1}".format( + est.tree_.impurity[node], + est.min_impurity_split)) def test_min_impurity_decrease(): @@ -868,10 +855,10 @@ def test_min_impurity_decrease(): for est, expected_decrease in ((est1, 1e-7), (est2, 0.05), (est3, 0.0001), (est4, 0.1)): - assert_less_equal(est.min_impurity_decrease, expected_decrease, - "Failed, min_impurity_decrease = {0} > {1}" - .format(est.min_impurity_decrease, - expected_decrease)) + assert est.min_impurity_decrease <= expected_decrease, ( + "Failed, min_impurity_decrease = {0} > {1}".format( + est.min_impurity_decrease, + expected_decrease)) est.fit(X, y) for node in range(est.tree_.node_count): # If current node is a not leaf node, check if the split was @@ -899,11 +886,10 @@ def test_min_impurity_decrease(): actual_decrease = fractional_node_weight * ( imp_parent - wtd_avg_left_right_imp) - assert_greater_equal(actual_decrease, expected_decrease, - "Failed with {0} " - "expected min_impurity_decrease={1}" - .format(actual_decrease, - expected_decrease)) + assert actual_decrease >= expected_decrease, ( + "Failed with {0} expected min_impurity_decrease={1}" + .format(actual_decrease, + expected_decrease)) for name, TreeEstimator in ALL_TREES.items(): if "Classifier" in name: @@ -920,17 +906,17 @@ def test_min_impurity_decrease(): serialized_object = pickle.dumps(est) est2 = pickle.loads(serialized_object) - assert_equal(type(est2), est.__class__) + assert type(est2) == est.__class__ score2 = est2.score(X, y) - assert_equal(score, score2, - "Failed to generate same score after pickling " - "with {0}".format(name)) + assert score == score2, ( + "Failed to generate same score after pickling " + "with {0}".format(name)) for attribute in fitted_attribute: - assert_equal(getattr(est2.tree_, attribute), - fitted_attribute[attribute], - "Failed to generate same attribute {0} after " - "pickling with {1}".format(attribute, name)) + assert (getattr(est2.tree_, attribute) == + fitted_attribute[attribute]), ( + "Failed to generate same attribute {0} after " + "pickling with {1}".format(attribute, name)) def test_multioutput(): @@ -969,24 +955,24 @@ def test_multioutput(): clf = TreeClassifier(random_state=0) y_hat = clf.fit(X, y).predict(T) assert_array_equal(y_hat, y_true) - 
assert_equal(y_hat.shape, (4, 2)) + assert y_hat.shape == (4, 2) proba = clf.predict_proba(T) - assert_equal(len(proba), 2) - assert_equal(proba[0].shape, (4, 2)) - assert_equal(proba[1].shape, (4, 4)) + assert len(proba) == 2 + assert proba[0].shape == (4, 2) + assert proba[1].shape == (4, 4) log_proba = clf.predict_log_proba(T) - assert_equal(len(log_proba), 2) - assert_equal(log_proba[0].shape, (4, 2)) - assert_equal(log_proba[1].shape, (4, 4)) + assert len(log_proba) == 2 + assert log_proba[0].shape == (4, 2) + assert log_proba[1].shape == (4, 4) # toy regression problem for name, TreeRegressor in REG_TREES.items(): reg = TreeRegressor(random_state=0) y_hat = reg.fit(X, y).predict(T) assert_almost_equal(y_hat, y_true) - assert_equal(y_hat.shape, (4, 2)) + assert y_hat.shape == (4, 2) def test_classes_shape(): @@ -996,15 +982,15 @@ def test_classes_shape(): clf = TreeClassifier(random_state=0) clf.fit(X, y) - assert_equal(clf.n_classes_, 2) + assert clf.n_classes_ == 2 assert_array_equal(clf.classes_, [-1, 1]) # Classification, multi-output _y = np.vstack((y, np.array(y) * 2)).T clf = TreeClassifier(random_state=0) clf.fit(X, _y) - assert_equal(len(clf.n_classes_), 2) - assert_equal(len(clf.classes_), 2) + assert len(clf.n_classes_) == 2 + assert len(clf.classes_) == 2 assert_array_equal(clf.n_classes_, [2, 2]) assert_array_equal(clf.classes_, [[-1, 1], [-2, 2]]) @@ -1090,12 +1076,12 @@ def test_sample_weight(): sample_weight[y == 2] = .51 # Samples of class '2' are still weightier clf = DecisionTreeClassifier(max_depth=1, random_state=0) clf.fit(X, y, sample_weight=sample_weight) - assert_equal(clf.tree_.threshold[0], 149.5) + assert clf.tree_.threshold[0] == 149.5 sample_weight[y == 2] = .5 # Samples of class '2' are no longer weightier clf = DecisionTreeClassifier(max_depth=1, random_state=0) clf.fit(X, y, sample_weight=sample_weight) - assert_equal(clf.tree_.threshold[0], 49.5) # Threshold should have moved + assert clf.tree_.threshold[0] == 49.5 # Threshold should have moved # Test that sample weighting is the same as having duplicates X = iris.data @@ -1214,7 +1200,7 @@ def test_max_leaf_nodes(): k = 4 for name, TreeEstimator in ALL_TREES.items(): est = TreeEstimator(max_depth=None, max_leaf_nodes=k + 1).fit(X, y) - assert_equal(est.get_n_leaves(), k + 1) + assert est.get_n_leaves() == k + 1 # max_leaf_nodes in (0, 1) should raise ValueError est = TreeEstimator(max_depth=None, max_leaf_nodes=0) @@ -1231,7 +1217,7 @@ def test_max_leaf_nodes_max_depth(): k = 4 for name, TreeEstimator in ALL_TREES.items(): est = TreeEstimator(max_depth=1, max_leaf_nodes=k).fit(X, y) - assert_equal(est.get_depth(), 1) + assert est.get_depth() == 1 def test_arrays_persist(): @@ -1253,7 +1239,7 @@ def test_only_constant_features(): for name, TreeEstimator in ALL_TREES.items(): est = TreeEstimator(random_state=0) est.fit(X, y) - assert_equal(est.tree_.max_depth, 0) + assert est.tree_.max_depth == 0 def test_behaviour_constant_feature_after_splits(): @@ -1265,8 +1251,8 @@ def test_behaviour_constant_feature_after_splits(): if "ExtraTree" not in name: est = TreeEstimator(random_state=0, max_features=1) est.fit(X, y) - assert_equal(est.tree_.max_depth, 2) - assert_equal(est.tree_.node_count, 5) + assert est.tree_.max_depth == 2 + assert est.tree_.node_count == 5 def test_with_only_one_non_constant_features(): @@ -1277,13 +1263,13 @@ def test_with_only_one_non_constant_features(): for name, TreeEstimator in CLF_TREES.items(): est = TreeEstimator(random_state=0, max_features=1) est.fit(X, y) - 
assert_equal(est.tree_.max_depth, 1) + assert est.tree_.max_depth == 1 assert_array_equal(est.predict_proba(X), np.full((4, 2), 0.5)) for name, TreeEstimator in REG_TREES.items(): est = TreeEstimator(random_state=0, max_features=1) est.fit(X, y) - assert_equal(est.tree_.max_depth, 1) + assert est.tree_.max_depth == 1 assert_array_equal(est.predict(X), np.full((4, ), 0.5)) @@ -1294,7 +1280,7 @@ def test_big_input(): try: clf.fit(X, [0, 1, 0, 1]) except ValueError as e: - assert_in("float32", str(e)) + assert "float32" in str(e) def test_realloc(): @@ -1494,8 +1480,8 @@ def check_explicit_sparse_zeros(tree, max_depth=3, X_sparse_test = X_sparse_test.copy() # Ensure that we have explicit zeros - assert_greater((X_sparse.data == 0.).sum(), 0) - assert_greater((X_sparse_test.data == 0.).sum(), 0) + assert (X_sparse.data == 0.).sum() > 0 + assert (X_sparse_test.data == 0.).sum() > 0 # Perform the comparison d = TreeEstimator(random_state=0, max_depth=max_depth).fit(X, y) @@ -1554,11 +1540,11 @@ def test_1d_input(name): def _check_min_weight_leaf_split_level(TreeEstimator, X, y, sample_weight): est = TreeEstimator(random_state=0) est.fit(X, y, sample_weight=sample_weight) - assert_equal(est.tree_.max_depth, 1) + assert est.tree_.max_depth == 1 est = TreeEstimator(random_state=0, min_weight_fraction_leaf=0.4) est.fit(X, y, sample_weight=sample_weight) - assert_equal(est.tree_.max_depth, 0) + assert est.tree_.max_depth == 0 def check_min_weight_leaf_split_level(name): @@ -1656,7 +1642,7 @@ def check_decision_path(name): node_indicator_csr = est.decision_path(X) node_indicator = node_indicator_csr.toarray() - assert_equal(node_indicator.shape, (n_samples, est.tree_.node_count)) + assert node_indicator.shape == (n_samples, est.tree_.node_count) # Assert that leaves index are correct leaves = est.apply(X) @@ -1670,7 +1656,7 @@ def check_decision_path(name): # Ensure max depth is consistent with sum of indicator max_depth = node_indicator.sum(axis=1).max() - assert_less_equal(est.tree_.max_depth, max_depth) + assert est.tree_.max_depth <= max_depth @pytest.mark.parametrize("name", ALL_TREES) @@ -1801,17 +1787,17 @@ def _pickle_copy(obj): criteria = typename(n_outputs, n_classes) result = copy_func(criteria).__reduce__() typename_, (n_outputs_, n_classes_), _ = result - assert_equal(typename, typename_) - assert_equal(n_outputs, n_outputs_) + assert typename == typename_ + assert n_outputs == n_outputs_ assert_array_equal(n_classes, n_classes_) for _, typename in CRITERIA_REG.items(): criteria = typename(n_outputs, n_samples) result = copy_func(criteria).__reduce__() typename_, (n_outputs_, n_samples_), _ = result - assert_equal(typename, typename_) - assert_equal(n_outputs, n_outputs_) - assert_equal(n_samples, n_samples_) + assert typename == typename_ + assert n_outputs == n_outputs_ + assert n_samples == n_samples_ def test_empty_leaf_infinite_threshold(): From 99e6b5a435a4d55bf2444f084f8c171a3a1dc8c6 Mon Sep 17 00:00:00 2001 From: adrinjalali Date: Sun, 30 Jun 2019 16:26:56 +0200 Subject: [PATCH 19/22] fix utils --- sklearn/utils/estimator_checks.py | 80 ++++++++++---------- sklearn/utils/tests/test_class_weight.py | 10 +-- sklearn/utils/tests/test_estimator_checks.py | 4 +- sklearn/utils/tests/test_extmath.py | 34 ++++----- sklearn/utils/tests/test_fast_dict.py | 12 +-- sklearn/utils/tests/test_multiclass.py | 5 +- sklearn/utils/tests/test_murmurhash.py | 34 ++++----- sklearn/utils/tests/test_random.py | 14 ++-- sklearn/utils/tests/test_sparsefuncs.py | 21 ++--- sklearn/utils/tests/test_testing.py 
| 20 ++--- sklearn/utils/tests/test_utils.py | 26 +++---- sklearn/utils/tests/test_validation.py | 72 +++++++++--------- 12 files changed, 165 insertions(+), 167 deletions(-) diff --git a/sklearn/utils/estimator_checks.py b/sklearn/utils/estimator_checks.py index 0bec5c3911681..42220406b477d 100644 --- a/sklearn/utils/estimator_checks.py +++ b/sklearn/utils/estimator_checks.py @@ -523,16 +523,16 @@ def check_estimator_sparse_data(name, estimator_orig): if hasattr(estimator, "predict"): pred = estimator.predict(X) if tags['multioutput_only']: - assert_equal(pred.shape, (X.shape[0], 1)) + assert pred.shape == (X.shape[0], 1) else: - assert_equal(pred.shape, (X.shape[0],)) + assert pred.shape == (X.shape[0],) if hasattr(estimator, 'predict_proba'): probs = estimator.predict_proba(X) if tags['binary_only']: expected_probs_shape = (X.shape[0], 2) else: expected_probs_shape = (X.shape[0], 4) - assert_equal(probs.shape, expected_probs_shape) + assert probs.shape == expected_probs_shape except (TypeError, ValueError) as e: if 'sparse' not in repr(e).lower(): if "64" in matrix_format: @@ -721,8 +721,7 @@ def check_dict_unchanged(name, estimator_orig): if hasattr(estimator, method): dict_before = estimator.__dict__.copy() getattr(estimator, method)(X) - assert_dict_equal(estimator.__dict__, dict_before, - 'Estimator changes __dict__ during %s' % method) + assert estimator.__dict__ == dict_before, 'Estimator changes __dict__ during %s' % method def is_public_parameter(attr): @@ -1021,10 +1020,10 @@ def _check_transformer(name, transformer_orig, X, y): if isinstance(X_pred, tuple): for x_pred in X_pred: - assert_equal(x_pred.shape[0], n_samples) + assert x_pred.shape[0] == n_samples else: # check for consistent n_samples - assert_equal(X_pred.shape[0], n_samples) + assert X_pred.shape[0] == n_samples if hasattr(transformer, 'transform'): if name in CROSS_DECOMPOSITION: @@ -1060,8 +1059,8 @@ def _check_transformer(name, transformer_orig, X, y): err_msg="consecutive fit_transform outcomes " "not consistent in %s" % transformer) - assert_equal(_num_samples(X_pred2), n_samples) - assert_equal(_num_samples(X_pred3), n_samples) + assert _num_samples(X_pred2) == n_samples + assert _num_samples(X_pred3) == n_samples # raises error on malformed input for transform if hasattr(X, 'T') and not _safe_tags(transformer, "stateless"): @@ -1355,8 +1354,8 @@ def check_clustering(name, clusterer_orig, readonly_memmap=False): clusterer.fit(X.tolist()) pred = clusterer.labels_ - assert_equal(pred.shape, (n_samples,)) - assert_greater(adjusted_rand_score(pred, y), 0.4) + assert pred.shape == (n_samples,) + assert adjusted_rand_score(pred, y) > 0.4 if _safe_tags(clusterer, 'non_deterministic'): return set_random_state(clusterer) @@ -1365,8 +1364,8 @@ def check_clustering(name, clusterer_orig, readonly_memmap=False): assert_array_equal(pred, pred2) # fit_predict(X) and labels_ should be of type int - assert_in(pred.dtype, [np.dtype('int32'), np.dtype('int64')]) - assert_in(pred2.dtype, [np.dtype('int32'), np.dtype('int64')]) + assert pred.dtype in [np.dtype('int32'), np.dtype('int64')] + assert pred2.dtype in [np.dtype('int32'), np.dtype('int64')] # Add noise to X to test the possible values of the labels labels = clusterer.fit_predict(X_noise) @@ -1383,7 +1382,7 @@ def check_clustering(name, clusterer_orig, readonly_memmap=False): # Labels should be less than n_clusters - 1 if hasattr(clusterer, 'n_clusters'): n_clusters = getattr(clusterer, 'n_clusters') - assert_greater_equal(n_clusters - 1, labels_sorted[-1]) + assert 
n_clusters - 1 >= labels_sorted[-1] # else labels should be less than max(labels_) which is necessarily true @@ -1484,10 +1483,10 @@ def check_classifiers_train(name, classifier_orig, readonly_memmap=False): assert hasattr(classifier, "classes_") y_pred = classifier.predict(X) - assert_equal(y_pred.shape, (n_samples,)) + assert y_pred.shape == (n_samples,) # training set performance if not tags['poor_score']: - assert_greater(accuracy_score(y, y_pred), 0.83) + assert accuracy_score(y, y_pred) > 0.83 # raises error on malformed input for predict msg_pairwise = ( @@ -1512,13 +1511,13 @@ def check_classifiers_train(name, classifier_orig, readonly_memmap=False): decision = classifier.decision_function(X) if n_classes == 2: if not tags["multioutput_only"]: - assert_equal(decision.shape, (n_samples,)) + assert decision.shape == (n_samples,) else: - assert_equal(decision.shape, (n_samples, 1)) + assert decision.shape == (n_samples, 1) dec_pred = (decision.ravel() > 0).astype(np.int) assert_array_equal(dec_pred, y_pred) else: - assert_equal(decision.shape, (n_samples, n_classes)) + assert decision.shape == (n_samples, n_classes) assert_array_equal(np.argmax(decision, axis=1), y_pred) # raises error on malformed input for decision_function @@ -1537,7 +1536,7 @@ def check_classifiers_train(name, classifier_orig, readonly_memmap=False): if hasattr(classifier, "predict_proba"): # predict_proba agrees with predict y_prob = classifier.predict_proba(X) - assert_equal(y_prob.shape, (n_samples, n_classes)) + assert y_prob.shape == (n_samples, n_classes) assert_array_equal(np.argmax(y_prob, axis=1), y_pred) # check that probas for all classes sum to one assert_array_almost_equal(np.sum(y_prob, axis=1), @@ -1724,7 +1723,7 @@ def check_supervised_y_2d(name, estimator_orig): ", ".join([str(w_x) for w_x in w])) if not tags['multioutput']: # check that we warned if we don't support multi-output - assert_greater(len(w), 0, msg) + assert len(w) > 0, msg assert "DataConversionWarning('A column-vector y" \ " was passed when a 1d array was expected" in msg assert_allclose(y_pred.ravel(), y_pred_2d.ravel()) @@ -1879,13 +1878,13 @@ def check_regressors_train(name, regressor_orig, readonly_memmap=False): regressor.fit(X, y_) regressor.fit(X.tolist(), y_.tolist()) y_pred = regressor.predict(X) - assert_equal(y_pred.shape, y_.shape) + assert y_pred.shape == y_.shape # TODO: find out why PLS and CCA fail. RANSAC is random # and furthermore assumes the presence of outliers, hence # skipped if not _safe_tags(regressor, "poor_score"): - assert_greater(regressor.score(X, y_), 0.5) + assert regressor.score(X, y_) > 0.5 @ignore_warnings @@ -1961,7 +1960,7 @@ def check_class_weight_classifiers(name, classifier_orig): y_pred = classifier.predict(X_test) # XXX: Generally can use 0.89 here. 
On Windows, LinearSVC gets # 0.88 (Issue #9111) - assert_greater(np.mean(y_pred == 0), 0.87) + assert np.mean(y_pred == 0) > 0.87 @ignore_warnings(category=(DeprecationWarning, FutureWarning)) @@ -1980,7 +1979,7 @@ def check_class_weight_balanced_classifiers(name, classifier_orig, X_train, classifier.set_params(class_weight='balanced') classifier.fit(X_train, y_train) y_pred_balanced = classifier.predict(X_test) - assert_greater(f1_score(y_test, y_pred_balanced, average='weighted'), + assert (f1_score(y_test, y_pred_balanced, average='weighted') > f1_score(y_test, y_pred, average='weighted')) @@ -2056,10 +2055,10 @@ def check_estimators_overwrite_params(name, estimator_orig): # The only exception to this rule of immutable constructor parameters # is possible RandomState instance but in this check we explicitly # fixed the random_state params recursively to be integer seeds. - assert_equal(joblib.hash(new_value), joblib.hash(original_value), - "Estimator %s should not change or mutate " - " the parameter %s from %s to %s during fit." - % (name, param_name, original_value, new_value)) + assert joblib.hash(new_value) == joblib.hash(original_value), ( + "Estimator %s should not change or mutate " + " the parameter %s from %s to %s during fit." + % (name, param_name, original_value, new_value)) def check_no_attributes_set_in_init(name, estimator): @@ -2212,13 +2211,13 @@ def param_filter(p): init_params = init_params[1:] for init_param in init_params: - assert_not_equal(init_param.default, init_param.empty, - "parameter %s for %s has no default value" - % (init_param.name, type(estimator).__name__)) + assert init_param.default != init_param.empty, ( + "parameter %s for %s has no default value" + % (init_param.name, type(estimator).__name__)) if type(init_param.default) is type: - assert_in(init_param.default, [np.float64, np.int64]) + assert init_param.default in [np.float64, np.int64] else: - assert_in(type(init_param.default), + assert (type(init_param.default) in [str, int, float, bool, tuple, type(None), np.float64, types.FunctionType, joblib.Memory]) if init_param.name not in params.keys(): @@ -2308,9 +2307,9 @@ def check_transformer_n_iter(name, estimator_orig): # These return a n_iter per component. 
if name in CROSS_DECOMPOSITION: for iter_ in estimator.n_iter_: - assert_greater_equal(iter_, 1) + assert iter_ >= 1 else: - assert_greater_equal(estimator.n_iter_, 1) + assert estimator.n_iter_ >= 1 @ignore_warnings(category=(DeprecationWarning, FutureWarning)) @@ -2336,7 +2335,7 @@ def check_set_params(name, estimator_orig): estimator.set_params(**orig_params) curr_params = estimator.get_params(deep=False) - assert_equal(set(orig_params.keys()), set(curr_params.keys()), msg) + assert set(orig_params.keys()) == set(curr_params.keys()), msg for k, v in curr_params.items(): assert orig_params[k] is v, msg @@ -2364,7 +2363,7 @@ def check_set_params(name, estimator_orig): params_before_exception = curr_params curr_params = estimator.get_params(deep=False) try: - assert_equal(set(params_before_exception.keys()), + assert (set(params_before_exception.keys()) == set(curr_params.keys())) for k, v in curr_params.items(): assert params_before_exception[k] is v @@ -2372,9 +2371,8 @@ def check_set_params(name, estimator_orig): warnings.warn(change_warning_msg) else: curr_params = estimator.get_params(deep=False) - assert_equal(set(test_params.keys()), - set(curr_params.keys()), - msg) + assert (set(test_params.keys()) == + set(curr_params.keys())), msg for k, v in curr_params.items(): assert test_params[k] is v, msg test_params[param_name] = default_value diff --git a/sklearn/utils/tests/test_class_weight.py b/sklearn/utils/tests/test_class_weight.py index 751243f796a4c..e67fa6eb898ec 100644 --- a/sklearn/utils/tests/test_class_weight.py +++ b/sklearn/utils/tests/test_class_weight.py @@ -99,14 +99,14 @@ def test_compute_class_weight_balanced_negative(): y = np.asarray([-1, -1, 0, 0, -2, -2]) cw = compute_class_weight("balanced", classes, y) - assert_equal(len(cw), len(classes)) + assert len(cw) == len(classes) assert_array_almost_equal(cw, np.array([1., 1., 1.])) # Test with unbalanced class labels. y = np.asarray([-1, 0, 0, -2, -2, -2]) cw = compute_class_weight("balanced", classes, y) - assert_equal(len(cw), len(classes)) + assert len(cw) == len(classes) class_counts = np.bincount(y + 2) assert_almost_equal(np.dot(cw, class_counts), y.shape[0]) assert_array_almost_equal(cw, [2. 
/ 3, 2., 1.]) @@ -132,16 +132,16 @@ def test_compute_class_weight_default(): # Test for non specified weights cw = compute_class_weight(None, classes, y) - assert_equal(len(cw), classes_len) + assert len(cw) == classes_len assert_array_almost_equal(cw, np.ones(3)) # Tests for partly specified weights cw = compute_class_weight({2: 1.5}, classes, y) - assert_equal(len(cw), classes_len) + assert len(cw) == classes_len assert_array_almost_equal(cw, [1.5, 1., 1.]) cw = compute_class_weight({2: 1.5, 4: 0.5}, classes, y) - assert_equal(len(cw), classes_len) + assert len(cw) == classes_len assert_array_almost_equal(cw, [1.5, 1., 0.5]) diff --git a/sklearn/utils/tests/test_estimator_checks.py b/sklearn/utils/tests/test_estimator_checks.py index 73ec02457256c..abcd97a9a41db 100644 --- a/sklearn/utils/tests/test_estimator_checks.py +++ b/sklearn/utils/tests/test_estimator_checks.py @@ -456,7 +456,7 @@ def test_check_estimator_clones(): # without fitting old_hash = joblib.hash(est) check_estimator(est) - assert_equal(old_hash, joblib.hash(est)) + assert old_hash == joblib.hash(est) with ignore_warnings(category=(FutureWarning, DeprecationWarning)): # when 'est = SGDClassifier()' @@ -467,7 +467,7 @@ def test_check_estimator_clones(): est.fit(iris.data + 10, iris.target) old_hash = joblib.hash(est) check_estimator(est) - assert_equal(old_hash, joblib.hash(est)) + assert old_hash == joblib.hash(est) def test_check_estimators_unfitted(): diff --git a/sklearn/utils/tests/test_extmath.py b/sklearn/utils/tests/test_extmath.py index aad228d4548aa..bc8b598764b1a 100644 --- a/sklearn/utils/tests/test_extmath.py +++ b/sklearn/utils/tests/test_extmath.py @@ -47,7 +47,7 @@ def test_density(): X_lil = sparse.lil_matrix(X) for X_ in (X_csr, X_csc, X_coo, X_lil): - assert_equal(density(X_), density(X)) + assert density(X_) == density(X) def test_uniform_weights(): @@ -96,7 +96,7 @@ def check_randomized_svd_low_rank(dtype): X = make_low_rank_matrix(n_samples=n_samples, n_features=n_features, effective_rank=rank, tail_strength=0.0, random_state=0).astype(dtype, copy=False) - assert_equal(X.shape, (n_samples, n_features)) + assert X.shape == (n_samples, n_features) # compute the singular values of X using the slow exact method U, s, V = linalg.svd(X, full_matrices=False) @@ -123,9 +123,9 @@ def check_randomized_svd_low_rank(dtype): assert sa.dtype == np.float64 assert Va.dtype == np.float64 - assert_equal(Ua.shape, (n_samples, k)) - assert_equal(sa.shape, (k,)) - assert_equal(Va.shape, (k, n_features)) + assert Ua.shape == (n_samples, k) + assert sa.shape == (k,) + assert Va.shape == (k, n_features) # ensure that the singular values of both methods are equal up to the # real rank of the matrix @@ -203,7 +203,7 @@ def test_randomized_svd_low_rank_with_noise(): X = make_low_rank_matrix(n_samples=n_samples, n_features=n_features, effective_rank=rank, tail_strength=0.1, random_state=0) - assert_equal(X.shape, (n_samples, n_features)) + assert X.shape == (n_samples, n_features) # compute the singular values of X using the slow exact method _, s, _ = linalg.svd(X, full_matrices=False) @@ -216,7 +216,7 @@ def test_randomized_svd_low_rank_with_noise(): random_state=0) # the approximation does not tolerate the noise: - assert_greater(np.abs(s[:k] - sa).max(), 0.01) + assert np.abs(s[:k] - sa).max() > 0.01 # compute the singular values of X using the fast approximate # method with iterated power method @@ -240,7 +240,7 @@ def test_randomized_svd_infinite_rank(): X = make_low_rank_matrix(n_samples=n_samples, n_features=n_features, 
effective_rank=rank, tail_strength=1.0, random_state=0) - assert_equal(X.shape, (n_samples, n_features)) + assert X.shape == (n_samples, n_features) # compute the singular values of X using the slow exact method _, s, _ = linalg.svd(X, full_matrices=False) @@ -251,7 +251,7 @@ def test_randomized_svd_infinite_rank(): power_iteration_normalizer=normalizer) # the approximation does not tolerate the noise: - assert_greater(np.abs(s[:k] - sa).max(), 0.1) + assert np.abs(s[:k] - sa).max() > 0.1 # compute the singular values of X using the fast approximate method # with iterated power method @@ -273,7 +273,7 @@ def test_randomized_svd_transpose_consistency(): X = make_low_rank_matrix(n_samples=n_samples, n_features=n_features, effective_rank=rank, tail_strength=0.5, random_state=0) - assert_equal(X.shape, (n_samples, n_features)) + assert X.shape == (n_samples, n_features) U1, s1, V1 = randomized_svd(X, k, n_iter=3, transpose=False, random_state=0) @@ -313,7 +313,7 @@ def test_randomized_svd_power_iteration_normalizer(): power_iteration_normalizer='none') A = X - U.dot(np.diag(s).dot(V)) error_20 = linalg.norm(A, ord='fro') - assert_greater(np.abs(error_2 - error_20), 100) + assert np.abs(error_2 - error_20) > 100 for normalizer in ['LU', 'QR', 'auto']: U, s, V = randomized_svd(X, n_components, n_iter=2, @@ -328,7 +328,7 @@ def test_randomized_svd_power_iteration_normalizer(): random_state=0) A = X - U.dot(np.diag(s).dot(V)) error = linalg.norm(A, ord='fro') - assert_greater(15, np.abs(error_2 - error)) + assert 15 > np.abs(error_2 - error) def test_randomized_svd_sparse_warnings(): @@ -552,7 +552,7 @@ def naive_mean_variance_update(x, last_mean, last_variance, stable_var = two_pass_var # Naive one pass var: >tol (=1063) - assert_greater(np.abs(stable_var(A) - one_pass_var(A)).max(), tol) + assert np.abs(stable_var(A) - one_pass_var(A)).max() > tol # Starting point for online algorithms: after A0 @@ -561,10 +561,10 @@ def naive_mean_variance_update(x, last_mean, last_variance, for i in range(A1.shape[0]): mean, var, n = \ naive_mean_variance_update(A1[i, :], mean, var, n) - assert_equal(n, A.shape[0]) + assert n == A.shape[0] # the mean is also slightly unstable - assert_greater(np.abs(A.mean(axis=0) - mean).max(), 1e-6) - assert_greater(np.abs(stable_var(A) - var).max(), tol) + assert np.abs(A.mean(axis=0) - mean).max() > 1e-6 + assert np.abs(stable_var(A) - var).max() > tol # Robust implementation: np.abs(stable_var(A) - var).max() def test_incremental_variance_ddof(): diff --git a/sklearn/utils/tests/test_fast_dict.py b/sklearn/utils/tests/test_fast_dict.py index 1131257330dcf..b060c5f599e9e 100644 --- a/sklearn/utils/tests/test_fast_dict.py +++ b/sklearn/utils/tests/test_fast_dict.py @@ -13,15 +13,15 @@ def test_int_float_dict(): d = IntFloatDict(keys, values) for key, value in zip(keys, values): - assert_equal(d[key], value) - assert_equal(len(d), len(keys)) + assert d[key] == value + assert len(d) == len(keys) d.append(120, 3.) 
- assert_equal(d[120], 3.0) - assert_equal(len(d), len(keys) + 1) + assert d[120] == 3.0 + assert len(d) == len(keys) + 1 for i in range(2000): d.append(i + 1000, 4.0) - assert_equal(d[1100], 4.0) + assert d[1100] == 4.0 def test_int_float_dict_argmin(): @@ -29,4 +29,4 @@ def test_int_float_dict_argmin(): keys = np.arange(100, dtype=np.intp) values = np.arange(100, dtype=np.float64) d = IntFloatDict(keys, values) - assert_equal(argmin(d), (0, 0)) + assert argmin(d) == (0, 0) diff --git a/sklearn/utils/tests/test_multiclass.py b/sklearn/utils/tests/test_multiclass.py index 6adce033155bd..962d927f43ba1 100644 --- a/sklearn/utils/tests/test_multiclass.py +++ b/sklearn/utils/tests/test_multiclass.py @@ -280,9 +280,8 @@ def test_check_classification_targets(): def test_type_of_target(): for group, group_examples in EXAMPLES.items(): for example in group_examples: - assert_equal(type_of_target(example), group, - msg=('type_of_target(%r) should be %r, got %r' - % (example, group, type_of_target(example)))) + assert type_of_target(example) == group, ('type_of_target(%r) should be %r, got %r' + % (example, group, type_of_target(example))) for example in NON_ARRAY_LIKE_EXAMPLES: msg_regex = r'Expected array-like \(array or non-string sequence\).*' diff --git a/sklearn/utils/tests/test_murmurhash.py b/sklearn/utils/tests/test_murmurhash.py index 6066012fa0162..abd03bff61d29 100644 --- a/sklearn/utils/tests/test_murmurhash.py +++ b/sklearn/utils/tests/test_murmurhash.py @@ -10,17 +10,17 @@ def test_mmhash3_int(): - assert_equal(murmurhash3_32(3), 847579505) - assert_equal(murmurhash3_32(3, seed=0), 847579505) - assert_equal(murmurhash3_32(3, seed=42), -1823081949) + assert murmurhash3_32(3) == 847579505 + assert murmurhash3_32(3, seed=0) == 847579505 + assert murmurhash3_32(3, seed=42) == -1823081949 - assert_equal(murmurhash3_32(3, positive=False), 847579505) - assert_equal(murmurhash3_32(3, seed=0, positive=False), 847579505) - assert_equal(murmurhash3_32(3, seed=42, positive=False), -1823081949) + assert murmurhash3_32(3, positive=False) == 847579505 + assert murmurhash3_32(3, seed=0, positive=False) == 847579505 + assert murmurhash3_32(3, seed=42, positive=False) == -1823081949 - assert_equal(murmurhash3_32(3, positive=True), 847579505) - assert_equal(murmurhash3_32(3, seed=0, positive=True), 847579505) - assert_equal(murmurhash3_32(3, seed=42, positive=True), 2471885347) + assert murmurhash3_32(3, positive=True) == 847579505 + assert murmurhash3_32(3, seed=0, positive=True) == 847579505 + assert murmurhash3_32(3, seed=42, positive=True) == 2471885347 def test_mmhash3_int_array(): @@ -43,19 +43,19 @@ def test_mmhash3_int_array(): def test_mmhash3_bytes(): - assert_equal(murmurhash3_32(b'foo', 0), -156908512) - assert_equal(murmurhash3_32(b'foo', 42), -1322301282) + assert murmurhash3_32(b'foo', 0) == -156908512 + assert murmurhash3_32(b'foo', 42) == -1322301282 - assert_equal(murmurhash3_32(b'foo', 0, positive=True), 4138058784) - assert_equal(murmurhash3_32(b'foo', 42, positive=True), 2972666014) + assert murmurhash3_32(b'foo', 0, positive=True) == 4138058784 + assert murmurhash3_32(b'foo', 42, positive=True) == 2972666014 def test_mmhash3_unicode(): - assert_equal(murmurhash3_32('foo', 0), -156908512) - assert_equal(murmurhash3_32('foo', 42), -1322301282) + assert murmurhash3_32('foo', 0) == -156908512 + assert murmurhash3_32('foo', 42) == -1322301282 - assert_equal(murmurhash3_32('foo', 0, positive=True), 4138058784) - assert_equal(murmurhash3_32('foo', 42, positive=True), 2972666014) + assert 
murmurhash3_32('foo', 0, positive=True) == 4138058784 + assert murmurhash3_32('foo', 42, positive=True) == 2972666014 def test_no_collision_on_byte_range(): diff --git a/sklearn/utils/tests/test_random.py b/sklearn/utils/tests/test_random.py index 5e31174d725ee..2798edad88cab 100644 --- a/sklearn/utils/tests/test_random.py +++ b/sklearn/utils/tests/test_random.py @@ -37,13 +37,13 @@ def check_edge_case_of_sample_int(sample_without_replacement): assert_raises(ValueError, sample_without_replacement, 1, 2) # n_population == n_samples - assert_equal(sample_without_replacement(0, 0).shape, (0, )) + assert sample_without_replacement(0, 0).shape == (0, ) - assert_equal(sample_without_replacement(1, 1).shape, (1, )) + assert sample_without_replacement(1, 1).shape == (1, ) # n_population >= n_samples - assert_equal(sample_without_replacement(5, 0).shape, (0, )) - assert_equal(sample_without_replacement(5, 1).shape, (1, )) + assert sample_without_replacement(5, 0).shape == (0, ) + assert sample_without_replacement(5, 1).shape == (1, ) # n_population < 0 or n_samples < 0 assert_raises(ValueError, sample_without_replacement, -1, 5) @@ -59,13 +59,13 @@ def check_sample_int(sample_without_replacement): for n_samples in range(n_population + 1): s = sample_without_replacement(n_population, n_samples) - assert_equal(len(s), n_samples) + assert len(s) == n_samples unique = np.unique(s) - assert_equal(np.size(unique), n_samples) + assert np.size(unique) == n_samples assert np.all(unique < n_population) # test edge case n_population == n_samples == 0 - assert_equal(np.size(sample_without_replacement(0, 0)), 0) + assert np.size(sample_without_replacement(0, 0)) == 0 def check_sample_int_distribution(sample_without_replacement): diff --git a/sklearn/utils/tests/test_sparsefuncs.py b/sklearn/utils/tests/test_sparsefuncs.py index 31118b2a921f3..fd41b07dafafe 100644 --- a/sklearn/utils/tests/test_sparsefuncs.py +++ b/sklearn/utils/tests/test_sparsefuncs.py @@ -48,8 +48,8 @@ def test_mean_variance_axis0(): for X_sparse in (X_csr, X_csc): X_sparse = X_sparse.astype(input_dtype) X_means, X_vars = mean_variance_axis(X_sparse, axis=0) - assert_equal(X_means.dtype, output_dtype) - assert_equal(X_vars.dtype, output_dtype) + assert X_means.dtype == output_dtype + assert X_vars.dtype == output_dtype assert_array_almost_equal(X_means, np.mean(X_test, axis=0)) assert_array_almost_equal(X_vars, np.var(X_test, axis=0)) @@ -79,8 +79,8 @@ def test_mean_variance_axis1(): for X_sparse in (X_csr, X_csc): X_sparse = X_sparse.astype(input_dtype) X_means, X_vars = mean_variance_axis(X_sparse, axis=0) - assert_equal(X_means.dtype, output_dtype) - assert_equal(X_vars.dtype, output_dtype) + assert X_means.dtype == output_dtype + assert X_vars.dtype == output_dtype assert_array_almost_equal(X_means, np.mean(X_test, axis=0)) assert_array_almost_equal(X_vars, np.var(X_test, axis=0)) @@ -116,13 +116,14 @@ def test_incr_mean_variance_axis(): incr_mean_variance_axis(X_csr, axis, last_mean, last_var, last_n) assert_array_almost_equal(X_means, X_means_incr) assert_array_almost_equal(X_vars, X_vars_incr) - assert_equal(X.shape[axis], n_incr) # X.shape[axis] picks # samples + # X.shape[axis] picks # samples + assert_array_equal(X.shape[axis], n_incr) X_csc = sp.csc_matrix(X_lil) X_means, X_vars = mean_variance_axis(X_csc, axis) assert_array_almost_equal(X_means, X_means_incr) assert_array_almost_equal(X_vars, X_vars_incr) - assert_equal(X.shape[axis], n_incr) + assert_array_equal(X.shape[axis], n_incr) # Test _incremental_mean_and_var with whole 
data X = np.vstack(data_chunks) @@ -144,11 +145,11 @@ def test_incr_mean_variance_axis(): X_means_incr, X_vars_incr, n_incr = \ incr_mean_variance_axis(X_sparse, axis, last_mean, last_var, last_n) - assert_equal(X_means_incr.dtype, output_dtype) - assert_equal(X_vars_incr.dtype, output_dtype) + assert X_means_incr.dtype == output_dtype + assert X_vars_incr.dtype == output_dtype assert_array_almost_equal(X_means, X_means_incr) assert_array_almost_equal(X_vars, X_vars_incr) - assert_equal(X.shape[axis], n_incr) + assert_array_equal(X.shape[axis], n_incr) @pytest.mark.parametrize("axis", [0, 1]) @@ -522,7 +523,7 @@ def test_inplace_normalize(): assert X_csr.indices.dtype == index_dtype assert X_csr.indptr.dtype == index_dtype inplace_csr_row_normalize(X_csr) - assert_equal(X_csr.dtype, dtype) + assert X_csr.dtype == dtype if inplace_csr_row_normalize is inplace_csr_row_normalize_l2: X_csr.data **= 2 assert_array_almost_equal(np.abs(X_csr).sum(axis=1), ones) diff --git a/sklearn/utils/tests/test_testing.py b/sklearn/utils/tests/test_testing.py index 8bccec90c4856..8d17df6952c06 100644 --- a/sklearn/utils/tests/test_testing.py +++ b/sklearn/utils/tests/test_testing.py @@ -37,24 +37,24 @@ def test_assert_less(): - assert_less(0, 1) + assert 0 < 1 assert_raises(AssertionError, assert_less, 1, 0) def test_assert_greater(): - assert_greater(1, 0) + assert 1 > 0 assert_raises(AssertionError, assert_greater, 0, 1) def test_assert_less_equal(): - assert_less_equal(0, 1) - assert_less_equal(1, 1) + assert 0 <= 1 + assert 1 <= 1 assert_raises(AssertionError, assert_less_equal, 1, 0) def test_assert_greater_equal(): - assert_greater_equal(1, 0) - assert_greater_equal(1, 1) + assert 1 >= 0 + assert 1 >= 1 assert_raises(AssertionError, assert_greater_equal, 0, 1) @@ -64,7 +64,7 @@ def test_set_random_state(): # Linear Discriminant Analysis doesn't have random state: smoke test set_random_state(lda, 3) set_random_state(tree, 3) - assert_equal(tree.random_state, 3) + assert tree.random_state == 3 def test_assert_allclose_dense_sparse(): @@ -236,13 +236,13 @@ def f(): with warnings.catch_warnings(): warnings.simplefilter("ignore", UserWarning) filters_orig = warnings.filters[:] - assert_equal(assert_warns(UserWarning, f), 3) + assert assert_warns(UserWarning, f) == 3 # test that assert_warns doesn't have side effects on warnings # filters - assert_equal(warnings.filters, filters_orig) + assert warnings.filters == filters_orig assert_raises(AssertionError, assert_no_warnings, f) - assert_equal(assert_no_warnings(lambda x: x, 1), 1) + assert assert_no_warnings(lambda x: x, 1) == 1 def test_warn_wrong_warning(self): def f(): diff --git a/sklearn/utils/tests/test_utils.py b/sklearn/utils/tests/test_utils.py index f81a4830d7420..3b831408a821a 100644 --- a/sklearn/utils/tests/test_utils.py +++ b/sklearn/utils/tests/test_utils.py @@ -57,9 +57,9 @@ def ham(): spam = ham() - assert_equal(spam, "spam") # function must remain usable + assert spam == "spam" # function must remain usable - assert_equal(len(w), 1) + assert len(w) == 1 assert issubclass(w[0].category, DeprecationWarning) assert "deprecated" in str(w[0].message).lower() @@ -75,7 +75,7 @@ class Ham: assert hasattr(ham, "SPAM") - assert_equal(len(w), 1) + assert len(w) == 1 assert issubclass(w[0].category, DeprecationWarning) assert "deprecated" in str(w[0].message).lower() @@ -90,7 +90,7 @@ def test_resample(): replace=False, n_samples=3) assert_raises(ValueError, resample, [0, 1], [0, 1], meaning_of_life=42) # Issue:6581, n_samples can be more when replace 
is True (default). - assert_equal(len(resample([1, 2], n_samples=5)), 5) + assert len(resample([1, 2], n_samples=5)) == 5 def test_resample_stratified(): @@ -161,10 +161,10 @@ def test_safe_mask(): mask = [False, False, True, True, True] mask = safe_mask(X, mask) - assert_equal(X[mask].shape[0], 3) + assert X[mask].shape[0] == 3 mask = safe_mask(X_csr, mask) - assert_equal(X_csr[mask].shape[0], 3) + assert X_csr[mask].shape[0] == 3 def test_column_or_1d(): @@ -241,7 +241,7 @@ def to_tuple(A): # to make the inner arrays hashable A = np.array([[[1, 2], [3, 4]], [[5, 6], [7, 8]]]) # A.shape = (2,2,2) S = set(to_tuple(A)) shuffle(A) # shouldn't raise a ValueError for dim = 3 - assert_equal(set(to_tuple(A)), S) + assert set(to_tuple(A)) == S def test_shuffle_dont_convert_to_array(): @@ -257,20 +257,20 @@ def test_shuffle_dont_convert_to_array(): e = sp.csc_matrix(np.arange(6).reshape(3, 2)) a_s, b_s, c_s, d_s, e_s = shuffle(a, b, c, d, e, random_state=0) - assert_equal(a_s, ['c', 'b', 'a']) - assert_equal(type(a_s), list) + assert a_s == ['c', 'b', 'a'] + assert type(a_s) == list assert_array_equal(b_s, ['c', 'b', 'a']) - assert_equal(b_s.dtype, object) + assert b_s.dtype == object - assert_equal(c_s, [3, 2, 1]) - assert_equal(type(c_s), list) + assert c_s == [3, 2, 1] + assert type(c_s) == list assert_array_equal(d_s, np.array([['c', 2], ['b', 1], ['a', 0]], dtype=object)) - assert_equal(type(d_s), MockDataFrame) + assert type(d_s) == MockDataFrame assert_array_equal(e_s.toarray(), np.array([[4, 5], [2, 3], diff --git a/sklearn/utils/tests/test_validation.py b/sklearn/utils/tests/test_validation.py index 7484eb16882d6..6bdd8d9047376 100644 --- a/sklearn/utils/tests/test_validation.py +++ b/sklearn/utils/tests/test_validation.py @@ -55,13 +55,13 @@ def test_as_float_array(): X = np.ones((3, 10), dtype=np.int32) X = X + np.arange(10, dtype=np.int32) X2 = as_float_array(X, copy=False) - assert_equal(X2.dtype, np.float32) + assert X2.dtype == np.float32 # Another test X = X.astype(np.int64) X2 = as_float_array(X, copy=True) # Checking that the array wasn't overwritten assert as_float_array(X, False) is not X - assert_equal(X2.dtype, np.float64) + assert X2.dtype == np.float64 # Test int dtypes <= 32bit tested_dtypes = [np.bool, np.int8, np.int16, np.int32, @@ -69,12 +69,12 @@ def test_as_float_array(): for dtype in tested_dtypes: X = X.astype(dtype) X2 = as_float_array(X) - assert_equal(X2.dtype, np.float32) + assert X2.dtype == np.float32 # Test object dtype X = X.astype(object) X2 = as_float_array(X, copy=True) - assert_equal(X2.dtype, np.float64) + assert X2.dtype == np.float64 # Here, X is of the right type, it shouldn't be modified X = np.ones((3, 2), dtype=np.float32) @@ -209,7 +209,7 @@ def test_check_array(): assert_raises(TypeError, check_array, X_csr) # ensure_2d=False X_array = check_array([0, 1, 2], ensure_2d=False) - assert_equal(X_array.ndim, 1) + assert X_array.ndim == 1 # ensure_2d=True with 1d array assert_raise_message(ValueError, 'Expected 2D array, got 1D array instead', check_array, [0, 1, 2], ensure_2d=True) @@ -235,9 +235,9 @@ def test_check_array(): for X, dtype, order, copy in product(Xs, dtypes, orders, copys): X_checked = check_array(X, dtype=dtype, order=order, copy=copy) if dtype is not None: - assert_equal(X_checked.dtype, dtype) + assert X_checked.dtype == dtype else: - assert_equal(X_checked.dtype, X.dtype) + assert X_checked.dtype == X.dtype if order == 'C': assert X_checked.flags['C_CONTIGUOUS'] assert not X_checked.flags['F_CONTIGUOUS'] @@ -273,17 +273,17 @@ def 
test_check_array(): "Can't check dok sparse matrix for nan or inf."] assert message in messages else: - assert_equal(len(w), 0) + assert len(w) == 0 if dtype is not None: - assert_equal(X_checked.dtype, dtype) + assert X_checked.dtype == dtype else: - assert_equal(X_checked.dtype, X.dtype) + assert X_checked.dtype == X.dtype if X.format in accept_sparse: # no change if allowed - assert_equal(X.format, X_checked.format) + assert X.format == X_checked.format else: # got converted - assert_equal(X_checked.format, accept_sparse[0]) + assert X_checked.format == accept_sparse[0] if copy: assert X is not X_checked else: @@ -322,28 +322,28 @@ def test_check_array_pandas_dtype_object_conversion(): # get converted X = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]], dtype=np.object) X_df = MockDataFrame(X) - assert_equal(check_array(X_df).dtype.kind, "f") - assert_equal(check_array(X_df, ensure_2d=False).dtype.kind, "f") + assert check_array(X_df).dtype.kind == "f" + assert check_array(X_df, ensure_2d=False).dtype.kind == "f" # smoke-test against dataframes with column named "dtype" X_df.dtype = "Hans" - assert_equal(check_array(X_df, ensure_2d=False).dtype.kind, "f") + assert check_array(X_df, ensure_2d=False).dtype.kind == "f" def test_check_array_on_mock_dataframe(): arr = np.array([[0.2, 0.7], [0.6, 0.5], [0.4, 0.1], [0.7, 0.2]]) mock_df = MockDataFrame(arr) checked_arr = check_array(mock_df) - assert_equal(checked_arr.dtype, + assert (checked_arr.dtype == arr.dtype) checked_arr = check_array(mock_df, dtype=np.float32) - assert_equal(checked_arr.dtype, np.dtype(np.float32)) + assert checked_arr.dtype == np.dtype(np.float32) def test_check_array_dtype_stability(): # test that lists with ints don't get converted to floats X = [[1, 2, 3], [4, 5, 6], [7, 8, 9]] - assert_equal(check_array(X).dtype.kind, "i") - assert_equal(check_array(X, ensure_2d=False).dtype.kind, "i") + assert check_array(X).dtype.kind == "i" + assert check_array(X, ensure_2d=False).dtype.kind == "i" def test_check_array_dtype_warning(): @@ -362,12 +362,12 @@ def test_check_array_dtype_warning(): for X in integer_data: X_checked = assert_no_warnings(check_array, X, dtype=np.float64, accept_sparse=True) - assert_equal(X_checked.dtype, np.float64) + assert X_checked.dtype == np.float64 X_checked = assert_warns(DataConversionWarning, check_array, X, dtype=np.float64, accept_sparse=True, warn_on_dtype=True) - assert_equal(X_checked.dtype, np.float64) + assert X_checked.dtype == np.float64 # Check that the warning message includes the name of the Estimator X_checked = assert_warns_message(DataConversionWarning, @@ -377,47 +377,47 @@ def test_check_array_dtype_warning(): accept_sparse=True, warn_on_dtype=True, estimator='SomeEstimator') - assert_equal(X_checked.dtype, np.float64) + assert X_checked.dtype == np.float64 X_checked, y_checked = assert_warns_message( DataConversionWarning, 'KNeighborsClassifier', check_X_y, X, y, dtype=np.float64, accept_sparse=True, warn_on_dtype=True, estimator=KNeighborsClassifier()) - assert_equal(X_checked.dtype, np.float64) + assert X_checked.dtype == np.float64 for X in float64_data: with pytest.warns(None) as record: warnings.simplefilter("ignore", DeprecationWarning) # 0.23 X_checked = check_array(X, dtype=np.float64, accept_sparse=True, warn_on_dtype=True) - assert_equal(X_checked.dtype, np.float64) + assert X_checked.dtype == np.float64 X_checked = check_array(X, dtype=np.float64, accept_sparse=True, warn_on_dtype=False) - assert_equal(X_checked.dtype, np.float64) + assert X_checked.dtype == np.float64 
assert len(record) == 0 for X in float32_data: X_checked = assert_no_warnings(check_array, X, dtype=[np.float64, np.float32], accept_sparse=True) - assert_equal(X_checked.dtype, np.float32) + assert X_checked.dtype == np.float32 assert X_checked is X X_checked = assert_no_warnings(check_array, X, dtype=[np.float64, np.float32], accept_sparse=['csr', 'dok'], copy=True) - assert_equal(X_checked.dtype, np.float32) + assert X_checked.dtype == np.float32 assert X_checked is not X X_checked = assert_no_warnings(check_array, X_csc_float32, dtype=[np.float64, np.float32], accept_sparse=['csr', 'dok'], copy=False) - assert_equal(X_checked.dtype, np.float32) + assert X_checked.dtype == np.float32 assert X_checked is not X_csc_float32 - assert_equal(X_checked.format, 'csr') + assert X_checked.format == 'csr' def test_check_array_warn_on_dtype_deprecation(): @@ -624,7 +624,7 @@ def test_check_symmetric(): output = check_symmetric(arr, raise_warning=False) if sp.issparse(output): - assert_equal(output.format, arr_format) + assert output.format == arr_format assert_array_equal(output.toarray(), arr_sym) else: assert_array_equal(output, arr_sym) @@ -648,18 +648,18 @@ def test_check_is_fitted(): try: check_is_fitted(ard, "coef_", "Random message %(name)s, %(name)s") except ValueError as e: - assert_equal(str(e), "Random message ARDRegression, ARDRegression") + assert str(e) == "Random message ARDRegression, ARDRegression" try: check_is_fitted(svr, "support_", "Another message %(name)s, %(name)s") except AttributeError as e: - assert_equal(str(e), "Another message SVR, SVR") + assert str(e) == "Another message SVR, SVR" ard.fit(*make_blobs()) svr.fit(*make_blobs()) - assert_equal(None, check_is_fitted(ard, "coef_")) - assert_equal(None, check_is_fitted(svr, "support_")) + assert None == check_is_fitted(ard, "coef_") + assert None == check_is_fitted(svr, "support_") def test_check_consistent_length(): @@ -763,9 +763,9 @@ class WrongDummyMemory: @pytest.mark.filterwarnings("ignore:The 'cachedir' attribute") def test_check_memory(): memory = check_memory("cache_directory") - assert_equal(memory.cachedir, os.path.join('cache_directory', 'joblib')) + assert memory.cachedir == os.path.join('cache_directory', 'joblib') memory = check_memory(None) - assert_equal(memory.cachedir, None) + assert memory.cachedir == None dummy = DummyMemory() memory = check_memory(dummy) assert memory is dummy From 0e5d4e787a796fcf89e3112511fb95d45bb7d6bf Mon Sep 17 00:00:00 2001 From: adrinjalali Date: Sun, 30 Jun 2019 16:53:52 +0200 Subject: [PATCH 20/22] pep8 --- sklearn/cluster/tests/test_bicluster.py | 10 +++---- sklearn/cluster/tests/test_k_means.py | 4 +-- sklearn/datasets/tests/test_base.py | 8 +++--- .../datasets/tests/test_samples_generator.py | 13 ++++++---- sklearn/ensemble/tests/test_base.py | 4 +-- sklearn/ensemble/tests/test_forest.py | 9 ++++--- .../ensemble/tests/test_weight_boosting.py | 3 +-- sklearn/linear_model/tests/test_omp.py | 4 +-- sklearn/linear_model/tests/test_ridge.py | 2 +- sklearn/manifold/tests/test_locally_linear.py | 4 +-- .../manifold/tests/test_spectral_embedding.py | 12 ++++----- sklearn/manifold/tests/test_t_sne.py | 2 +- .../cluster/tests/test_unsupervised.py | 4 +-- sklearn/metrics/tests/test_classification.py | 26 +++++++++---------- sklearn/metrics/tests/test_regression.py | 4 +-- sklearn/model_selection/tests/test_split.py | 2 +- .../model_selection/tests/test_validation.py | 4 +-- sklearn/preprocessing/tests/test_data.py | 2 +- sklearn/tests/test_base.py | 6 ++--- 
sklearn/tests/test_calibration.py | 2 +- sklearn/tests/test_discriminant_analysis.py | 9 ++++--- sklearn/tests/test_init.py | 2 +- sklearn/tests/test_kernel_approximation.py | 4 +-- sklearn/tests/test_pipeline.py | 2 +- sklearn/tree/tests/test_tree.py | 4 +-- sklearn/utils/estimator_checks.py | 5 ++-- sklearn/utils/tests/test_multiclass.py | 5 ++-- sklearn/utils/tests/test_validation.py | 6 ++--- 28 files changed, 86 insertions(+), 76 deletions(-) diff --git a/sklearn/cluster/tests/test_bicluster.py b/sklearn/cluster/tests/test_bicluster.py index a5b486246c821..7c9179938305b 100644 --- a/sklearn/cluster/tests/test_bicluster.py +++ b/sklearn/cluster/tests/test_bicluster.py @@ -86,7 +86,7 @@ def test_spectral_coclustering(): assert_array_equal(model.rows_.sum(axis=0), np.ones(30)) assert_array_equal(model.columns_.sum(axis=0), np.ones(30)) assert consensus_score(model.biclusters_, - (rows, cols)) == 1 + (rows, cols)) == 1 _test_shape_indices(model) @@ -127,7 +127,7 @@ def test_spectral_biclustering(): assert_array_equal(model.columns_.sum(axis=0), np.repeat(3, 30)) assert consensus_score(model.biclusters_, - (rows, cols)) == 1 + (rows, cols)) == 1 _test_shape_indices(model) @@ -217,19 +217,19 @@ def test_perfect_checkerboard(): random_state=0) model.fit(S) assert consensus_score(model.biclusters_, - (rows, cols)) == 1 + (rows, cols)) == 1 S, rows, cols = make_checkerboard((40, 30), 3, noise=0, random_state=0) model.fit(S) assert consensus_score(model.biclusters_, - (rows, cols)) == 1 + (rows, cols)) == 1 S, rows, cols = make_checkerboard((30, 40), 3, noise=0, random_state=0) model.fit(S) assert consensus_score(model.biclusters_, - (rows, cols)) == 1 + (rows, cols)) == 1 def test_errors(): diff --git a/sklearn/cluster/tests/test_k_means.py b/sklearn/cluster/tests/test_k_means.py index 212c2311a84fd..03e44db0390b7 100644 --- a/sklearn/cluster/tests/test_k_means.py +++ b/sklearn/cluster/tests/test_k_means.py @@ -801,8 +801,8 @@ def test_k_means_init_centers(): assert_array_equal(init_centers, init_centers_test) km = KMeans(init=init_centers_test, n_clusters=3, n_init=1) km.fit(X_test) - assert False == np.may_share_memory(km.cluster_centers_, - init_centers) + assert np.may_share_memory(km.cluster_centers_, + init_centers) is False @pytest.mark.parametrize("data", [X, X_csr], ids=["dense", "sparse"]) diff --git a/sklearn/datasets/tests/test_base.py b/sklearn/datasets/tests/test_base.py index ef802d0c588a6..34dfce66377e1 100644 --- a/sklearn/datasets/tests/test_base.py +++ b/sklearn/datasets/tests/test_base.py @@ -88,7 +88,7 @@ def test_default_empty_load_files(load_files_root): res = load_files(load_files_root) assert len(res.filenames) == 0 assert len(res.target_names) == 0 - assert res.DESCR == None + assert res.DESCR is None def test_default_load_files(test_category_dir_1, test_category_dir_2, @@ -98,7 +98,7 @@ def test_default_load_files(test_category_dir_1, test_category_dir_2, res = load_files(load_files_root) assert len(res.filenames) == 1 assert len(res.target_names) == 2 - assert res.DESCR == None + assert res.DESCR is None assert res.data == [b"Hello World!\n"] @@ -120,8 +120,8 @@ def test_load_files_wo_load_content( res = load_files(load_files_root, load_content=False) assert len(res.filenames) == 1 assert len(res.target_names) == 2 - assert res.DESCR == None - assert res.get('data') == None + assert res.DESCR is None + assert res.get('data') is None def test_load_sample_images(): diff --git a/sklearn/datasets/tests/test_samples_generator.py 
b/sklearn/datasets/tests/test_samples_generator.py index 092fcc1290dea..6cf35d91afc45 100644 --- a/sklearn/datasets/tests/test_samples_generator.py +++ b/sklearn/datasets/tests/test_samples_generator.py @@ -60,7 +60,8 @@ def test_make_classification(): assert X.shape == (2000, 31), "X shape mismatch" assert y.shape == (2000,), "y shape mismatch" assert (np.unique(X.view([('', X.dtype)]*X.shape[1])).view(X.dtype) - .reshape(-1, X.shape[1]).shape[0] == 2000), "Unexpected number of unique rows" + .reshape(-1, X.shape[1]).shape[0] == 2000), ( + "Unexpected number of unique rows") def test_make_classification_informative_features(): @@ -112,8 +113,8 @@ def test_make_classification_informative_features(): for clusters in clusters_by_class.values(): assert len(clusters) == n_clusters_per_class, ( "Wrong number of clusters per class") - assert (len(clusters_by_class) - == n_classes), "Wrong number of classes" + assert (len(clusters_by_class) == n_classes), ( + "Wrong number of classes") assert_array_almost_equal(np.bincount(y) / len(y) // weights, [1] * n_classes, @@ -462,8 +463,10 @@ def test_make_circles(): assert_almost_equal(dist_sqr, dist_exp, err_msg="Point is not on expected circle") - assert X[y == 0].shape == (n_outer, 2), "Samples not correctly distributed across circles." - assert X[y == 1].shape == (n_inner, 2), "Samples not correctly distributed across circles." + assert X[y == 0].shape == (n_outer, 2), ( + "Samples not correctly distributed across circles.") + assert X[y == 1].shape == (n_inner, 2), ( + "Samples not correctly distributed across circles.") assert_raises(ValueError, make_circles, factor=-0.01) assert_raises(ValueError, make_circles, factor=1.) diff --git a/sklearn/ensemble/tests/test_base.py b/sklearn/ensemble/tests/test_base.py index 7cd2124359e72..16b4df44a5ea8 100644 --- a/sklearn/ensemble/tests/test_base.py +++ b/sklearn/ensemble/tests/test_base.py @@ -40,7 +40,7 @@ def test_base(): assert 3 == len(ensemble.estimators_) assert isinstance(ensemble[0], Perceptron) - assert ensemble[0].random_state == None + assert ensemble[0].random_state is None assert isinstance(ensemble[1].random_state, int) assert isinstance(ensemble[2].random_state, int) assert ensemble[1].random_state != ensemble[2].random_state @@ -82,7 +82,7 @@ def test_set_random_states(): _set_random_states(LinearDiscriminantAnalysis(), random_state=17) clf1 = Perceptron(tol=1e-3, random_state=None) - assert clf1.random_state == None + assert clf1.random_state is None # check random_state is None still sets _set_random_states(clf1, None) assert isinstance(clf1.random_state, int) diff --git a/sklearn/ensemble/tests/test_forest.py b/sklearn/ensemble/tests/test_forest.py index 3ce35a4a2ccea..228ebdb830e44 100644 --- a/sklearn/ensemble/tests/test_forest.py +++ b/sklearn/ensemble/tests/test_forest.py @@ -764,7 +764,8 @@ def check_min_samples_split(name): node_idx = est.estimators_[0].tree_.children_left != -1 node_samples = est.estimators_[0].tree_.n_node_samples[node_idx] - assert np.min(node_samples) > len(X) * 0.5 - 1, "Failed with {0}".format(name) + assert np.min(node_samples) > len(X) * 0.5 - 1, ( + "Failed with {0}".format(name)) est = ForestEstimator(min_samples_split=0.5, n_estimators=1, random_state=0) @@ -772,7 +773,8 @@ def check_min_samples_split(name): node_idx = est.estimators_[0].tree_.children_left != -1 node_samples = est.estimators_[0].tree_.n_node_samples[node_idx] - assert np.min(node_samples) > len(X) * 0.5 - 1, "Failed with {0}".format(name) + assert np.min(node_samples) > len(X) * 0.5 - 
1, ( + "Failed with {0}".format(name)) @pytest.mark.parametrize('name', FOREST_ESTIMATORS) @@ -807,7 +809,8 @@ def check_min_samples_leaf(name): node_counts = np.bincount(out) # drop inner nodes leaf_count = node_counts[node_counts != 0] - assert np.min(leaf_count) > len(X) * 0.25 - 1, "Failed with {0}".format(name) + assert np.min(leaf_count) > len(X) * 0.25 - 1, ( + "Failed with {0}".format(name)) @pytest.mark.parametrize('name', FOREST_ESTIMATORS) diff --git a/sklearn/ensemble/tests/test_weight_boosting.py b/sklearn/ensemble/tests/test_weight_boosting.py index 6f03754345519..fb3cce1cef0ae 100755 --- a/sklearn/ensemble/tests/test_weight_boosting.py +++ b/sklearn/ensemble/tests/test_weight_boosting.py @@ -259,8 +259,7 @@ def test_importances(): importances = clf.feature_importances_ assert importances.shape[0] == 10 - assert ((importances[:3, np.newaxis] >= importances[3:]).all() == - True) + assert (importances[:3, np.newaxis] >= importances[3:]).all() def test_error(): diff --git a/sklearn/linear_model/tests/test_omp.py b/sklearn/linear_model/tests/test_omp.py index 074af8045a59d..c2ba9d02c296a 100644 --- a/sklearn/linear_model/tests/test_omp.py +++ b/sklearn/linear_model/tests/test_omp.py @@ -175,8 +175,8 @@ def test_no_atoms(): Xy_empty = np.dot(X.T, y_empty) gamma_empty = ignore_warnings(orthogonal_mp)(X, y_empty, 1) gamma_empty_gram = ignore_warnings(orthogonal_mp)(G, Xy_empty, 1) - assert np.all(gamma_empty == 0) == True - assert np.all(gamma_empty_gram == 0) == True + assert np.all(gamma_empty == 0) + assert np.all(gamma_empty_gram == 0) def test_omp_path(): diff --git a/sklearn/linear_model/tests/test_ridge.py b/sklearn/linear_model/tests/test_ridge.py index de517b20e859f..469d0e03d9173 100644 --- a/sklearn/linear_model/tests/test_ridge.py +++ b/sklearn/linear_model/tests/test_ridge.py @@ -1007,7 +1007,7 @@ def test_n_iter(): for solver in ('sparse_cg', 'svd', 'cholesky'): reg = Ridge(solver=solver, max_iter=1, tol=1e-1) reg.fit(X, y_n) - assert reg.n_iter_ == None + assert reg.n_iter_ is None def test_ridge_fit_intercept_sparse(): diff --git a/sklearn/manifold/tests/test_locally_linear.py b/sklearn/manifold/tests/test_locally_linear.py index 2e05710b79b9a..adaddb03d2af3 100644 --- a/sklearn/manifold/tests/test_locally_linear.py +++ b/sklearn/manifold/tests/test_locally_linear.py @@ -97,8 +97,8 @@ def test_lle_manifold(): details = ("solver: %s, method: %s" % (solver, method)) assert reconstruction_error < tol, details assert (np.abs(clf.reconstruction_error_ - - reconstruction_error) < - tol * reconstruction_error), details + reconstruction_error) < + tol * reconstruction_error), details # Test the error raised when parameter passed to lle is invalid diff --git a/sklearn/manifold/tests/test_spectral_embedding.py b/sklearn/manifold/tests/test_spectral_embedding.py index b6e3879b8beab..a14b58eb243fa 100644 --- a/sklearn/manifold/tests/test_spectral_embedding.py +++ b/sklearn/manifold/tests/test_spectral_embedding.py @@ -220,17 +220,17 @@ def test_connectivity(seed=36): [0, 1, 1, 1, 0], [0, 0, 1, 1, 1], [0, 0, 0, 1, 1]]) - assert _graph_is_connected(graph) == False - assert _graph_is_connected(sparse.csr_matrix(graph)) == False - assert _graph_is_connected(sparse.csc_matrix(graph)) == False + assert not _graph_is_connected(graph) + assert not _graph_is_connected(sparse.csr_matrix(graph)) + assert not _graph_is_connected(sparse.csc_matrix(graph)) graph = np.array([[1, 1, 0, 0, 0], [1, 1, 1, 0, 0], [0, 1, 1, 1, 0], [0, 0, 1, 1, 1], [0, 0, 0, 1, 1]]) - assert 
_graph_is_connected(graph) == True - assert _graph_is_connected(sparse.csr_matrix(graph)) == True - assert _graph_is_connected(sparse.csc_matrix(graph)) == True + assert _graph_is_connected(graph) + assert _graph_is_connected(sparse.csr_matrix(graph)) + assert _graph_is_connected(sparse.csc_matrix(graph)) def test_spectral_embedding_deterministic(): diff --git a/sklearn/manifold/tests/test_t_sne.py b/sklearn/manifold/tests/test_t_sne.py index ceb569026489d..5f49cc3e3e507 100644 --- a/sklearn/manifold/tests/test_t_sne.py +++ b/sklearn/manifold/tests/test_t_sne.py @@ -672,7 +672,7 @@ def test_n_iter_without_progress(): # The output needs to contain the value of n_iter_without_progress assert ("did not make any progress during the " - "last -1 episodes. Finished." in out) + "last -1 episodes. Finished." in out) def test_min_grad_norm(): diff --git a/sklearn/metrics/cluster/tests/test_unsupervised.py b/sklearn/metrics/cluster/tests/test_unsupervised.py index 29dfd930b0702..3d225321d211b 100644 --- a/sklearn/metrics/cluster/tests/test_unsupervised.py +++ b/sklearn/metrics/cluster/tests/test_unsupervised.py @@ -193,11 +193,11 @@ def test_calinski_harabasz_score(): # Assert the value is 1. when all samples are equals assert 1. == calinski_harabasz_score(np.ones((10, 2)), - [0] * 5 + [1] * 5) + [0] * 5 + [1] * 5) # Assert the value is 0. when all the mean cluster are equal assert 0. == calinski_harabasz_score([[-1, -1], [1, 1]] * 10, - [0] * 10 + [1] * 10) + [0] * 10 + [1] * 10) # General case (with non numpy arrays) X = ([[0, 0], [1, 1]] * 5 + [[3, 3], [4, 4]] * 5 + diff --git a/sklearn/metrics/tests/test_classification.py b/sklearn/metrics/tests/test_classification.py index d51631a5ff6f9..c65fb969626df 100644 --- a/sklearn/metrics/tests/test_classification.py +++ b/sklearn/metrics/tests/test_classification.py @@ -1335,7 +1335,7 @@ def test_precision_recall_f1_score_multilabel_1(): assert_almost_equal(p, 1.5 / 4) assert_almost_equal(r, 0.5) assert_almost_equal(f, 2.5 / 1.5 * 0.25) - assert s == None + assert s is None assert_almost_equal(fbeta_score(y_true, y_pred, beta=2, average="macro"), np.mean(f2)) @@ -1345,7 +1345,7 @@ def test_precision_recall_f1_score_multilabel_1(): assert_almost_equal(p, 0.5) assert_almost_equal(r, 0.5) assert_almost_equal(f, 0.5) - assert s == None + assert s is None assert_almost_equal(fbeta_score(y_true, y_pred, beta=2, average="micro"), (1 + 4) * p * r / (4 * p + r)) @@ -1356,7 +1356,7 @@ def test_precision_recall_f1_score_multilabel_1(): assert_almost_equal(p, 1.5 / 4) assert_almost_equal(r, 0.5) assert_almost_equal(f, 2.5 / 1.5 * 0.25) - assert s == None + assert s is None assert_almost_equal(fbeta_score(y_true, y_pred, beta=2, average="weighted"), np.average(f2, weights=support)) @@ -1369,7 +1369,7 @@ def test_precision_recall_f1_score_multilabel_1(): assert_almost_equal(p, 0.5) assert_almost_equal(r, 0.5) assert_almost_equal(f, 0.5) - assert s == None + assert s is None assert_almost_equal(fbeta_score(y_true, y_pred, beta=2, average="samples"), 0.5) @@ -1401,7 +1401,7 @@ def test_precision_recall_f1_score_multilabel_2(): assert_almost_equal(p, 0.25) assert_almost_equal(r, 0.25) assert_almost_equal(f, 2 * 0.25 * 0.25 / 0.5) - assert s == None + assert s is None assert_almost_equal(fbeta_score(y_true, y_pred, beta=2, average="micro"), (1 + 4) * p * r / (4 * p + r)) @@ -1411,7 +1411,7 @@ def test_precision_recall_f1_score_multilabel_2(): assert_almost_equal(p, 0.25) assert_almost_equal(r, 0.125) assert_almost_equal(f, 2 / 12) - assert s == None + assert s is None 
assert_almost_equal(fbeta_score(y_true, y_pred, beta=2, average="macro"), np.mean(f2)) @@ -1421,7 +1421,7 @@ def test_precision_recall_f1_score_multilabel_2(): assert_almost_equal(p, 2 / 4) assert_almost_equal(r, 1 / 4) assert_almost_equal(f, 2 / 3 * 2 / 4) - assert s == None + assert s is None assert_almost_equal(fbeta_score(y_true, y_pred, beta=2, average="weighted"), np.average(f2, weights=support)) @@ -1436,7 +1436,7 @@ def test_precision_recall_f1_score_multilabel_2(): assert_almost_equal(p, 1 / 6) assert_almost_equal(r, 1 / 6) assert_almost_equal(f, 2 / 4 * 1 / 3) - assert s == None + assert s is None assert_almost_equal(fbeta_score(y_true, y_pred, beta=2, average="samples"), 0.1666, 2) @@ -1466,7 +1466,7 @@ def test_precision_recall_f1_score_with_an_empty_prediction(): assert_almost_equal(p, 0.5) assert_almost_equal(r, 1.5 / 4) assert_almost_equal(f, 2.5 / (4 * 1.5)) - assert s == None + assert s is None assert_almost_equal(fbeta_score(y_true, y_pred, beta=2, average="macro"), np.mean(f2)) @@ -1476,7 +1476,7 @@ def test_precision_recall_f1_score_with_an_empty_prediction(): assert_almost_equal(p, 2 / 3) assert_almost_equal(r, 0.5) assert_almost_equal(f, 2 / 3 / (2 / 3 + 0.5)) - assert s == None + assert s is None assert_almost_equal(fbeta_score(y_true, y_pred, beta=2, average="micro"), (1 + 4) * p * r / (4 * p + r)) @@ -1486,7 +1486,7 @@ def test_precision_recall_f1_score_with_an_empty_prediction(): assert_almost_equal(p, 3 / 4) assert_almost_equal(r, 0.5) assert_almost_equal(f, (2 / 1.5 + 1) / 4) - assert s == None + assert s is None assert_almost_equal(fbeta_score(y_true, y_pred, beta=2, average="weighted"), np.average(f2, weights=support)) @@ -1499,7 +1499,7 @@ def test_precision_recall_f1_score_with_an_empty_prediction(): assert_almost_equal(p, 1 / 3) assert_almost_equal(r, 1 / 3) assert_almost_equal(f, 1 / 3) - assert s == None + assert s is None assert_almost_equal(fbeta_score(y_true, y_pred, beta=2, average="samples"), 0.333, 2) @@ -1518,7 +1518,7 @@ def test_precision_recall_f1_no_labels(beta, average): assert_almost_equal(p, 0) assert_almost_equal(r, 0) assert_almost_equal(f, 0) - assert s == None + assert s is None fbeta = assert_warns(UndefinedMetricWarning, fbeta_score, y_true, y_pred, diff --git a/sklearn/metrics/tests/test_regression.py b/sklearn/metrics/tests/test_regression.py index a40ec3856f201..7903de36260f5 100644 --- a/sklearn/metrics/tests/test_regression.py +++ b/sklearn/metrics/tests/test_regression.py @@ -144,7 +144,7 @@ def test_regression_multioutput_array(): r = r2_score([[0, -1], [0, 1]], [[2, 2], [1, 1]], multioutput='raw_values') assert_array_almost_equal(r, [0, -3.5], decimal=2) assert np.mean(r) == r2_score([[0, -1], [0, 1]], [[2, 2], [1, 1]], - multioutput='uniform_average') + multioutput='uniform_average') evs = explained_variance_score([[0, -1], [0, 1]], [[2, 2], [1, 1]], multioutput='raw_values') assert_array_almost_equal(evs, [0, -1.25], decimal=2) @@ -156,7 +156,7 @@ def test_regression_multioutput_array(): r2 = r2_score(y_true, y_pred, multioutput='raw_values') assert_array_almost_equal(r2, [1., -3.], decimal=2) assert np.mean(r2) == r2_score(y_true, y_pred, - multioutput='uniform_average') + multioutput='uniform_average') evs = explained_variance_score(y_true, y_pred, multioutput='raw_values') assert_array_almost_equal(evs, [1., -3.], decimal=2) assert np.mean(evs) == explained_variance_score(y_true, y_pred) diff --git a/sklearn/model_selection/tests/test_split.py b/sklearn/model_selection/tests/test_split.py index 583000e2000bc..f0c0f6f453c5c 
100644 --- a/sklearn/model_selection/tests/test_split.py +++ b/sklearn/model_selection/tests/test_split.py @@ -915,7 +915,7 @@ def test_leave_group_out_changing_groups(): groups=groups)) # n_splits = no of unique groups (C(uniq_lbls, 1) = n_unique_groups) assert 3 == LeaveOneGroupOut().get_n_splits(X, y=X, - groups=groups) + groups=groups) def test_leave_one_p_group_out_error_on_fewer_number_of_groups(): diff --git a/sklearn/model_selection/tests/test_validation.py b/sklearn/model_selection/tests/test_validation.py index 1d0f1cb1be8d0..2c84439a7c29d 100644 --- a/sklearn/model_selection/tests/test_validation.py +++ b/sklearn/model_selection/tests/test_validation.py @@ -471,8 +471,8 @@ def check_cross_validate_multi_metric(clf, X, y, scores): return_train_score=False) assert isinstance(cv_results, dict) assert (set(cv_results.keys()) == - (keys_with_train if return_train_score - else keys_sans_train)) + (keys_with_train if return_train_score + else keys_sans_train)) assert_array_almost_equal(cv_results['test_r2'], test_r2_scores) assert_array_almost_equal( cv_results['test_neg_mean_squared_error'], test_mse_scores) diff --git a/sklearn/preprocessing/tests/test_data.py b/sklearn/preprocessing/tests/test_data.py index ef3e4c4768c84..6d21ba340b1ae 100644 --- a/sklearn/preprocessing/tests/test_data.py +++ b/sklearn/preprocessing/tests/test_data.py @@ -129,7 +129,7 @@ def test_polynomial_features(): assert_array_almost_equal(X_poly, P2[:, [0, 1, 2, 4]]) assert interact.powers_.shape == (interact.n_output_features_, - interact.n_input_features_) + interact.n_input_features_) def test_polynomial_feature_names(): diff --git a/sklearn/tests/test_base.py b/sklearn/tests/test_base.py index 190af5a8f6800..257753c23584f 100644 --- a/sklearn/tests/test_base.py +++ b/sklearn/tests/test_base.py @@ -424,7 +424,7 @@ def test_pickling_when_getstate_is_overwritten_by_mixin(): serialized = pickle.dumps(estimator) estimator_restored = pickle.loads(serialized) assert estimator_restored.attribute_pickled == 5 - assert estimator_restored._attribute_not_pickled == None + assert estimator_restored._attribute_not_pickled is None assert estimator_restored._restored @@ -438,7 +438,7 @@ def test_pickling_when_getstate_is_overwritten_by_mixin_outside_of_sklearn(): serialized = estimator.__getstate__() assert serialized == {'_attribute_not_pickled': None, - 'attribute_pickled': 5} + 'attribute_pickled': 5} serialized['attribute_pickled'] = 4 estimator.__setstate__(serialized) @@ -467,7 +467,7 @@ def test_pickling_works_when_getstate_is_overwritten_in_the_child_class(): serialized = pickle.dumps(estimator) estimator_restored = pickle.loads(serialized) assert estimator_restored.attribute_pickled == 5 - assert estimator_restored._attribute_not_pickled == None + assert estimator_restored._attribute_not_pickled is None def test_tag_inheritance(): diff --git a/sklearn/tests/test_calibration.py b/sklearn/tests/test_calibration.py index db7ed1920c43e..a7ca57c0c18c6 100644 --- a/sklearn/tests/test_calibration.py +++ b/sklearn/tests/test_calibration.py @@ -318,7 +318,7 @@ def test_calibration_less_classes(): proba = calibrated_classifier.predict_proba(X) assert_array_equal(proba[:, i], np.zeros(len(y))) assert np.all(np.hstack([proba[:, :i], - proba[:, i + 1:]])) == True + proba[:, i + 1:]])) @ignore_warnings(category=(DeprecationWarning, FutureWarning)) diff --git a/sklearn/tests/test_discriminant_analysis.py b/sklearn/tests/test_discriminant_analysis.py index c85227b37a7eb..e6527cc4330f7 100644 --- 
a/sklearn/tests/test_discriminant_analysis.py +++ b/sklearn/tests/test_discriminant_analysis.py @@ -236,12 +236,14 @@ def test_lda_explained_variance_ratio(): clf_lda_eigen = LinearDiscriminantAnalysis(solver="eigen") clf_lda_eigen.fit(X, y) assert_almost_equal(clf_lda_eigen.explained_variance_ratio_.sum(), 1.0, 3) - assert clf_lda_eigen.explained_variance_ratio_.shape == (2,), "Unexpected length for explained_variance_ratio_" + assert clf_lda_eigen.explained_variance_ratio_.shape == (2,), ( + "Unexpected length for explained_variance_ratio_") clf_lda_svd = LinearDiscriminantAnalysis(solver="svd") clf_lda_svd.fit(X, y) assert_almost_equal(clf_lda_svd.explained_variance_ratio_.sum(), 1.0, 3) - assert clf_lda_svd.explained_variance_ratio_.shape == (2,), "Unexpected length for explained_variance_ratio_" + assert clf_lda_svd.explained_variance_ratio_.shape == (2,), ( + "Unexpected length for explained_variance_ratio_") assert_array_almost_equal(clf_lda_svd.explained_variance_ratio_, clf_lda_eigen.explained_variance_ratio_) @@ -294,7 +296,8 @@ def test_lda_scaling(): for solver in ('svd', 'lsqr', 'eigen'): clf = LinearDiscriminantAnalysis(solver=solver) # should be able to separate the data perfectly - assert clf.fit(x, y).score(x, y) == 1.0, 'using covariance: %s' % solver + assert clf.fit(x, y).score(x, y) == 1.0, ( + 'using covariance: %s' % solver) def test_lda_store_covariance(): diff --git a/sklearn/tests/test_init.py b/sklearn/tests/test_init.py index d936ee4e6d2b7..c2a216dddb937 100644 --- a/sklearn/tests/test_init.py +++ b/sklearn/tests/test_init.py @@ -17,4 +17,4 @@ def test_import_skl(): # Test either above import has failed for some reason # "import *" is discouraged outside of the module level, hence we # rely on setting up the variable above - assert _top_import_error == None + assert _top_import_error is None diff --git a/sklearn/tests/test_kernel_approximation.py b/sklearn/tests/test_kernel_approximation.py index af589010d6769..79848f5561799 100644 --- a/sklearn/tests/test_kernel_approximation.py +++ b/sklearn/tests/test_kernel_approximation.py @@ -65,11 +65,11 @@ def test_additive_chi2_sampler(): # test that the sample_interval is initialized correctly transform = AdditiveChi2Sampler(sample_steps=sample_steps) - assert transform.sample_interval == None + assert transform.sample_interval is None # test that the sample_interval is changed in the fit method transform.fit(X) - assert transform.sample_interval_ != None + assert transform.sample_interval_ is not None # test that the sample_interval is set correctly sample_interval = 0.3 diff --git a/sklearn/tests/test_pipeline.py b/sklearn/tests/test_pipeline.py index e064f0ba39572..0a2e67d599d85 100644 --- a/sklearn/tests/test_pipeline.py +++ b/sklearn/tests/test_pipeline.py @@ -177,7 +177,7 @@ def test_pipeline_init(): # Check that params are set pipe.set_params(svc__a=0.1) assert clf.a == 0.1 - assert clf.b == None + assert clf.b is None # Smoke test the repr: repr(pipe) diff --git a/sklearn/tree/tests/test_tree.py b/sklearn/tree/tests/test_tree.py index 89a1816fd852e..dbce4a5d0d560 100644 --- a/sklearn/tree/tests/test_tree.py +++ b/sklearn/tree/tests/test_tree.py @@ -827,8 +827,8 @@ def test_min_impurity_split(): est.min_impurity_split)) assert est.tree_.impurity[node] <= min_impurity_split, ( "Failed with {0}, min_impurity_split={1}".format( - est.tree_.impurity[node], - est.min_impurity_split)) + est.tree_.impurity[node], + est.min_impurity_split)) def test_min_impurity_decrease(): diff --git a/sklearn/utils/estimator_checks.py 
b/sklearn/utils/estimator_checks.py index 42220406b477d..18cba0cbc56f1 100644 --- a/sklearn/utils/estimator_checks.py +++ b/sklearn/utils/estimator_checks.py @@ -721,7 +721,8 @@ def check_dict_unchanged(name, estimator_orig): if hasattr(estimator, method): dict_before = estimator.__dict__.copy() getattr(estimator, method)(X) - assert estimator.__dict__ == dict_before, 'Estimator changes __dict__ during %s' % method + assert estimator.__dict__ == dict_before, ( + 'Estimator changes __dict__ during %s' % method) def is_public_parameter(attr): @@ -2372,7 +2373,7 @@ def check_set_params(name, estimator_orig): else: curr_params = estimator.get_params(deep=False) assert (set(test_params.keys()) == - set(curr_params.keys())), msg + set(curr_params.keys())), msg for k, v in curr_params.items(): assert test_params[k] is v, msg test_params[param_name] = default_value diff --git a/sklearn/utils/tests/test_multiclass.py b/sklearn/utils/tests/test_multiclass.py index 962d927f43ba1..4dc44e797e211 100644 --- a/sklearn/utils/tests/test_multiclass.py +++ b/sklearn/utils/tests/test_multiclass.py @@ -280,8 +280,9 @@ def test_check_classification_targets(): def test_type_of_target(): for group, group_examples in EXAMPLES.items(): for example in group_examples: - assert type_of_target(example) == group, ('type_of_target(%r) should be %r, got %r' - % (example, group, type_of_target(example))) + assert type_of_target(example) == group, ( + 'type_of_target(%r) should be %r, got %r' + % (example, group, type_of_target(example))) for example in NON_ARRAY_LIKE_EXAMPLES: msg_regex = r'Expected array-like \(array or non-string sequence\).*' diff --git a/sklearn/utils/tests/test_validation.py b/sklearn/utils/tests/test_validation.py index 6bdd8d9047376..7cd6929892170 100644 --- a/sklearn/utils/tests/test_validation.py +++ b/sklearn/utils/tests/test_validation.py @@ -658,8 +658,8 @@ def test_check_is_fitted(): ard.fit(*make_blobs()) svr.fit(*make_blobs()) - assert None == check_is_fitted(ard, "coef_") - assert None == check_is_fitted(svr, "support_") + assert check_is_fitted(ard, "coef_") is None + assert check_is_fitted(svr, "support_") is None def test_check_consistent_length(): @@ -765,7 +765,7 @@ def test_check_memory(): memory = check_memory("cache_directory") assert memory.cachedir == os.path.join('cache_directory', 'joblib') memory = check_memory(None) - assert memory.cachedir == None + assert memory.cachedir is None dummy = DummyMemory() memory = check_memory(dummy) assert memory is dummy From d439ad72c84a10387748c5ee1f9331a01cf24a92 Mon Sep 17 00:00:00 2001 From: adrinjalali Date: Sun, 30 Jun 2019 17:22:03 +0200 Subject: [PATCH 21/22] remove some unused imports --- sklearn/cluster/tests/test_affinity_propagation.py | 2 +- sklearn/cluster/tests/test_bicluster.py | 1 - sklearn/cluster/tests/test_birch.py | 3 --- sklearn/cluster/tests/test_dbscan.py | 3 --- sklearn/cluster/tests/test_hierarchical.py | 1 - sklearn/cluster/tests/test_k_means.py | 3 --- sklearn/cluster/tests/test_optics.py | 1 - sklearn/cluster/tests/test_spectral.py | 1 - sklearn/compose/tests/test_column_transformer.py | 2 -- sklearn/cross_decomposition/tests/test_pls.py | 2 +- sklearn/datasets/tests/test_20news.py | 1 - sklearn/datasets/tests/test_base.py | 1 - sklearn/datasets/tests/test_covtype.py | 2 +- sklearn/datasets/tests/test_kddcup99.py | 2 +- sklearn/datasets/tests/test_lfw.py | 1 - sklearn/datasets/tests/test_samples_generator.py | 2 -- sklearn/datasets/tests/test_svmlight_format.py | 2 -- 
sklearn/decomposition/tests/test_dict_learning.py | 2 -- sklearn/decomposition/tests/test_factor_analysis.py | 3 --- sklearn/decomposition/tests/test_fastica.py | 2 -- sklearn/decomposition/tests/test_kernel_pca.py | 5 ++--- sklearn/decomposition/tests/test_online_lda.py | 2 -- sklearn/decomposition/tests/test_sparse_pca.py | 1 - sklearn/ensemble/tests/test_bagging.py | 3 --- sklearn/ensemble/tests/test_base.py | 2 -- sklearn/ensemble/tests/test_forest.py | 3 --- sklearn/ensemble/tests/test_gradient_boosting.py | 3 --- .../ensemble/tests/test_gradient_boosting_loss_functions.py | 1 - sklearn/ensemble/tests/test_iforest.py | 2 -- 29 files changed, 6 insertions(+), 53 deletions(-) diff --git a/sklearn/cluster/tests/test_affinity_propagation.py b/sklearn/cluster/tests/test_affinity_propagation.py index 57ab89cfd6e54..3b938ecddc0d2 100644 --- a/sklearn/cluster/tests/test_affinity_propagation.py +++ b/sklearn/cluster/tests/test_affinity_propagation.py @@ -9,7 +9,7 @@ from sklearn.exceptions import ConvergenceWarning from sklearn.utils.testing import ( - assert_equal, assert_array_equal, assert_raises, + assert_array_equal, assert_raises, assert_warns, assert_warns_message, assert_no_warnings) from sklearn.cluster.affinity_propagation_ import AffinityPropagation diff --git a/sklearn/cluster/tests/test_bicluster.py b/sklearn/cluster/tests/test_bicluster.py index 7c9179938305b..4c230f4c2adc0 100644 --- a/sklearn/cluster/tests/test_bicluster.py +++ b/sklearn/cluster/tests/test_bicluster.py @@ -5,7 +5,6 @@ from sklearn.model_selection import ParameterGrid -from sklearn.utils.testing import assert_equal from sklearn.utils.testing import assert_almost_equal from sklearn.utils.testing import assert_array_equal from sklearn.utils.testing import assert_array_almost_equal diff --git a/sklearn/cluster/tests/test_birch.py b/sklearn/cluster/tests/test_birch.py index 522b0304a0111..e4d87f46cc70e 100644 --- a/sklearn/cluster/tests/test_birch.py +++ b/sklearn/cluster/tests/test_birch.py @@ -13,9 +13,6 @@ from sklearn.linear_model import ElasticNet from sklearn.metrics import pairwise_distances_argmin, v_measure_score -from sklearn.utils.testing import assert_greater_equal -from sklearn.utils.testing import assert_equal -from sklearn.utils.testing import assert_greater from sklearn.utils.testing import assert_almost_equal from sklearn.utils.testing import assert_array_equal from sklearn.utils.testing import assert_array_almost_equal diff --git a/sklearn/cluster/tests/test_dbscan.py b/sklearn/cluster/tests/test_dbscan.py index 02f110e663ec4..919f4bc4aae74 100644 --- a/sklearn/cluster/tests/test_dbscan.py +++ b/sklearn/cluster/tests/test_dbscan.py @@ -11,11 +11,8 @@ import pytest -from sklearn.utils.testing import assert_equal from sklearn.utils.testing import assert_array_equal from sklearn.utils.testing import assert_raises -from sklearn.utils.testing import assert_in -from sklearn.utils.testing import assert_not_in from sklearn.neighbors import NearestNeighbors from sklearn.cluster.dbscan_ import DBSCAN from sklearn.cluster.dbscan_ import dbscan diff --git a/sklearn/cluster/tests/test_hierarchical.py b/sklearn/cluster/tests/test_hierarchical.py index 7cd78a3e0ac40..c630ea5d2e8c0 100644 --- a/sklearn/cluster/tests/test_hierarchical.py +++ b/sklearn/cluster/tests/test_hierarchical.py @@ -16,7 +16,6 @@ from sklearn.metrics.cluster.supervised import adjusted_rand_score from sklearn.utils.testing import assert_raises -from sklearn.utils.testing import assert_equal from sklearn.utils.testing import 
assert_almost_equal from sklearn.utils.testing import assert_array_almost_equal from sklearn.utils.testing import assert_raise_message diff --git a/sklearn/cluster/tests/test_k_means.py b/sklearn/cluster/tests/test_k_means.py index 03e44db0390b7..4fca8f621e141 100644 --- a/sklearn/cluster/tests/test_k_means.py +++ b/sklearn/cluster/tests/test_k_means.py @@ -6,15 +6,12 @@ import pytest -from sklearn.utils.testing import assert_equal from sklearn.utils.testing import assert_array_equal from sklearn.utils.testing import assert_array_almost_equal from sklearn.utils.testing import assert_allclose from sklearn.utils.testing import assert_almost_equal from sklearn.utils.testing import assert_raises from sklearn.utils.testing import assert_raises_regex -from sklearn.utils.testing import assert_greater -from sklearn.utils.testing import assert_less from sklearn.utils.testing import assert_warns from sklearn.utils.testing import assert_warns_message from sklearn.utils.testing import if_safe_multiprocessing_with_blas diff --git a/sklearn/cluster/tests/test_optics.py b/sklearn/cluster/tests/test_optics.py index b90d8ee7a1e3d..f71be6bc627c1 100644 --- a/sklearn/cluster/tests/test_optics.py +++ b/sklearn/cluster/tests/test_optics.py @@ -13,7 +13,6 @@ from sklearn.metrics.pairwise import pairwise_distances from sklearn.cluster.dbscan_ import DBSCAN from sklearn.utils import shuffle -from sklearn.utils.testing import assert_equal from sklearn.utils.testing import assert_array_equal from sklearn.utils.testing import assert_raise_message from sklearn.utils.testing import assert_allclose diff --git a/sklearn/cluster/tests/test_spectral.py b/sklearn/cluster/tests/test_spectral.py index 58cc9f4f1036f..df47b089c8d7e 100644 --- a/sklearn/cluster/tests/test_spectral.py +++ b/sklearn/cluster/tests/test_spectral.py @@ -8,7 +8,6 @@ import pickle from sklearn.utils import check_random_state -from sklearn.utils.testing import assert_equal from sklearn.utils.testing import assert_array_equal from sklearn.utils.testing import assert_raises from sklearn.utils.testing import assert_warns_message diff --git a/sklearn/compose/tests/test_column_transformer.py b/sklearn/compose/tests/test_column_transformer.py index bcbbcc1c3902e..f1abbdccbdb42 100644 --- a/sklearn/compose/tests/test_column_transformer.py +++ b/sklearn/compose/tests/test_column_transformer.py @@ -9,8 +9,6 @@ from sklearn.utils.testing import assert_raises from sklearn.utils.testing import assert_raise_message -from sklearn.utils.testing import assert_equal -from sklearn.utils.testing import assert_dict_equal from sklearn.utils.testing import assert_array_equal from sklearn.utils.testing import assert_allclose_dense_sparse from sklearn.utils.testing import assert_almost_equal diff --git a/sklearn/cross_decomposition/tests/test_pls.py b/sklearn/cross_decomposition/tests/test_pls.py index abb305aefdb37..687f28b6104c5 100644 --- a/sklearn/cross_decomposition/tests/test_pls.py +++ b/sklearn/cross_decomposition/tests/test_pls.py @@ -2,7 +2,7 @@ import numpy as np from numpy.testing import assert_approx_equal -from sklearn.utils.testing import (assert_equal, assert_array_almost_equal, +from sklearn.utils.testing import (assert_array_almost_equal, assert_array_equal, assert_raise_message, assert_warns) from sklearn.datasets import load_linnerud diff --git a/sklearn/datasets/tests/test_20news.py b/sklearn/datasets/tests/test_20news.py index 5b171999433db..04fc994598fe1 100644 --- a/sklearn/datasets/tests/test_20news.py +++ b/sklearn/datasets/tests/test_20news.py @@ 
-2,7 +2,6 @@ import numpy as np import scipy.sparse as sp -from sklearn.utils.testing import assert_equal from sklearn.utils.testing import SkipTest from sklearn.datasets.tests.test_common import check_return_X_y from functools import partial diff --git a/sklearn/datasets/tests/test_base.py b/sklearn/datasets/tests/test_base.py index 34dfce66377e1..1b58115d337e7 100644 --- a/sklearn/datasets/tests/test_base.py +++ b/sklearn/datasets/tests/test_base.py @@ -27,7 +27,6 @@ from sklearn.externals._pilutil import pillow_installed -from sklearn.utils.testing import assert_equal from sklearn.utils.testing import assert_raises from sklearn.utils import IS_PYPY diff --git a/sklearn/datasets/tests/test_covtype.py b/sklearn/datasets/tests/test_covtype.py index 0c30a0c7d5b18..3d349f457761f 100644 --- a/sklearn/datasets/tests/test_covtype.py +++ b/sklearn/datasets/tests/test_covtype.py @@ -4,7 +4,7 @@ """ from sklearn.datasets import fetch_covtype -from sklearn.utils.testing import assert_equal, SkipTest +from sklearn.utils.testing import SkipTest from sklearn.datasets.tests.test_common import check_return_X_y from functools import partial diff --git a/sklearn/datasets/tests/test_kddcup99.py b/sklearn/datasets/tests/test_kddcup99.py index 6efb23c6dfd26..f7a24e7d26f86 100644 --- a/sklearn/datasets/tests/test_kddcup99.py +++ b/sklearn/datasets/tests/test_kddcup99.py @@ -7,7 +7,7 @@ from sklearn.datasets import fetch_kddcup99 from sklearn.datasets.tests.test_common import check_return_X_y -from sklearn.utils.testing import assert_equal, SkipTest +from sklearn.utils.testing import SkipTest from functools import partial diff --git a/sklearn/datasets/tests/test_lfw.py b/sklearn/datasets/tests/test_lfw.py index 11211e803f93d..081caed328760 100644 --- a/sklearn/datasets/tests/test_lfw.py +++ b/sklearn/datasets/tests/test_lfw.py @@ -19,7 +19,6 @@ from sklearn.datasets import fetch_lfw_people from sklearn.utils.testing import assert_array_equal -from sklearn.utils.testing import assert_equal from sklearn.utils.testing import SkipTest from sklearn.utils.testing import assert_raises from sklearn.datasets.tests.test_common import check_return_X_y diff --git a/sklearn/datasets/tests/test_samples_generator.py b/sklearn/datasets/tests/test_samples_generator.py index 6cf35d91afc45..90af621f8bb87 100644 --- a/sklearn/datasets/tests/test_samples_generator.py +++ b/sklearn/datasets/tests/test_samples_generator.py @@ -6,11 +6,9 @@ import pytest import scipy.sparse as sp -from sklearn.utils.testing import assert_equal from sklearn.utils.testing import assert_array_equal from sklearn.utils.testing import assert_almost_equal from sklearn.utils.testing import assert_array_almost_equal -from sklearn.utils.testing import assert_less from sklearn.utils.testing import assert_raises from sklearn.utils.testing import assert_raise_message diff --git a/sklearn/datasets/tests/test_svmlight_format.py b/sklearn/datasets/tests/test_svmlight_format.py index bec67a7aa3819..4ff4e8422817c 100644 --- a/sklearn/datasets/tests/test_svmlight_format.py +++ b/sklearn/datasets/tests/test_svmlight_format.py @@ -9,12 +9,10 @@ import pytest -from sklearn.utils.testing import assert_equal from sklearn.utils.testing import assert_array_equal from sklearn.utils.testing import assert_array_almost_equal from sklearn.utils.testing import assert_raises from sklearn.utils.testing import assert_raises_regex -from sklearn.utils.testing import assert_in from sklearn.utils.testing import fails_if_pypy import sklearn diff --git 
a/sklearn/decomposition/tests/test_dict_learning.py b/sklearn/decomposition/tests/test_dict_learning.py index cbe4c822cb5ab..0e3e2f7e80f1d 100644 --- a/sklearn/decomposition/tests/test_dict_learning.py +++ b/sklearn/decomposition/tests/test_dict_learning.py @@ -9,8 +9,6 @@ from sklearn.utils.testing import assert_array_almost_equal from sklearn.utils.testing import assert_array_equal -from sklearn.utils.testing import assert_equal -from sklearn.utils.testing import assert_less from sklearn.utils.testing import assert_raises from sklearn.utils.testing import ignore_warnings from sklearn.utils.testing import TempMemmap diff --git a/sklearn/decomposition/tests/test_factor_analysis.py b/sklearn/decomposition/tests/test_factor_analysis.py index 8547a3c0f6bff..43a8f4b78e13d 100644 --- a/sklearn/decomposition/tests/test_factor_analysis.py +++ b/sklearn/decomposition/tests/test_factor_analysis.py @@ -5,9 +5,6 @@ import numpy as np from sklearn.utils.testing import assert_warns -from sklearn.utils.testing import assert_equal -from sklearn.utils.testing import assert_greater -from sklearn.utils.testing import assert_less from sklearn.utils.testing import assert_raises from sklearn.utils.testing import assert_almost_equal from sklearn.utils.testing import assert_array_almost_equal diff --git a/sklearn/decomposition/tests/test_fastica.py b/sklearn/decomposition/tests/test_fastica.py index 04ef5d6f86fba..6e3b830418291 100644 --- a/sklearn/decomposition/tests/test_fastica.py +++ b/sklearn/decomposition/tests/test_fastica.py @@ -10,8 +10,6 @@ from sklearn.utils.testing import assert_almost_equal from sklearn.utils.testing import assert_array_almost_equal -from sklearn.utils.testing import assert_less -from sklearn.utils.testing import assert_equal from sklearn.utils.testing import assert_warns from sklearn.utils.testing import assert_raises from sklearn.utils.testing import assert_raises_regex diff --git a/sklearn/decomposition/tests/test_kernel_pca.py b/sklearn/decomposition/tests/test_kernel_pca.py index c5ac24b3423f7..a61406007d5d1 100644 --- a/sklearn/decomposition/tests/test_kernel_pca.py +++ b/sklearn/decomposition/tests/test_kernel_pca.py @@ -2,8 +2,7 @@ import scipy.sparse as sp import pytest -from sklearn.utils.testing import (assert_array_almost_equal, assert_less, - assert_equal, assert_not_equal, +from sklearn.utils.testing import (assert_array_almost_equal, + assert_raises, assert_allclose) from sklearn.decomposition import PCA, KernelPCA @@ -108,7 +107,7 @@ def test_kernel_pca_sparse(): # inverse transform # X_pred2 = kpca.inverse_transform(X_pred_transformed) - # assert_equal(X_pred2.shape, X_pred.shape) + # assert X_pred2.shape == X_pred.shape def test_kernel_pca_linear_kernel(): diff --git a/sklearn/decomposition/tests/test_online_lda.py b/sklearn/decomposition/tests/test_online_lda.py index 1c13c890c2ea6..dc050221e5661 100644 --- a/sklearn/decomposition/tests/test_online_lda.py +++ b/sklearn/decomposition/tests/test_online_lda.py @@ -12,10 +12,8 @@ _dirichlet_expectation_2d) from sklearn.utils.testing import assert_allclose -from sklearn.utils.testing import assert_equal from sklearn.utils.testing import assert_array_almost_equal from sklearn.utils.testing import assert_almost_equal -from sklearn.utils.testing import assert_greater_equal from sklearn.utils.testing import assert_raises_regexp from sklearn.utils.testing import if_safe_multiprocessing_with_blas diff --git a/sklearn/decomposition/tests/test_sparse_pca.py b/sklearn/decomposition/tests/test_sparse_pca.py index 
8440dd17717bc..5d3f265cb9418 100644 --- a/sklearn/decomposition/tests/test_sparse_pca.py +++ b/sklearn/decomposition/tests/test_sparse_pca.py @@ -7,7 +7,6 @@ import numpy as np from sklearn.utils.testing import assert_array_almost_equal -from sklearn.utils.testing import assert_equal from sklearn.utils.testing import assert_allclose from sklearn.utils.testing import if_safe_multiprocessing_with_blas diff --git a/sklearn/ensemble/tests/test_bagging.py b/sklearn/ensemble/tests/test_bagging.py index f4bda051816ee..345ee90f1fe49 100644 --- a/sklearn/ensemble/tests/test_bagging.py +++ b/sklearn/ensemble/tests/test_bagging.py @@ -12,10 +12,7 @@ from sklearn.utils.testing import assert_array_equal from sklearn.utils.testing import assert_array_almost_equal -from sklearn.utils.testing import assert_equal from sklearn.utils.testing import assert_raises -from sklearn.utils.testing import assert_greater -from sklearn.utils.testing import assert_less from sklearn.utils.testing import assert_warns from sklearn.utils.testing import assert_warns_message from sklearn.utils.testing import assert_raise_message diff --git a/sklearn/ensemble/tests/test_base.py b/sklearn/ensemble/tests/test_base.py index 16b4df44a5ea8..73b7c1e5fba42 100644 --- a/sklearn/ensemble/tests/test_base.py +++ b/sklearn/ensemble/tests/test_base.py @@ -6,10 +6,8 @@ # License: BSD 3 clause import numpy as np -from numpy.testing import assert_equal from sklearn.utils.testing import assert_raise_message -from sklearn.utils.testing import assert_not_equal from sklearn.datasets import load_iris from sklearn.ensemble import BaggingClassifier diff --git a/sklearn/ensemble/tests/test_forest.py b/sklearn/ensemble/tests/test_forest.py index 228ebdb830e44..01102c9679053 100644 --- a/sklearn/ensemble/tests/test_forest.py +++ b/sklearn/ensemble/tests/test_forest.py @@ -28,9 +28,6 @@ from sklearn.utils.testing import assert_almost_equal from sklearn.utils.testing import assert_array_almost_equal from sklearn.utils.testing import assert_array_equal -from sklearn.utils.testing import assert_equal -from sklearn.utils.testing import assert_less, assert_greater -from sklearn.utils.testing import assert_greater_equal from sklearn.utils.testing import assert_raises from sklearn.utils.testing import assert_warns from sklearn.utils.testing import assert_warns_message diff --git a/sklearn/ensemble/tests/test_gradient_boosting.py b/sklearn/ensemble/tests/test_gradient_boosting.py index aa041073157a4..17e09f7f07156 100644 --- a/sklearn/ensemble/tests/test_gradient_boosting.py +++ b/sklearn/ensemble/tests/test_gradient_boosting.py @@ -27,9 +27,6 @@ from sklearn.utils.testing import assert_almost_equal from sklearn.utils.testing import assert_array_almost_equal from sklearn.utils.testing import assert_array_equal -from sklearn.utils.testing import assert_equal -from sklearn.utils.testing import assert_greater -from sklearn.utils.testing import assert_less from sklearn.utils.testing import assert_raises from sklearn.utils.testing import assert_raise_message from sklearn.utils.testing import assert_warns diff --git a/sklearn/ensemble/tests/test_gradient_boosting_loss_functions.py b/sklearn/ensemble/tests/test_gradient_boosting_loss_functions.py index d7fbc4a986469..6b24f90d0239d 100644 --- a/sklearn/ensemble/tests/test_gradient_boosting_loss_functions.py +++ b/sklearn/ensemble/tests/test_gradient_boosting_loss_functions.py @@ -5,7 +5,6 @@ import numpy as np from numpy.testing import assert_almost_equal from numpy.testing import assert_allclose -from numpy.testing 
import assert_equal import pytest from sklearn.utils import check_random_state diff --git a/sklearn/ensemble/tests/test_iforest.py b/sklearn/ensemble/tests/test_iforest.py index 298e0e422cce5..e3ce3c2100793 100644 --- a/sklearn/ensemble/tests/test_iforest.py +++ b/sklearn/ensemble/tests/test_iforest.py @@ -14,8 +14,6 @@ from sklearn.utils.testing import assert_array_almost_equal from sklearn.utils.testing import assert_raises from sklearn.utils.testing import assert_warns_message -from sklearn.utils.testing import assert_equal -from sklearn.utils.testing import assert_greater from sklearn.utils.testing import ignore_warnings from sklearn.utils.testing import assert_allclose From dc5017f445ec89ddbdbca9544074eeb6e533fe92 Mon Sep 17 00:00:00 2001 From: adrinjalali Date: Sun, 30 Jun 2019 18:21:45 +0200 Subject: [PATCH 22/22] automatic removal of unused imports --- sklearn/covariance/tests/test_covariance.py | 1 - sklearn/datasets/tests/test_rcv1.py | 1 - sklearn/decomposition/tests/test_nmf.py | 2 -- sklearn/ensemble/tests/test_voting.py | 1 - sklearn/ensemble/tests/test_weight_boosting.py | 1 - sklearn/feature_selection/tests/test_base.py | 2 +- sklearn/feature_selection/tests/test_feature_select.py | 3 --- sklearn/feature_selection/tests/test_from_model.py | 3 --- sklearn/feature_selection/tests/test_rfe.py | 1 - sklearn/linear_model/tests/test_base.py | 2 -- sklearn/linear_model/tests/test_bayes.py | 1 - sklearn/linear_model/tests/test_coordinate_descent.py | 2 -- sklearn/linear_model/tests/test_huber.py | 1 - sklearn/linear_model/tests/test_least_angle.py | 3 --- sklearn/linear_model/tests/test_logistic.py | 2 -- sklearn/linear_model/tests/test_omp.py | 1 - sklearn/linear_model/tests/test_passive_aggressive.py | 2 -- sklearn/linear_model/tests/test_perceptron.py | 1 - sklearn/linear_model/tests/test_ransac.py | 1 - sklearn/linear_model/tests/test_ridge.py | 2 -- sklearn/linear_model/tests/test_sag.py | 1 - sklearn/linear_model/tests/test_sgd.py | 3 --- .../linear_model/tests/test_sparse_coordinate_descent.py | 3 --- sklearn/manifold/tests/test_isomap.py | 1 - sklearn/manifold/tests/test_locally_linear.py | 1 - sklearn/manifold/tests/test_spectral_embedding.py | 2 +- sklearn/manifold/tests/test_t_sne.py | 5 ----- sklearn/metrics/cluster/tests/test_bicluster.py | 2 +- sklearn/metrics/cluster/tests/test_unsupervised.py | 2 -- sklearn/metrics/tests/test_classification.py | 2 -- sklearn/metrics/tests/test_common.py | 1 - sklearn/metrics/tests/test_pairwise.py | 2 -- sklearn/metrics/tests/test_ranking.py | 3 +-- sklearn/metrics/tests/test_regression.py | 1 - sklearn/metrics/tests/test_score_objects.py | 2 -- sklearn/mixture/tests/test_bayesian_mixture.py | 2 +- sklearn/mixture/tests/test_gaussian_mixture.py | 3 --- sklearn/model_selection/tests/test_search.py | 3 --- sklearn/model_selection/tests/test_split.py | 4 ---- sklearn/model_selection/tests/test_validation.py | 3 --- sklearn/neighbors/tests/test_lof.py | 2 -- sklearn/neighbors/tests/test_neighbors.py | 3 --- sklearn/preprocessing/tests/test_data.py | 3 --- sklearn/preprocessing/tests/test_function_transformer.py | 1 - sklearn/preprocessing/tests/test_label.py | 1 - sklearn/semi_supervised/tests/test_label_propagation.py | 1 - sklearn/svm/tests/test_svm.py | 2 -- sklearn/tests/test_base.py | 3 --- sklearn/tests/test_common.py | 2 -- sklearn/tests/test_discriminant_analysis.py | 2 -- sklearn/tests/test_docstring_parameters.py | 2 +- sklearn/tests/test_dummy.py | 1 - sklearn/tests/test_init.py | 1 - 
sklearn/tests/test_kernel_approximation.py | 4 +--- sklearn/tests/test_multiclass.py | 2 -- sklearn/tests/test_multioutput.py | 3 --- sklearn/tests/test_naive_bayes.py | 2 -- sklearn/tests/test_pipeline.py | 2 -- sklearn/tests/test_random_projection.py | 3 --- sklearn/tree/tests/test_tree.py | 6 ------ sklearn/utils/tests/test_class_weight.py | 1 - sklearn/utils/tests/test_extmath.py | 2 -- sklearn/utils/tests/test_fast_dict.py | 1 - sklearn/utils/tests/test_multiclass.py | 1 - sklearn/utils/tests/test_murmurhash.py | 1 - sklearn/utils/tests/test_validation.py | 1 - 66 files changed, 7 insertions(+), 125 deletions(-) diff --git a/sklearn/covariance/tests/test_covariance.py b/sklearn/covariance/tests/test_covariance.py index d7e6428ee27fb..a98e05c7ceaba 100644 --- a/sklearn/covariance/tests/test_covariance.py +++ b/sklearn/covariance/tests/test_covariance.py @@ -11,7 +11,6 @@ from sklearn.utils.testing import assert_array_equal from sklearn.utils.testing import assert_raises from sklearn.utils.testing import assert_warns -from sklearn.utils.testing import assert_greater from sklearn import datasets from sklearn.covariance import empirical_covariance, EmpiricalCovariance, \ diff --git a/sklearn/datasets/tests/test_rcv1.py b/sklearn/datasets/tests/test_rcv1.py index aa747bd5d74fe..2e9f42fa3634b 100644 --- a/sklearn/datasets/tests/test_rcv1.py +++ b/sklearn/datasets/tests/test_rcv1.py @@ -11,7 +11,6 @@ from sklearn.datasets.tests.test_common import check_return_X_y from sklearn.utils.testing import assert_almost_equal from sklearn.utils.testing import assert_array_equal -from sklearn.utils.testing import assert_equal from sklearn.utils.testing import SkipTest diff --git a/sklearn/decomposition/tests/test_nmf.py b/sklearn/decomposition/tests/test_nmf.py index b6703f0c24c0c..35681d7e65736 100644 --- a/sklearn/decomposition/tests/test_nmf.py +++ b/sklearn/decomposition/tests/test_nmf.py @@ -14,8 +14,6 @@ from sklearn.utils.testing import assert_array_equal from sklearn.utils.testing import assert_array_almost_equal from sklearn.utils.testing import assert_almost_equal -from sklearn.utils.testing import assert_less -from sklearn.utils.testing import assert_greater from sklearn.utils.testing import ignore_warnings from sklearn.utils.extmath import squared_norm from sklearn.base import clone diff --git a/sklearn/ensemble/tests/test_voting.py b/sklearn/ensemble/tests/test_voting.py index be29d1fbcff64..767755c23f460 100644 --- a/sklearn/ensemble/tests/test_voting.py +++ b/sklearn/ensemble/tests/test_voting.py @@ -5,7 +5,6 @@ from sklearn.utils.testing import assert_almost_equal, assert_array_equal from sklearn.utils.testing import assert_array_almost_equal -from sklearn.utils.testing import assert_equal from sklearn.utils.testing import assert_raise_message from sklearn.exceptions import NotFittedError from sklearn.linear_model import LinearRegression diff --git a/sklearn/ensemble/tests/test_weight_boosting.py b/sklearn/ensemble/tests/test_weight_boosting.py index fb3cce1cef0ae..1cb1e9d1431cf 100755 --- a/sklearn/ensemble/tests/test_weight_boosting.py +++ b/sklearn/ensemble/tests/test_weight_boosting.py @@ -4,7 +4,6 @@ from sklearn.utils.testing import assert_array_equal, assert_array_less from sklearn.utils.testing import assert_array_almost_equal -from sklearn.utils.testing import assert_equal, assert_greater from sklearn.utils.testing import assert_raises, assert_raises_regexp from sklearn.base import BaseEstimator diff --git a/sklearn/feature_selection/tests/test_base.py 
b/sklearn/feature_selection/tests/test_base.py index f2e3b36d456b5..f75f1789243fc 100644 --- a/sklearn/feature_selection/tests/test_base.py +++ b/sklearn/feature_selection/tests/test_base.py @@ -6,7 +6,7 @@ from sklearn.base import BaseEstimator from sklearn.feature_selection.base import SelectorMixin from sklearn.utils import check_array -from sklearn.utils.testing import assert_raises, assert_equal +from sklearn.utils.testing import assert_raises class StepSelector(SelectorMixin, BaseEstimator): diff --git a/sklearn/feature_selection/tests/test_feature_select.py b/sklearn/feature_selection/tests/test_feature_select.py index ac1822e1a6063..0283c3f6aba89 100644 --- a/sklearn/feature_selection/tests/test_feature_select.py +++ b/sklearn/feature_selection/tests/test_feature_select.py @@ -8,13 +8,10 @@ import pytest -from sklearn.utils.testing import assert_equal from sklearn.utils.testing import assert_almost_equal from sklearn.utils.testing import assert_raises from sklearn.utils.testing import assert_array_equal from sklearn.utils.testing import assert_array_almost_equal -from sklearn.utils.testing import assert_not_in -from sklearn.utils.testing import assert_less from sklearn.utils.testing import assert_warns from sklearn.utils.testing import ignore_warnings from sklearn.utils.testing import assert_warns_message diff --git a/sklearn/feature_selection/tests/test_from_model.py b/sklearn/feature_selection/tests/test_from_model.py index a5f61bfa0f061..3c281c552c7d5 100644 --- a/sklearn/feature_selection/tests/test_from_model.py +++ b/sklearn/feature_selection/tests/test_from_model.py @@ -1,9 +1,6 @@ import pytest import numpy as np -from sklearn.utils.testing import assert_equal -from sklearn.utils.testing import assert_less -from sklearn.utils.testing import assert_greater from sklearn.utils.testing import assert_array_almost_equal from sklearn.utils.testing import assert_array_equal from sklearn.utils.testing import assert_allclose diff --git a/sklearn/feature_selection/tests/test_rfe.py b/sklearn/feature_selection/tests/test_rfe.py index 1ad5375edb6f3..0ef1cb12efdba 100644 --- a/sklearn/feature_selection/tests/test_rfe.py +++ b/sklearn/feature_selection/tests/test_rfe.py @@ -16,7 +16,6 @@ from sklearn.utils import check_random_state from sklearn.utils.testing import ignore_warnings -from sklearn.utils.testing import assert_greater, assert_equal from sklearn.metrics import make_scorer from sklearn.metrics import get_scorer diff --git a/sklearn/linear_model/tests/test_base.py b/sklearn/linear_model/tests/test_base.py index 84676151d2a74..c9c240125997c 100644 --- a/sklearn/linear_model/tests/test_base.py +++ b/sklearn/linear_model/tests/test_base.py @@ -12,7 +12,6 @@ from sklearn.utils.testing import assert_array_almost_equal from sklearn.utils.testing import assert_array_equal from sklearn.utils.testing import assert_almost_equal -from sklearn.utils.testing import assert_equal from sklearn.utils.testing import assert_allclose from sklearn.linear_model.base import LinearRegression @@ -20,7 +19,6 @@ from sklearn.linear_model.base import _rescale_data from sklearn.linear_model.base import make_dataset from sklearn.utils import check_random_state -from sklearn.utils.testing import assert_greater from sklearn.datasets.samples_generator import make_sparse_uncorrelated from sklearn.datasets.samples_generator import make_regression from sklearn.datasets import load_iris diff --git a/sklearn/linear_model/tests/test_bayes.py b/sklearn/linear_model/tests/test_bayes.py index 355cd042347af..f04d7d9569c49 
100644 --- a/sklearn/linear_model/tests/test_bayes.py +++ b/sklearn/linear_model/tests/test_bayes.py @@ -11,7 +11,6 @@ from sklearn.utils.testing import assert_array_almost_equal from sklearn.utils.testing import assert_almost_equal from sklearn.utils.testing import assert_array_less -from sklearn.utils.testing import assert_equal from sklearn.utils.testing import assert_raise_message from sklearn.utils import check_random_state from sklearn.linear_model.bayes import BayesianRidge, ARDRegression diff --git a/sklearn/linear_model/tests/test_coordinate_descent.py b/sklearn/linear_model/tests/test_coordinate_descent.py index aa6773fce415b..005c0bff343b0 100644 --- a/sklearn/linear_model/tests/test_coordinate_descent.py +++ b/sklearn/linear_model/tests/test_coordinate_descent.py @@ -11,8 +11,6 @@ from sklearn.exceptions import ConvergenceWarning from sklearn.utils.testing import assert_array_almost_equal from sklearn.utils.testing import assert_almost_equal -from sklearn.utils.testing import assert_equal -from sklearn.utils.testing import assert_greater from sklearn.utils.testing import assert_raises from sklearn.utils.testing import assert_raises_regex from sklearn.utils.testing import assert_raise_message diff --git a/sklearn/linear_model/tests/test_huber.py b/sklearn/linear_model/tests/test_huber.py index 4145888c3d996..be2b0106a8ef3 100644 --- a/sklearn/linear_model/tests/test_huber.py +++ b/sklearn/linear_model/tests/test_huber.py @@ -8,7 +8,6 @@ from sklearn.utils.testing import assert_almost_equal from sklearn.utils.testing import assert_array_equal from sklearn.utils.testing import assert_array_almost_equal -from sklearn.utils.testing import assert_greater from sklearn.datasets import make_regression from sklearn.linear_model import ( diff --git a/sklearn/linear_model/tests/test_least_angle.py b/sklearn/linear_model/tests/test_least_angle.py index 9633989693a83..dbef55b973c7b 100644 --- a/sklearn/linear_model/tests/test_least_angle.py +++ b/sklearn/linear_model/tests/test_least_angle.py @@ -9,9 +9,6 @@ from sklearn.model_selection import train_test_split from sklearn.utils.testing import assert_allclose from sklearn.utils.testing import assert_array_almost_equal -from sklearn.utils.testing import assert_equal -from sklearn.utils.testing import assert_less -from sklearn.utils.testing import assert_greater from sklearn.utils.testing import assert_raises from sklearn.utils.testing import ignore_warnings from sklearn.utils.testing import assert_warns diff --git a/sklearn/linear_model/tests/test_logistic.py b/sklearn/linear_model/tests/test_logistic.py index cdbe2f9b3ba37..6fe862db591b4 100644 --- a/sklearn/linear_model/tests/test_logistic.py +++ b/sklearn/linear_model/tests/test_logistic.py @@ -19,8 +19,6 @@ from sklearn.utils.testing import assert_allclose from sklearn.utils.testing import assert_array_almost_equal from sklearn.utils.testing import assert_array_equal -from sklearn.utils.testing import assert_equal -from sklearn.utils.testing import assert_greater from sklearn.utils.testing import assert_raise_message from sklearn.utils.testing import assert_raises from sklearn.utils.testing import assert_warns diff --git a/sklearn/linear_model/tests/test_omp.py b/sklearn/linear_model/tests/test_omp.py index c2ba9d02c296a..a2f89b30935bd 100644 --- a/sklearn/linear_model/tests/test_omp.py +++ b/sklearn/linear_model/tests/test_omp.py @@ -4,7 +4,6 @@ import numpy as np from sklearn.utils.testing import assert_raises -from sklearn.utils.testing import assert_equal from sklearn.utils.testing 
import assert_array_equal from sklearn.utils.testing import assert_array_almost_equal from sklearn.utils.testing import assert_warns diff --git a/sklearn/linear_model/tests/test_passive_aggressive.py b/sklearn/linear_model/tests/test_passive_aggressive.py index d0253bbce64f6..8e8bfdc8b9800 100644 --- a/sklearn/linear_model/tests/test_passive_aggressive.py +++ b/sklearn/linear_model/tests/test_passive_aggressive.py @@ -3,8 +3,6 @@ import pytest -from sklearn.utils.testing import assert_less -from sklearn.utils.testing import assert_greater from sklearn.utils.testing import assert_array_almost_equal, assert_array_equal from sklearn.utils.testing import assert_almost_equal from sklearn.utils.testing import assert_raises diff --git a/sklearn/linear_model/tests/test_perceptron.py b/sklearn/linear_model/tests/test_perceptron.py index 75b91e7b50ba9..bce518b5f2e37 100644 --- a/sklearn/linear_model/tests/test_perceptron.py +++ b/sklearn/linear_model/tests/test_perceptron.py @@ -3,7 +3,6 @@ import pytest from sklearn.utils.testing import assert_array_almost_equal -from sklearn.utils.testing import assert_greater from sklearn.utils.testing import assert_raises from sklearn.utils import check_random_state diff --git a/sklearn/linear_model/tests/test_ransac.py b/sklearn/linear_model/tests/test_ransac.py index af8153590d2ff..5020c2ceb4d32 100644 --- a/sklearn/linear_model/tests/test_ransac.py +++ b/sklearn/linear_model/tests/test_ransac.py @@ -7,7 +7,6 @@ from numpy.testing import assert_array_equal from sklearn.utils import check_random_state -from sklearn.utils.testing import assert_less from sklearn.utils.testing import assert_warns from sklearn.utils.testing import assert_almost_equal from sklearn.utils.testing import assert_raises_regexp diff --git a/sklearn/linear_model/tests/test_ridge.py b/sklearn/linear_model/tests/test_ridge.py index 469d0e03d9173..2743414b7e60c 100644 --- a/sklearn/linear_model/tests/test_ridge.py +++ b/sklearn/linear_model/tests/test_ridge.py @@ -8,9 +8,7 @@ from sklearn.utils.testing import assert_almost_equal from sklearn.utils.testing import assert_allclose from sklearn.utils.testing import assert_array_almost_equal -from sklearn.utils.testing import assert_equal from sklearn.utils.testing import assert_array_equal -from sklearn.utils.testing import assert_greater from sklearn.utils.testing import assert_raises from sklearn.utils.testing import assert_raise_message from sklearn.utils.testing import assert_raises_regex diff --git a/sklearn/linear_model/tests/test_sag.py b/sklearn/linear_model/tests/test_sag.py index 3407d00fb1cc4..99c9c0009435b 100644 --- a/sklearn/linear_model/tests/test_sag.py +++ b/sklearn/linear_model/tests/test_sag.py @@ -19,7 +19,6 @@ from sklearn.utils.testing import assert_almost_equal from sklearn.utils.testing import assert_array_almost_equal from sklearn.utils.testing import assert_allclose -from sklearn.utils.testing import assert_greater from sklearn.utils.testing import assert_raise_message from sklearn.utils import compute_class_weight from sklearn.utils import check_random_state diff --git a/sklearn/linear_model/tests/test_sgd.py b/sklearn/linear_model/tests/test_sgd.py index abdb2ecefd10b..1dd2f48895649 100644 --- a/sklearn/linear_model/tests/test_sgd.py +++ b/sklearn/linear_model/tests/test_sgd.py @@ -9,10 +9,7 @@ from sklearn.utils.testing import assert_array_equal from sklearn.utils.testing import assert_almost_equal from sklearn.utils.testing import assert_array_almost_equal -from sklearn.utils.testing import assert_greater -from 
sklearn.utils.testing import assert_less from sklearn.utils.testing import assert_raises -from sklearn.utils.testing import assert_equal from sklearn.utils.testing import assert_raises_regexp from sklearn.utils.testing import assert_warns from sklearn.utils.testing import ignore_warnings diff --git a/sklearn/linear_model/tests/test_sparse_coordinate_descent.py b/sklearn/linear_model/tests/test_sparse_coordinate_descent.py index 326bcc94433bc..6f20df5caaa6a 100644 --- a/sklearn/linear_model/tests/test_sparse_coordinate_descent.py +++ b/sklearn/linear_model/tests/test_sparse_coordinate_descent.py @@ -3,10 +3,7 @@ from sklearn.utils.testing import assert_array_almost_equal from sklearn.utils.testing import assert_almost_equal -from sklearn.utils.testing import assert_equal -from sklearn.utils.testing import assert_less -from sklearn.utils.testing import assert_greater from sklearn.utils.testing import ignore_warnings from sklearn.utils.testing import assert_warns from sklearn.exceptions import ConvergenceWarning diff --git a/sklearn/manifold/tests/test_isomap.py b/sklearn/manifold/tests/test_isomap.py index da8607a31b916..28505e311abc4 100644 --- a/sklearn/manifold/tests/test_isomap.py +++ b/sklearn/manifold/tests/test_isomap.py @@ -8,7 +8,6 @@ from sklearn import neighbors from sklearn import pipeline from sklearn import preprocessing -from sklearn.utils.testing import assert_less from scipy.sparse import rand as sparse_rand diff --git a/sklearn/manifold/tests/test_locally_linear.py b/sklearn/manifold/tests/test_locally_linear.py index adaddb03d2af3..09a748dab90c3 100644 --- a/sklearn/manifold/tests/test_locally_linear.py +++ b/sklearn/manifold/tests/test_locally_linear.py @@ -6,7 +6,6 @@ from sklearn import neighbors, manifold from sklearn.manifold.locally_linear import barycenter_kneighbors_graph -from sklearn.utils.testing import assert_less from sklearn.utils.testing import ignore_warnings from sklearn.utils.testing import assert_raise_message from sklearn.utils.testing import assert_raises diff --git a/sklearn/manifold/tests/test_spectral_embedding.py b/sklearn/manifold/tests/test_spectral_embedding.py index a14b58eb243fa..9209f5762d221 100644 --- a/sklearn/manifold/tests/test_spectral_embedding.py +++ b/sklearn/manifold/tests/test_spectral_embedding.py @@ -17,7 +17,7 @@ from sklearn.utils.extmath import _deterministic_vector_sign_flip from sklearn.utils.testing import assert_array_almost_equal from sklearn.utils.testing import assert_array_equal -from sklearn.utils.testing import assert_equal, assert_raises +from sklearn.utils.testing import assert_raises from sklearn.utils.testing import SkipTest diff --git a/sklearn/manifold/tests/test_t_sne.py b/sklearn/manifold/tests/test_t_sne.py index 5f49cc3e3e507..2e38169a3de6a 100644 --- a/sklearn/manifold/tests/test_t_sne.py +++ b/sklearn/manifold/tests/test_t_sne.py @@ -8,15 +8,10 @@ from sklearn.neighbors import BallTree from sklearn.neighbors import NearestNeighbors -from sklearn.utils.testing import assert_less_equal -from sklearn.utils.testing import assert_equal from sklearn.utils.testing import assert_almost_equal from sklearn.utils.testing import assert_array_equal from sklearn.utils.testing import assert_array_almost_equal -from sklearn.utils.testing import assert_less -from sklearn.utils.testing import assert_greater from sklearn.utils.testing import assert_raises_regexp -from sklearn.utils.testing import assert_in from sklearn.utils.testing import skip_if_32bit from sklearn.utils import check_random_state from 
sklearn.manifold.t_sne import _joint_probabilities diff --git a/sklearn/metrics/cluster/tests/test_bicluster.py b/sklearn/metrics/cluster/tests/test_bicluster.py index d98ee1fc86b20..d56e5b088df02 100644 --- a/sklearn/metrics/cluster/tests/test_bicluster.py +++ b/sklearn/metrics/cluster/tests/test_bicluster.py @@ -2,7 +2,7 @@ import numpy as np -from sklearn.utils.testing import assert_equal, assert_almost_equal +from sklearn.utils.testing import assert_almost_equal from sklearn.metrics.cluster.bicluster import _jaccard from sklearn.metrics import consensus_score diff --git a/sklearn/metrics/cluster/tests/test_unsupervised.py b/sklearn/metrics/cluster/tests/test_unsupervised.py index 3d225321d211b..02a4e85501e77 100644 --- a/sklearn/metrics/cluster/tests/test_unsupervised.py +++ b/sklearn/metrics/cluster/tests/test_unsupervised.py @@ -5,10 +5,8 @@ from sklearn import datasets from sklearn.utils.testing import assert_array_equal -from sklearn.utils.testing import assert_equal from sklearn.utils.testing import assert_raises_regexp from sklearn.utils.testing import assert_raise_message -from sklearn.utils.testing import assert_greater from sklearn.utils.testing import assert_warns_message from sklearn.metrics.cluster import silhouette_score from sklearn.metrics.cluster import silhouette_samples diff --git a/sklearn/metrics/tests/test_classification.py b/sklearn/metrics/tests/test_classification.py index c65fb969626df..d9a5749980179 100644 --- a/sklearn/metrics/tests/test_classification.py +++ b/sklearn/metrics/tests/test_classification.py @@ -15,7 +15,6 @@ from sklearn.utils.validation import check_random_state from sklearn.utils.testing import assert_raises from sklearn.utils.testing import assert_raise_message -from sklearn.utils.testing import assert_equal from sklearn.utils.testing import assert_almost_equal from sklearn.utils.testing import assert_array_equal from sklearn.utils.testing import assert_array_almost_equal @@ -23,7 +22,6 @@ from sklearn.utils.testing import assert_warns_div0 from sklearn.utils.testing import assert_no_warnings from sklearn.utils.testing import assert_warns_message -from sklearn.utils.testing import assert_not_equal from sklearn.utils.testing import ignore_warnings from sklearn.utils.mocking import MockDataFrame diff --git a/sklearn/metrics/tests/test_common.py b/sklearn/metrics/tests/test_common.py index 13ab6ecd3d804..67e9b66a4b695 100644 --- a/sklearn/metrics/tests/test_common.py +++ b/sklearn/metrics/tests/test_common.py @@ -20,7 +20,6 @@ from sklearn.utils.testing import assert_almost_equal from sklearn.utils.testing import assert_array_equal from sklearn.utils.testing import assert_array_less -from sklearn.utils.testing import assert_equal from sklearn.utils.testing import assert_raise_message from sklearn.utils.testing import assert_raises from sklearn.utils.testing import ignore_warnings diff --git a/sklearn/metrics/tests/test_pairwise.py b/sklearn/metrics/tests/test_pairwise.py index ecf943a4c0bcc..89d343d092fdc 100644 --- a/sklearn/metrics/tests/test_pairwise.py +++ b/sklearn/metrics/tests/test_pairwise.py @@ -11,11 +11,9 @@ from sklearn import config_context -from sklearn.utils.testing import assert_greater from sklearn.utils.testing import assert_array_almost_equal from sklearn.utils.testing import assert_allclose from sklearn.utils.testing import assert_almost_equal -from sklearn.utils.testing import assert_equal from sklearn.utils.testing import assert_array_equal from sklearn.utils.testing import assert_raises from sklearn.utils.testing import 
assert_raises_regexp diff --git a/sklearn/metrics/tests/test_ranking.py b/sklearn/metrics/tests/test_ranking.py index 78e2d2a69a48c..140c1c7abad9c 100644 --- a/sklearn/metrics/tests/test_ranking.py +++ b/sklearn/metrics/tests/test_ranking.py @@ -12,9 +12,8 @@ from sklearn.utils.validation import check_array, check_consistent_length from sklearn.utils.validation import check_random_state -from sklearn.utils.testing import assert_raises, clean_warning_registry +from sklearn.utils.testing import assert_raises from sklearn.utils.testing import assert_raise_message -from sklearn.utils.testing import assert_equal from sklearn.utils.testing import assert_almost_equal from sklearn.utils.testing import assert_array_equal from sklearn.utils.testing import assert_array_almost_equal diff --git a/sklearn/metrics/tests/test_regression.py b/sklearn/metrics/tests/test_regression.py index 7903de36260f5..bc4cacb62e8d7 100644 --- a/sklearn/metrics/tests/test_regression.py +++ b/sklearn/metrics/tests/test_regression.py @@ -4,7 +4,6 @@ import pytest from sklearn.utils.testing import assert_raises, assert_raises_regex -from sklearn.utils.testing import assert_equal from sklearn.utils.testing import assert_almost_equal from sklearn.utils.testing import assert_array_equal from sklearn.utils.testing import assert_array_almost_equal diff --git a/sklearn/metrics/tests/test_score_objects.py b/sklearn/metrics/tests/test_score_objects.py index 8ce7fd6389271..f1b9120b06442 100644 --- a/sklearn/metrics/tests/test_score_objects.py +++ b/sklearn/metrics/tests/test_score_objects.py @@ -10,11 +10,9 @@ from sklearn.utils.testing import assert_almost_equal from sklearn.utils.testing import assert_array_equal -from sklearn.utils.testing import assert_equal from sklearn.utils.testing import assert_raises from sklearn.utils.testing import assert_raises_regexp from sklearn.utils.testing import ignore_warnings -from sklearn.utils.testing import assert_not_equal from sklearn.base import BaseEstimator from sklearn.metrics import (f1_score, r2_score, roc_auc_score, fbeta_score, diff --git a/sklearn/mixture/tests/test_bayesian_mixture.py b/sklearn/mixture/tests/test_bayesian_mixture.py index d62920af454e5..74426c81ef803 100644 --- a/sklearn/mixture/tests/test_bayesian_mixture.py +++ b/sklearn/mixture/tests/test_bayesian_mixture.py @@ -20,7 +20,7 @@ from sklearn.mixture.tests.test_gaussian_mixture import RandomData from sklearn.exceptions import ConvergenceWarning, NotFittedError -from sklearn.utils.testing import assert_greater_equal, ignore_warnings +from sklearn.utils.testing import ignore_warnings COVARIANCE_TYPE = ['full', 'tied', 'diag', 'spherical'] diff --git a/sklearn/mixture/tests/test_gaussian_mixture.py b/sklearn/mixture/tests/test_gaussian_mixture.py index c1f451c7d8495..66a42bd843283 100644 --- a/sklearn/mixture/tests/test_gaussian_mixture.py +++ b/sklearn/mixture/tests/test_gaussian_mixture.py @@ -28,9 +28,6 @@ from sklearn.utils.testing import assert_almost_equal from sklearn.utils.testing import assert_array_almost_equal from sklearn.utils.testing import assert_array_equal -from sklearn.utils.testing import assert_equal -from sklearn.utils.testing import assert_greater -from sklearn.utils.testing import assert_greater_equal from sklearn.utils.testing import assert_raise_message from sklearn.utils.testing import assert_warns_message from sklearn.utils.testing import ignore_warnings diff --git a/sklearn/model_selection/tests/test_search.py b/sklearn/model_selection/tests/test_search.py index 695e68aae7dfe..90a837e7f49f1 
100644 --- a/sklearn/model_selection/tests/test_search.py +++ b/sklearn/model_selection/tests/test_search.py @@ -13,8 +13,6 @@ import pytest from sklearn.utils.fixes import sp_version -from sklearn.utils.testing import assert_equal -from sklearn.utils.testing import assert_not_equal from sklearn.utils.testing import assert_raises from sklearn.utils.testing import assert_warns from sklearn.utils.testing import assert_warns_message @@ -23,7 +21,6 @@ from sklearn.utils.testing import assert_array_almost_equal from sklearn.utils.testing import assert_allclose from sklearn.utils.testing import assert_almost_equal -from sklearn.utils.testing import assert_greater_equal from sklearn.utils.testing import ignore_warnings from sklearn.utils.mocking import CheckingClassifier, MockDataFrame diff --git a/sklearn/model_selection/tests/test_split.py b/sklearn/model_selection/tests/test_split.py index f0c0f6f453c5c..a6f1fbee7cf90 100644 --- a/sklearn/model_selection/tests/test_split.py +++ b/sklearn/model_selection/tests/test_split.py @@ -7,13 +7,9 @@ from itertools import combinations from itertools import combinations_with_replacement -from sklearn.utils.testing import assert_equal from sklearn.utils.testing import assert_almost_equal from sklearn.utils.testing import assert_raises from sklearn.utils.testing import assert_raises_regexp -from sklearn.utils.testing import assert_greater -from sklearn.utils.testing import assert_greater_equal -from sklearn.utils.testing import assert_not_equal from sklearn.utils.testing import assert_array_almost_equal from sklearn.utils.testing import assert_array_equal from sklearn.utils.testing import assert_warns_message diff --git a/sklearn/model_selection/tests/test_validation.py b/sklearn/model_selection/tests/test_validation.py index 2c84439a7c29d..6fa2e4fee5ed7 100644 --- a/sklearn/model_selection/tests/test_validation.py +++ b/sklearn/model_selection/tests/test_validation.py @@ -13,15 +13,12 @@ from sklearn.model_selection.tests.test_search import FailingClassifier -from sklearn.utils.testing import assert_equal from sklearn.utils.testing import assert_almost_equal from sklearn.utils.testing import assert_raises from sklearn.utils.testing import assert_raise_message from sklearn.utils.testing import assert_warns from sklearn.utils.testing import assert_warns_message from sklearn.utils.testing import assert_raises_regex -from sklearn.utils.testing import assert_greater -from sklearn.utils.testing import assert_less from sklearn.utils.testing import assert_array_almost_equal from sklearn.utils.testing import assert_array_equal from sklearn.utils.testing import assert_allclose diff --git a/sklearn/neighbors/tests/test_lof.py b/sklearn/neighbors/tests/test_lof.py index a00017494e328..1dc13f4ac759a 100644 --- a/sklearn/neighbors/tests/test_lof.py +++ b/sklearn/neighbors/tests/test_lof.py @@ -13,9 +13,7 @@ from sklearn.metrics import roc_auc_score from sklearn.utils import check_random_state -from sklearn.utils.testing import assert_greater from sklearn.utils.testing import assert_array_almost_equal -from sklearn.utils.testing import assert_equal from sklearn.utils.testing import assert_warns_message from sklearn.utils.testing import assert_raises from sklearn.utils.testing import assert_raises_regex diff --git a/sklearn/neighbors/tests/test_neighbors.py b/sklearn/neighbors/tests/test_neighbors.py index afa7159d3d61c..6e440aebfc427 100644 --- a/sklearn/neighbors/tests/test_neighbors.py +++ b/sklearn/neighbors/tests/test_neighbors.py @@ -16,9 +16,6 @@ from 
sklearn.neighbors.base import VALID_METRICS_SPARSE, VALID_METRICS from sklearn.utils.testing import assert_array_almost_equal from sklearn.utils.testing import assert_array_equal -from sklearn.utils.testing import assert_equal -from sklearn.utils.testing import assert_greater -from sklearn.utils.testing import assert_in from sklearn.utils.testing import assert_raises from sklearn.utils.testing import assert_raises_regex from sklearn.utils.testing import assert_warns diff --git a/sklearn/preprocessing/tests/test_data.py b/sklearn/preprocessing/tests/test_data.py index 6d21ba340b1ae..46769cad40edf 100644 --- a/sklearn/preprocessing/tests/test_data.py +++ b/sklearn/preprocessing/tests/test_data.py @@ -21,9 +21,6 @@ from sklearn.utils.testing import assert_array_almost_equal from sklearn.utils.testing import assert_array_equal from sklearn.utils.testing import assert_array_less -from sklearn.utils.testing import assert_equal -from sklearn.utils.testing import assert_greater_equal -from sklearn.utils.testing import assert_less_equal from sklearn.utils.testing import assert_raises from sklearn.utils.testing import assert_raises_regex from sklearn.utils.testing import assert_warns_message diff --git a/sklearn/preprocessing/tests/test_function_transformer.py b/sklearn/preprocessing/tests/test_function_transformer.py index 1735b0021c545..73d7f40927b5e 100644 --- a/sklearn/preprocessing/tests/test_function_transformer.py +++ b/sklearn/preprocessing/tests/test_function_transformer.py @@ -6,7 +6,6 @@ from sklearn.utils.testing import (assert_equal, assert_array_equal, assert_allclose_dense_sparse) from sklearn.utils.testing import assert_warns_message, assert_no_warnings -from sklearn.utils.testing import ignore_warnings def _make_func(args_store, kwargs_store, func=lambda X, *a, **k: X): diff --git a/sklearn/preprocessing/tests/test_label.py b/sklearn/preprocessing/tests/test_label.py index ebb7d6b0138f8..a095f4ec64cab 100644 --- a/sklearn/preprocessing/tests/test_label.py +++ b/sklearn/preprocessing/tests/test_label.py @@ -12,7 +12,6 @@ from sklearn.utils.multiclass import type_of_target from sklearn.utils.testing import assert_array_equal -from sklearn.utils.testing import assert_equal from sklearn.utils.testing import assert_raises from sklearn.utils.testing import assert_raise_message from sklearn.utils.testing import assert_warns_message diff --git a/sklearn/semi_supervised/tests/test_label_propagation.py b/sklearn/semi_supervised/tests/test_label_propagation.py index d5a4449ee9cc5..aff9bba67d298 100644 --- a/sklearn/semi_supervised/tests/test_label_propagation.py +++ b/sklearn/semi_supervised/tests/test_label_propagation.py @@ -2,7 +2,6 @@ import numpy as np -from sklearn.utils.testing import assert_equal from sklearn.utils.testing import assert_warns from sklearn.utils.testing import assert_raises from sklearn.utils.testing import assert_no_warnings diff --git a/sklearn/svm/tests/test_svm.py b/sklearn/svm/tests/test_svm.py index d7f7a1534f728..55d60b5351a4b 100644 --- a/sklearn/svm/tests/test_svm.py +++ b/sklearn/svm/tests/test_svm.py @@ -17,8 +17,6 @@ from sklearn.metrics import f1_score from sklearn.metrics.pairwise import rbf_kernel from sklearn.utils import check_random_state -from sklearn.utils.testing import assert_equal -from sklearn.utils.testing import assert_greater, assert_in, assert_less from sklearn.utils.testing import assert_raises_regexp, assert_warns from sklearn.utils.testing import assert_warns_message, assert_raise_message from sklearn.utils.testing import ignore_warnings, 
assert_raises diff --git a/sklearn/tests/test_base.py b/sklearn/tests/test_base.py index 257753c23584f..032d9b232523f 100644 --- a/sklearn/tests/test_base.py +++ b/sklearn/tests/test_base.py @@ -7,12 +7,9 @@ import sklearn from sklearn.utils.testing import assert_array_equal -from sklearn.utils.testing import assert_equal -from sklearn.utils.testing import assert_not_equal from sklearn.utils.testing import assert_raises from sklearn.utils.testing import assert_no_warnings from sklearn.utils.testing import assert_warns_message -from sklearn.utils.testing import assert_dict_equal from sklearn.utils.testing import ignore_warnings from sklearn.base import BaseEstimator, clone, is_classifier diff --git a/sklearn/tests/test_common.py b/sklearn/tests/test_common.py index af5ea34e1d604..51f71f2f7919b 100644 --- a/sklearn/tests/test_common.py +++ b/sklearn/tests/test_common.py @@ -16,8 +16,6 @@ import pytest from sklearn.utils.testing import all_estimators -from sklearn.utils.testing import assert_equal -from sklearn.utils.testing import assert_in from sklearn.utils.testing import ignore_warnings from sklearn.exceptions import ConvergenceWarning, SkipTestWarning diff --git a/sklearn/tests/test_discriminant_analysis.py b/sklearn/tests/test_discriminant_analysis.py index e6527cc4330f7..6a32c4ec15058 100644 --- a/sklearn/tests/test_discriminant_analysis.py +++ b/sklearn/tests/test_discriminant_analysis.py @@ -11,12 +11,10 @@ assert_warns_message) from sklearn.utils.testing import assert_array_almost_equal from sklearn.utils.testing import assert_allclose -from sklearn.utils.testing import assert_equal from sklearn.utils.testing import assert_almost_equal from sklearn.utils.testing import assert_raises from sklearn.utils.testing import assert_raise_message from sklearn.utils.testing import assert_warns -from sklearn.utils.testing import assert_greater from sklearn.utils.testing import ignore_warnings from sklearn.datasets import make_blobs diff --git a/sklearn/tests/test_docstring_parameters.py b/sklearn/tests/test_docstring_parameters.py index 936b691a12485..a59ab5c7442dd 100644 --- a/sklearn/tests/test_docstring_parameters.py +++ b/sklearn/tests/test_docstring_parameters.py @@ -7,7 +7,7 @@ import importlib from pkgutil import walk_packages -from inspect import getsource, isabstract, signature +from inspect import signature import sklearn from sklearn.utils import IS_PYPY diff --git a/sklearn/tests/test_dummy.py b/sklearn/tests/test_dummy.py index dc80b2d073d81..4301a4c07654f 100644 --- a/sklearn/tests/test_dummy.py +++ b/sklearn/tests/test_dummy.py @@ -7,7 +7,6 @@ from sklearn.base import clone from sklearn.utils.testing import assert_array_equal from sklearn.utils.testing import assert_array_almost_equal -from sklearn.utils.testing import assert_equal from sklearn.utils.testing import assert_almost_equal from sklearn.utils.testing import assert_raises from sklearn.utils.testing import assert_warns_message diff --git a/sklearn/tests/test_init.py b/sklearn/tests/test_init.py index c2a216dddb937..06aeeacd1c9a0 100644 --- a/sklearn/tests/test_init.py +++ b/sklearn/tests/test_init.py @@ -1,6 +1,5 @@ # Basic unittests to test functioning of module's top-level -from sklearn.utils.testing import assert_equal __author__ = 'Yaroslav Halchenko' __license__ = 'BSD' diff --git a/sklearn/tests/test_kernel_approximation.py b/sklearn/tests/test_kernel_approximation.py index 79848f5561799..ed0d95d568b6a 100644 --- a/sklearn/tests/test_kernel_approximation.py +++ b/sklearn/tests/test_kernel_approximation.py @@ -2,10 
+2,8 @@ from scipy.sparse import csr_matrix import pytest -from sklearn.utils.testing import assert_array_equal, assert_equal -from sklearn.utils.testing import assert_not_equal +from sklearn.utils.testing import assert_array_equal from sklearn.utils.testing import assert_array_almost_equal, assert_raises -from sklearn.utils.testing import assert_less_equal from sklearn.metrics.pairwise import kernel_metrics from sklearn.kernel_approximation import RBFSampler diff --git a/sklearn/tests/test_multiclass.py b/sklearn/tests/test_multiclass.py index 61b34a7509200..aef4080e85e1d 100644 --- a/sklearn/tests/test_multiclass.py +++ b/sklearn/tests/test_multiclass.py @@ -6,11 +6,9 @@ from re import escape from sklearn.utils.testing import assert_array_equal -from sklearn.utils.testing import assert_equal from sklearn.utils.testing import assert_almost_equal from sklearn.utils.testing import assert_raises from sklearn.utils.testing import assert_warns -from sklearn.utils.testing import assert_greater from sklearn.utils.testing import assert_raise_message from sklearn.utils.testing import assert_raises_regexp from sklearn.multiclass import OneVsRestClassifier diff --git a/sklearn/tests/test_multioutput.py b/sklearn/tests/test_multioutput.py index 35a739fee122b..2f4369c8e8085 100644 --- a/sklearn/tests/test_multioutput.py +++ b/sklearn/tests/test_multioutput.py @@ -9,9 +9,6 @@ from sklearn.utils.testing import assert_raises_regex from sklearn.utils.testing import assert_raise_message from sklearn.utils.testing import assert_array_equal -from sklearn.utils.testing import assert_equal -from sklearn.utils.testing import assert_greater -from sklearn.utils.testing import assert_not_equal from sklearn.utils.testing import assert_array_almost_equal from sklearn import datasets from sklearn.base import clone diff --git a/sklearn/tests/test_naive_bayes.py b/sklearn/tests/test_naive_bayes.py index 018860d96fa84..d8bfcc12993c2 100644 --- a/sklearn/tests/test_naive_bayes.py +++ b/sklearn/tests/test_naive_bayes.py @@ -13,10 +13,8 @@ from sklearn.utils.testing import assert_almost_equal from sklearn.utils.testing import assert_array_equal from sklearn.utils.testing import assert_array_almost_equal -from sklearn.utils.testing import assert_equal from sklearn.utils.testing import assert_raises from sklearn.utils.testing import assert_raise_message -from sklearn.utils.testing import assert_greater from sklearn.utils.testing import assert_warns from sklearn.utils.testing import assert_no_warnings diff --git a/sklearn/tests/test_pipeline.py b/sklearn/tests/test_pipeline.py index 0a2e67d599d85..e02b5ef96b7b0 100644 --- a/sklearn/tests/test_pipeline.py +++ b/sklearn/tests/test_pipeline.py @@ -16,11 +16,9 @@ from sklearn.utils.testing import assert_raises from sklearn.utils.testing import assert_raises_regex from sklearn.utils.testing import assert_raise_message -from sklearn.utils.testing import assert_equal from sklearn.utils.testing import assert_allclose from sklearn.utils.testing import assert_array_equal from sklearn.utils.testing import assert_array_almost_equal -from sklearn.utils.testing import assert_dict_equal from sklearn.utils.testing import assert_no_warnings from sklearn.base import clone, BaseEstimator diff --git a/sklearn/tests/test_random_projection.py b/sklearn/tests/test_random_projection.py index 93d22ba0e88ef..000a0488f9bed 100644 --- a/sklearn/tests/test_random_projection.py +++ b/sklearn/tests/test_random_projection.py @@ -13,13 +13,10 @@ from sklearn.random_projection import SparseRandomProjection 
from sklearn.random_projection import GaussianRandomProjection -from sklearn.utils.testing import assert_less from sklearn.utils.testing import assert_raises from sklearn.utils.testing import assert_raise_message from sklearn.utils.testing import assert_array_equal -from sklearn.utils.testing import assert_equal from sklearn.utils.testing import assert_almost_equal -from sklearn.utils.testing import assert_in from sklearn.utils.testing import assert_array_almost_equal from sklearn.utils.testing import assert_warns from sklearn.exceptions import DataDimensionalityWarning diff --git a/sklearn/tree/tests/test_tree.py b/sklearn/tree/tests/test_tree.py index dbce4a5d0d560..12b424b9bf3b7 100644 --- a/sklearn/tree/tests/test_tree.py +++ b/sklearn/tree/tests/test_tree.py @@ -22,13 +22,7 @@ from sklearn.utils.testing import assert_array_equal from sklearn.utils.testing import assert_array_almost_equal from sklearn.utils.testing import assert_almost_equal -from sklearn.utils.testing import assert_equal -from sklearn.utils.testing import assert_in from sklearn.utils.testing import assert_raises -from sklearn.utils.testing import assert_greater -from sklearn.utils.testing import assert_greater_equal -from sklearn.utils.testing import assert_less -from sklearn.utils.testing import assert_less_equal from sklearn.utils.testing import assert_warns from sklearn.utils.testing import assert_warns_message from sklearn.utils.testing import ignore_warnings diff --git a/sklearn/utils/tests/test_class_weight.py b/sklearn/utils/tests/test_class_weight.py index e67fa6eb898ec..59db6fe5f27a7 100644 --- a/sklearn/utils/tests/test_class_weight.py +++ b/sklearn/utils/tests/test_class_weight.py @@ -10,7 +10,6 @@ from sklearn.utils.testing import assert_almost_equal from sklearn.utils.testing import assert_raises from sklearn.utils.testing import assert_raise_message -from sklearn.utils.testing import assert_equal def test_compute_class_weight(): diff --git a/sklearn/utils/tests/test_extmath.py b/sklearn/utils/tests/test_extmath.py index bc8b598764b1a..2da6e5f5e9943 100644 --- a/sklearn/utils/tests/test_extmath.py +++ b/sklearn/utils/tests/test_extmath.py @@ -12,12 +12,10 @@ import pytest -from sklearn.utils.testing import assert_equal from sklearn.utils.testing import assert_almost_equal from sklearn.utils.testing import assert_allclose from sklearn.utils.testing import assert_array_equal from sklearn.utils.testing import assert_array_almost_equal -from sklearn.utils.testing import assert_greater from sklearn.utils.testing import assert_warns from sklearn.utils.testing import assert_warns_message from sklearn.utils.testing import skip_if_32bit diff --git a/sklearn/utils/tests/test_fast_dict.py b/sklearn/utils/tests/test_fast_dict.py index b060c5f599e9e..4afbf9e1cbbab 100644 --- a/sklearn/utils/tests/test_fast_dict.py +++ b/sklearn/utils/tests/test_fast_dict.py @@ -3,7 +3,6 @@ import numpy as np from sklearn.utils.fast_dict import IntFloatDict, argmin -from sklearn.utils.testing import assert_equal def test_int_float_dict(): diff --git a/sklearn/utils/tests/test_multiclass.py b/sklearn/utils/tests/test_multiclass.py index 4dc44e797e211..e28adc249f04d 100644 --- a/sklearn/utils/tests/test_multiclass.py +++ b/sklearn/utils/tests/test_multiclass.py @@ -13,7 +13,6 @@ from sklearn.utils.testing import assert_array_equal from sklearn.utils.testing import assert_array_almost_equal -from sklearn.utils.testing import assert_equal from sklearn.utils.testing import assert_raises from sklearn.utils.testing import assert_raises_regex 
 from sklearn.utils.testing import assert_allclose
diff --git a/sklearn/utils/tests/test_murmurhash.py b/sklearn/utils/tests/test_murmurhash.py
index abd03bff61d29..838c8c8000b9e 100644
--- a/sklearn/utils/tests/test_murmurhash.py
+++ b/sklearn/utils/tests/test_murmurhash.py
@@ -6,7 +6,6 @@
 from sklearn.utils.murmurhash import murmurhash3_32
 from numpy.testing import assert_array_almost_equal
 from numpy.testing import assert_array_equal
-from sklearn.utils.testing import assert_equal
 def test_mmhash3_int():
diff --git a/sklearn/utils/tests/test_validation.py b/sklearn/utils/tests/test_validation.py
index 7cd6929892170..0aa8eae22b1e2 100644
--- a/sklearn/utils/tests/test_validation.py
+++ b/sklearn/utils/tests/test_validation.py
@@ -11,7 +11,6 @@
 import numpy as np
 import scipy.sparse as sp
-from sklearn.utils.testing import assert_equal
 from sklearn.utils.testing import assert_raises
 from sklearn.utils.testing import assert_raises_regex
 from sklearn.utils.testing import assert_no_warnings