From b2ac13c88c0386e05bc65c3b6f1e5781c44cefe1 Mon Sep 17 00:00:00 2001
From: gkjohns <gkjohns@gmail.com>
Date: Tue, 29 Aug 2017 02:08:26 -0400
Subject: [PATCH 01/51] initial commit

---
 sklearn/utils/tests/test_estimator_checks.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/sklearn/utils/tests/test_estimator_checks.py b/sklearn/utils/tests/test_estimator_checks.py
index 1b3a1ea7e597a..7f88e9e147834 100644
--- a/sklearn/utils/tests/test_estimator_checks.py
+++ b/sklearn/utils/tests/test_estimator_checks.py
@@ -251,3 +251,8 @@ def __init__(self):
                         check_no_fit_attributes_set_in_init,
                         'estimator_name',
                         NonConformantEstimator)
+
+def test_check_estimator_pairwise():
+    # check that check_estimator() works on estimator with _pairwise
+    # attribute set_random_state
+    pass

From 124622b6c92ee3c434367f0e921e0cf24efdbd0d Mon Sep 17 00:00:00 2001
From: gkjohns <gkjohns@gmail.com>
Date: Tue, 29 Aug 2017 12:59:09 -0400
Subject: [PATCH 02/51] add test for check_estimator on
 SVC(kernel='precomputed')

---
 sklearn/utils/tests/test_estimator_checks.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/sklearn/utils/tests/test_estimator_checks.py b/sklearn/utils/tests/test_estimator_checks.py
index 7f88e9e147834..82afcac04c2e9 100644
--- a/sklearn/utils/tests/test_estimator_checks.py
+++ b/sklearn/utils/tests/test_estimator_checks.py
@@ -18,6 +18,7 @@
 from sklearn.cluster import MiniBatchKMeans
 from sklearn.decomposition import NMF
 from sklearn.linear_model import MultiTaskElasticNet
+from sklearn.svm import SVC
 from sklearn.utils.validation import check_X_y, check_array
 
 
@@ -254,5 +255,6 @@ def __init__(self):
 
 def test_check_estimator_pairwise():
     # check that check_estimator() works on estimator with _pairwise
-    # attribute set_random_state
-    pass
+    # attribute set
+    est = SVC(kernel='precomputed')
+    check_estimator(est)

From 578865e061e8669f1e8e5077db17a735b2f58378 Mon Sep 17 00:00:00 2001
From: gkjohns <gkjohns@gmail.com>
Date: Wed, 6 Sep 2017 18:17:54 -0400
Subject: [PATCH 03/51] change tests to run on estimators with _pairwise set to
 True

---
 sklearn/base.py                   | 18 ++++++++
 sklearn/utils/estimator_checks.py | 77 ++++++++++++++++++++++++-------
 2 files changed, 79 insertions(+), 16 deletions(-)

diff --git a/sklearn/base.py b/sklearn/base.py
index aa4f9f9ce17c1..79e78761657a2 100644
--- a/sklearn/base.py
+++ b/sklearn/base.py
@@ -578,3 +578,21 @@ def is_regressor(estimator):
         True if estimator is a regressor and False otherwise.
     """
     return getattr(estimator, "_estimator_type", None) == "regressor"
+
+
+def is_pairwise(estimator):
+    """Returns True if the given estimator has a _pairwise attribute
+    set to True.
+
+
+    Parameters
+    ----------
+    estimator : object
+        Estimator object to test.
+
+    Returns
+    -------
+    out : bool
+        True if _pairwise is set and True and False otherwise.
+    """
+    return getattr(estimator, "_pairwise", False)
diff --git a/sklearn/utils/estimator_checks.py b/sklearn/utils/estimator_checks.py
index 3e7cb198a9d12..63c9536ac0e45 100644
--- a/sklearn/utils/estimator_checks.py
+++ b/sklearn/utils/estimator_checks.py
@@ -36,11 +36,13 @@
 
 
 from sklearn.base import (clone, TransformerMixin, ClusterMixin,
-                          BaseEstimator, is_classifier, is_regressor)
+                          BaseEstimator, is_classifier, is_regressor,
+                          is_pairwise)
 from sklearn.metrics import accuracy_score, adjusted_rand_score, f1_score
 
 from sklearn.random_projection import BaseRandomProjection
 from sklearn.feature_selection import SelectKBest
+from sklearn.svm import SVC
 from sklearn.svm.base import BaseLibSVM
 from sklearn.linear_model.stochastic_gradient import BaseSGD
 from sklearn.pipeline import make_pipeline
@@ -48,6 +50,8 @@
 from sklearn.exceptions import DataConversionWarning
 from sklearn.exceptions import SkipTestWarning
 from sklearn.model_selection import train_test_split
+from sklearn.metrics.pairwise import (rbf_kernel, cosine_similarity,
+                                      linear_kernel)
 
 from sklearn.utils import shuffle
 from sklearn.utils.fixes import signature
@@ -353,10 +357,22 @@ def _is_32bit():
     return struct.calcsize('P') * 8 == 32
 
 
+def gram_matrix_if_pairwise(X, estimator, kernel=linear_kernel):
+    if is_pairwise(estimator):
+        return kernel(X, X)
+    return X
+
+
 def check_estimator_sparse_data(name, estimator_orig):
+
+    # Sparse precomputed kernels aren't supported
+    if getattr(estimator_orig, 'kernel', None) == 'precomputed':
+        return
+
     rng = np.random.RandomState(0)
     X = rng.rand(40, 10)
     X[X < .8] = 0
+    X = gram_matrix_if_pairwise(X, estimator_orig)
     X_csr = sparse.csr_matrix(X)
     y = (4 * rng.rand(40)).astype(np.int)
     # catch deprecation warnings
@@ -404,6 +420,7 @@ def check_sample_weights_pandas_series(name, estimator_orig):
         try:
             import pandas as pd
             X = pd.DataFrame([[1, 1], [1, 2], [1, 3], [2, 1], [2, 2], [2, 3]])
+            X = gram_matrix_if_pairwise(X, estimator_orig)
             y = pd.Series([1, 1, 1, 2, 2, 2])
             weights = pd.Series([1] * 6)
             try:
@@ -424,7 +441,7 @@ def check_sample_weights_list(name, estimator_orig):
     if has_fit_parameter(estimator_orig, "sample_weight"):
         estimator = clone(estimator_orig)
         rnd = np.random.RandomState(0)
-        X = rnd.uniform(size=(10, 3))
+        X = gram_matrix_if_pairwise(rnd.uniform(size=(10, 3)), estimator_orig)
         y = np.arange(10) % 3
         y = multioutput_estimator_convert_y_2d(estimator, y)
         sample_weight = [3] * 10
@@ -436,7 +453,8 @@ def check_sample_weights_list(name, estimator_orig):
 def check_dtype_object(name, estimator_orig):
     # check that estimators treat dtype object as numeric if possible
     rng = np.random.RandomState(0)
-    X = rng.rand(40, 10).astype(object)
+    X = gram_matrix_if_pairwise(rng.rand(40, 10), estimator_orig)
+    X = X.astype(object)
     y = (X[:, 0] * 4).astype(np.int)
     estimator = clone(estimator_orig)
     y = multioutput_estimator_convert_y_2d(estimator, y)
@@ -483,6 +501,8 @@ def check_dict_unchanged(name, estimator_orig):
     else:
         X = 2 * rnd.uniform(size=(20, 3))
 
+    X = gram_matrix_if_pairwise(X, estimator_orig)
+
     y = X[:, 0].astype(np.int)
     estimator = clone(estimator_orig)
     y = multioutput_estimator_convert_y_2d(estimator, y)
@@ -520,6 +540,7 @@ def check_dont_overwrite_parameters(name, estimator_orig):
     estimator = clone(estimator_orig)
     rnd = np.random.RandomState(0)
     X = 3 * rnd.uniform(size=(20, 3))
+    X = gram_matrix_if_pairwise(X, estimator_orig)
     y = X[:, 0].astype(np.int)
     y = multioutput_estimator_convert_y_2d(estimator, y)
 
@@ -566,6 +587,7 @@ def check_fit2d_predict1d(name, estimator_orig):
     # check by fitting a 2d array and predicting with a 1d array
     rnd = np.random.RandomState(0)
     X = 3 * rnd.uniform(size=(20, 3))
+    X = gram_matrix_if_pairwise(X, estimator_orig)
     y = X[:, 0].astype(np.int)
     estimator = clone(estimator_orig)
     y = multioutput_estimator_convert_y_2d(estimator, y)
@@ -798,6 +820,7 @@ def check_pipeline_consistency(name, estimator_orig):
     X, y = make_blobs(n_samples=30, centers=[[0, 0, 0], [1, 1, 1]],
                       random_state=0, n_features=2, cluster_std=0.1)
     X -= X.min()
+    X = gram_matrix_if_pairwise(X, estimator_orig, kernel=rbf_kernel)
     estimator = clone(estimator_orig)
     y = multioutput_estimator_convert_y_2d(estimator, y)
     set_random_state(estimator)
@@ -822,6 +845,7 @@ def check_fit_score_takes_y(name, estimator_orig):
     # in fit and score so they can be used in pipelines
     rnd = np.random.RandomState(0)
     X = rnd.uniform(size=(10, 3))
+    X = gram_matrix_if_pairwise(X, estimator_orig)
     y = np.arange(10) % 3
     estimator = clone(estimator_orig)
     y = multioutput_estimator_convert_y_2d(estimator, y)
@@ -847,6 +871,7 @@ def check_fit_score_takes_y(name, estimator_orig):
 def check_estimators_dtypes(name, estimator_orig):
     rnd = np.random.RandomState(0)
     X_train_32 = 3 * rnd.uniform(size=(20, 5)).astype(np.float32)
+    X_train_32 = gram_matrix_if_pairwise(X_train_32, estimator_orig)
     X_train_64 = X_train_32.astype(np.float64)
     X_train_int_64 = X_train_32.astype(np.int64)
     X_train_int_32 = X_train_32.astype(np.int32)
@@ -892,7 +917,8 @@ def check_estimators_empty_data_messages(name, estimator_orig):
 def check_estimators_nan_inf(name, estimator_orig):
     # Checks that Estimator X's do not contain NaN or inf.
     rnd = np.random.RandomState(0)
-    X_train_finite = rnd.uniform(size=(10, 3))
+    X_train_finite = gram_matrix_if_pairwise(rnd.uniform(size=(10, 3)),
+                                             estimator_orig)
     X_train_nan = rnd.uniform(size=(10, 3))
     X_train_nan[0, 0] = np.nan
     X_train_inf = rnd.uniform(size=(10, 3))
@@ -969,6 +995,7 @@ def check_estimators_pickle(name, estimator_orig):
 
     # some estimators can't do features less than 0
     X -= X.min()
+    X = gram_matrix_if_pairwise(X, estimator_orig, kernel=rbf_kernel)
 
     estimator = clone(estimator_orig)
 
@@ -1119,6 +1146,7 @@ def check_classifiers_train(name, classifier_orig):
         classifier = clone(classifier_orig)
         if name in ['BernoulliNB', 'MultinomialNB', 'ComplementNB']:
             X -= X.min()
+        X = gram_matrix_if_pairwise(X, classifier_orig)
         set_random_state(classifier)
         # raises error on malformed input for fit
         with assert_raises(ValueError, msg="The classifer {} does not"
@@ -1140,11 +1168,12 @@ def check_classifiers_train(name, classifier_orig):
             assert_greater(accuracy_score(y, y_pred), 0.83)
 
         # raises error on malformed input for predict
-        with assert_raises(ValueError, msg="The classifier {} does not"
-                           " raise an error when the number of features "
-                           "in predict is different from the number of"
-                           " features in fit.".format(name)):
-            classifier.predict(X.T)
+        if not is_pairwise(classifier):
+            with assert_raises(ValueError, msg="The classifier {} does not"
+                               " raise an error when the number of features "
+                               "in predict is different from the number of"
+                               " features in fit.".format(name)):
+                classifier.predict(X.T)
         if hasattr(classifier, "decision_function"):
             try:
                 # decision_function agrees with predict
@@ -1160,12 +1189,13 @@ def check_classifiers_train(name, classifier_orig):
                     assert_array_equal(np.argmax(decision, axis=1), y_pred)
 
                 # raises error on malformed input for decision_function
-                with assert_raises(ValueError, msg="The classifier {} does"
-                                   " not raise an error when the number of "
-                                   "features in decision_function is "
-                                   "different from the number of features"
-                                   " in fit.".format(name)):
-                    classifier.decision_function(X.T)
+                if not is_pairwise(classifier):
+                    with assert_raises(ValueError, msg="The classifier {} does"
+                                       " not raise an error when the number of "
+                                       "features in decision_function is "
+                                       "different from the number of features"
+                                       " in fit.".format(name)):
+                        classifier.decision_function(X.T)
             except NotImplementedError:
                 pass
         if hasattr(classifier, "predict_proba"):
@@ -1194,6 +1224,7 @@ def check_estimators_fit_returns_self(name, estimator_orig):
     X, y = make_blobs(random_state=0, n_samples=9, n_features=4)
     # some want non-negative input
     X -= X.min()
+    X = gram_matrix_if_pairwise(X, estimator_orig)
 
     estimator = clone(estimator_orig)
     y = multioutput_estimator_convert_y_2d(estimator, y)
@@ -1241,7 +1272,7 @@ def check_supervised_y_2d(name, estimator_orig):
         # These only work on 2d, so this test makes no sense
         return
     rnd = np.random.RandomState(0)
-    X = rnd.uniform(size=(10, 3))
+    X = gram_matrix_if_pairwise(rnd.uniform(size=(10, 3)), estimator_orig)
     y = np.arange(10) % 3
     estimator = clone(estimator_orig)
     set_random_state(estimator)
@@ -1275,6 +1306,7 @@ def check_classifiers_classes(name, classifier_orig):
     # We need to make sure that we have non negative data, for things
     # like NMF
     X -= X.min() - .1
+    X = gram_matrix_if_pairwise(X, classifier_orig)
     y_names = np.array(["one", "two", "three"])[y]
 
     for y_names in [y_names, y_names.astype('O')]:
@@ -1408,6 +1440,11 @@ def check_class_weight_classifiers(name, classifier_orig):
         X, y = make_blobs(centers=n_centers, random_state=0, cluster_std=20)
         X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.5,
                                                             random_state=0)
+
+        X_train = gram_matrix_if_pairwise(X_train, classifier_orig,
+                                          kernel=rbf_kernel)
+        X_test = gram_matrix_if_pairwise(X_test, classifier_orig,
+                                         kernel=rbf_kernel)
         n_centers = len(np.unique(y_train))
 
         if n_centers == 2:
@@ -1491,6 +1528,7 @@ def check_estimators_overwrite_params(name, estimator_orig):
     X, y = make_blobs(random_state=0, n_samples=9)
     # some want non-negative input
     X -= X.min()
+    X = gram_matrix_if_pairwise(X, estimator_orig, kernel=rbf_kernel)
     estimator = clone(estimator_orig)
     y = multioutput_estimator_convert_y_2d(estimator, y)
 
@@ -1565,6 +1603,7 @@ def check_sparsify_coefficients(name, estimator_orig):
 @ignore_warnings(category=DeprecationWarning)
 def check_classifier_data_not_an_array(name, estimator_orig):
     X = np.array([[3, 0], [0, 1], [0, 2], [1, 1], [1, 2], [2, 1]])
+    X = gram_matrix_if_pairwise(X, estimator_orig)
     y = [1, 1, 1, 2, 2, 2]
     y = multioutput_estimator_convert_y_2d(estimator_orig, y)
     check_estimators_data_not_an_array(name, estimator_orig, X, y)
@@ -1573,6 +1612,7 @@ def check_classifier_data_not_an_array(name, estimator_orig):
 @ignore_warnings(category=DeprecationWarning)
 def check_regressor_data_not_an_array(name, estimator_orig):
     X, y = _boston_subset(n_samples=50)
+    X = gram_matrix_if_pairwise(X, estimator_orig)
     y = multioutput_estimator_convert_y_2d(estimator_orig, y)
     check_estimators_data_not_an_array(name, estimator_orig, X, y)
 
@@ -1795,3 +1835,8 @@ def check_decision_proba_consistency(name, estimator_orig):
         a = estimator.predict_proba(X_test)[:, 1]
         b = estimator.decision_function(X_test)
         assert_array_equal(rankdata(a), rankdata(b))
+
+
+def check_pairwise_estimator():
+    est = SVC(kernel='precomputed')
+    check_estimator(est)

From d6f3c27cc0e67ee426bed74718fcb53c2f3a5c23 Mon Sep 17 00:00:00 2001
From: gkjohns <gkjohns@gmail.com>
Date: Wed, 6 Sep 2017 23:12:08 -0400
Subject: [PATCH 04/51] fix typo in is_pairwise docstring

---
 sklearn/base.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sklearn/base.py b/sklearn/base.py
index 79e78761657a2..6df41978ce134 100644
--- a/sklearn/base.py
+++ b/sklearn/base.py
@@ -593,6 +593,6 @@ def is_pairwise(estimator):
     Returns
     -------
     out : bool
-        True if _pairwise is set and True and False otherwise.
+        True if _pairwise is set to True and False otherwise.
     """
     return getattr(estimator, "_pairwise", False)

From d9fff0ad71eaa64bb875f7ffea2eb1ecdea639d1 Mon Sep 17 00:00:00 2001
From: gkjohns <gkjohns@gmail.com>
Date: Thu, 7 Sep 2017 02:39:54 -0400
Subject: [PATCH 05/51] fix PEP8 issues: line length and unused import

---
 sklearn/utils/estimator_checks.py | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/sklearn/utils/estimator_checks.py b/sklearn/utils/estimator_checks.py
index 63c9536ac0e45..50a41f7e49bb6 100644
--- a/sklearn/utils/estimator_checks.py
+++ b/sklearn/utils/estimator_checks.py
@@ -50,8 +50,7 @@
 from sklearn.exceptions import DataConversionWarning
 from sklearn.exceptions import SkipTestWarning
 from sklearn.model_selection import train_test_split
-from sklearn.metrics.pairwise import (rbf_kernel, cosine_similarity,
-                                      linear_kernel)
+from sklearn.metrics.pairwise import rbf_kernel, linear_kernel
 
 from sklearn.utils import shuffle
 from sklearn.utils.fixes import signature
@@ -1191,8 +1190,8 @@ def check_classifiers_train(name, classifier_orig):
                 # raises error on malformed input for decision_function
                 if not is_pairwise(classifier):
                     with assert_raises(ValueError, msg="The classifier {} does"
-                                       " not raise an error when the number of "
-                                       "features in decision_function is "
+                                       " not raise an error when the number "
+                                       "of features in decision_function is "
                                        "different from the number of features"
                                        " in fit.".format(name)):
                         classifier.decision_function(X.T)

From e89b9e4e141202041eac60229d282de5216ce80a Mon Sep 17 00:00:00 2001
From: gkjohns <gkjohns@gmail.com>
Date: Thu, 14 Sep 2017 13:29:19 -0400
Subject: [PATCH 06/51] use is_pairwise() to check for precomputed kernel

---
 sklearn/utils/estimator_checks.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sklearn/utils/estimator_checks.py b/sklearn/utils/estimator_checks.py
index 50a41f7e49bb6..e23b5d434f95b 100644
--- a/sklearn/utils/estimator_checks.py
+++ b/sklearn/utils/estimator_checks.py
@@ -365,7 +365,7 @@ def gram_matrix_if_pairwise(X, estimator, kernel=linear_kernel):
 def check_estimator_sparse_data(name, estimator_orig):
 
     # Sparse precomputed kernels aren't supported
-    if getattr(estimator_orig, 'kernel', None) == 'precomputed':
+    if is_pairwise(estimator_orig):
         return
 
     rng = np.random.RandomState(0)

From c9c6a491ec671a3274a5bc8c51b61a7bc4dd87f5 Mon Sep 17 00:00:00 2001
From: gkjohns <gkjohns@gmail.com>
Date: Mon, 18 Sep 2017 02:07:02 -0400
Subject: [PATCH 07/51] fix precomputed test/train matrices for
 check_class_weight_classifiers

---
 sklearn/utils/estimator_checks.py | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/sklearn/utils/estimator_checks.py b/sklearn/utils/estimator_checks.py
index e23b5d434f95b..55abc859f774c 100644
--- a/sklearn/utils/estimator_checks.py
+++ b/sklearn/utils/estimator_checks.py
@@ -1440,10 +1440,11 @@ def check_class_weight_classifiers(name, classifier_orig):
         X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.5,
                                                             random_state=0)
 
-        X_train = gram_matrix_if_pairwise(X_train, classifier_orig,
-                                          kernel=rbf_kernel)
-        X_test = gram_matrix_if_pairwise(X_test, classifier_orig,
-                                         kernel=rbf_kernel)
+        # can't use gram_if_pairwise() here, setting up gram matrix manually
+        if is_pairwise(classifier_orig):
+            X_test = rbf_kernel(X_test, X_train)
+            X_train = rbf_kernel(X_train, X_train)
+
         n_centers = len(np.unique(y_train))
 
         if n_centers == 2:

From 7894231472d8da91a07c37c864c0870accb1c61c Mon Sep 17 00:00:00 2001
From: gkjohns <gkjohns@gmail.com>
Date: Mon, 18 Sep 2017 02:21:13 -0400
Subject: [PATCH 08/51] fix PEP8 issues

---
 sklearn/base.py                              | 8 +++-----
 sklearn/utils/estimator_checks.py            | 4 ----
 sklearn/utils/tests/test_estimator_checks.py | 1 +
 3 files changed, 4 insertions(+), 9 deletions(-)

diff --git a/sklearn/base.py b/sklearn/base.py
index 98b9426a2a06d..080190693fa13 100644
--- a/sklearn/base.py
+++ b/sklearn/base.py
@@ -551,7 +551,6 @@ def is_classifier(estimator):
 def is_regressor(estimator):
     """Returns True if the given estimator is (probably) a regressor.
 
-
     Parameters
     ----------
     estimator : object
@@ -566,9 +565,7 @@ def is_regressor(estimator):
 
 
 def is_pairwise(estimator):
-    """Returns True if the given estimator has a _pairwise attribute
-    set to True.
-
+    """Returns True if estimator has a _pairwise attribute set to True.
 
     Parameters
     ----------
@@ -580,4 +577,5 @@ def is_pairwise(estimator):
     out : bool
         True if _pairwise is set to True and False otherwise.
     """
-    return getattr(estimator, "_pairwise", False)
+    return bool(getattr(estimator, "_pairwise", False))
+
diff --git a/sklearn/utils/estimator_checks.py b/sklearn/utils/estimator_checks.py
index 55abc859f774c..8d9fa76ceae29 100644
--- a/sklearn/utils/estimator_checks.py
+++ b/sklearn/utils/estimator_checks.py
@@ -1836,7 +1836,3 @@ def check_decision_proba_consistency(name, estimator_orig):
         b = estimator.decision_function(X_test)
         assert_array_equal(rankdata(a), rankdata(b))
 
-
-def check_pairwise_estimator():
-    est = SVC(kernel='precomputed')
-    check_estimator(est)
diff --git a/sklearn/utils/tests/test_estimator_checks.py b/sklearn/utils/tests/test_estimator_checks.py
index 82afcac04c2e9..50b4ebd514a30 100644
--- a/sklearn/utils/tests/test_estimator_checks.py
+++ b/sklearn/utils/tests/test_estimator_checks.py
@@ -253,6 +253,7 @@ def __init__(self):
                         'estimator_name',
                         NonConformantEstimator)
 
+
 def test_check_estimator_pairwise():
     # check that check_estimator() works on estimator with _pairwise
     # attribute set

From d3fcb3e01551621000a40aa89f311d117f90d47a Mon Sep 17 00:00:00 2001
From: gkjohns <gkjohns@gmail.com>
Date: Mon, 18 Sep 2017 03:06:31 -0400
Subject: [PATCH 09/51] add final empty line

---
 sklearn/utils/tests/test_estimator_checks.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/sklearn/utils/tests/test_estimator_checks.py b/sklearn/utils/tests/test_estimator_checks.py
index 50b4ebd514a30..df98631c6be48 100644
--- a/sklearn/utils/tests/test_estimator_checks.py
+++ b/sklearn/utils/tests/test_estimator_checks.py
@@ -259,3 +259,4 @@ def test_check_estimator_pairwise():
     # attribute set
     est = SVC(kernel='precomputed')
     check_estimator(est)
+

From ffeb68e95c53c2ec82ef8289f0d95195f30d4c8c Mon Sep 17 00:00:00 2001
From: gkjohns <gkjohns@gmail.com>
Date: Mon, 18 Sep 2017 12:39:58 -0400
Subject: [PATCH 10/51] ensure check_sample_weights_pandas_series actually
 operates on pandas dataframes with pairwise kernel

---
 sklearn/utils/estimator_checks.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/sklearn/utils/estimator_checks.py b/sklearn/utils/estimator_checks.py
index 8d9fa76ceae29..12a0fd68daa56 100644
--- a/sklearn/utils/estimator_checks.py
+++ b/sklearn/utils/estimator_checks.py
@@ -419,7 +419,10 @@ def check_sample_weights_pandas_series(name, estimator_orig):
         try:
             import pandas as pd
             X = pd.DataFrame([[1, 1], [1, 2], [1, 3], [2, 1], [2, 2], [2, 3]])
-            X = gram_matrix_if_pairwise(X, estimator_orig)
+            # if _pairwise, feed estimator a pandas dataframe of the gram
+            # matrix
+            if is_pairwise(estimator_orig):
+                X = pd.DataFrame(rbf_kernel(X.values, X.values))
             y = pd.Series([1, 1, 1, 2, 2, 2])
             weights = pd.Series([1] * 6)
             try:
@@ -1835,4 +1838,3 @@ def check_decision_proba_consistency(name, estimator_orig):
         a = estimator.predict_proba(X_test)[:, 1]
         b = estimator.decision_function(X_test)
         assert_array_equal(rankdata(a), rankdata(b))
-

From 298fa8498d05727c0dfdcd79043c8421530762e1 Mon Sep 17 00:00:00 2001
From: gkjohns <gkjohns@gmail.com>
Date: Mon, 18 Sep 2017 12:52:16 -0400
Subject: [PATCH 11/51] remove blank lines at end of file, flake8

---
 sklearn/base.py                              | 1 -
 sklearn/utils/tests/test_estimator_checks.py | 1 -
 2 files changed, 2 deletions(-)

diff --git a/sklearn/base.py b/sklearn/base.py
index 080190693fa13..135016613f138 100644
--- a/sklearn/base.py
+++ b/sklearn/base.py
@@ -578,4 +578,3 @@ def is_pairwise(estimator):
         True if _pairwise is set to True and False otherwise.
     """
     return bool(getattr(estimator, "_pairwise", False))
-
diff --git a/sklearn/utils/tests/test_estimator_checks.py b/sklearn/utils/tests/test_estimator_checks.py
index df98631c6be48..50b4ebd514a30 100644
--- a/sklearn/utils/tests/test_estimator_checks.py
+++ b/sklearn/utils/tests/test_estimator_checks.py
@@ -259,4 +259,3 @@ def test_check_estimator_pairwise():
     # attribute set
     est = SVC(kernel='precomputed')
     check_estimator(est)
-

From a6319511d4dd7c041c833064f500d3c5059488cf Mon Sep 17 00:00:00 2001
From: gkjohns <gkjohns@gmail.com>
Date: Mon, 18 Sep 2017 12:55:33 -0400
Subject: [PATCH 12/51] remove unused import

---
 sklearn/utils/estimator_checks.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/sklearn/utils/estimator_checks.py b/sklearn/utils/estimator_checks.py
index 12a0fd68daa56..589438f68e39c 100644
--- a/sklearn/utils/estimator_checks.py
+++ b/sklearn/utils/estimator_checks.py
@@ -42,7 +42,6 @@
 
 from sklearn.random_projection import BaseRandomProjection
 from sklearn.feature_selection import SelectKBest
-from sklearn.svm import SVC
 from sklearn.svm.base import BaseLibSVM
 from sklearn.linear_model.stochastic_gradient import BaseSGD
 from sklearn.pipeline import make_pipeline

From b58e6bf4017ba67bf24a6a1fa5bb3066c2b57401 Mon Sep 17 00:00:00 2001
From: gkjohns <gkjohns@gmail.com>
Date: Tue, 19 Sep 2017 16:38:10 -0400
Subject: [PATCH 13/51] add estimator check for estimators that are based on a
 metric as well as a kernel

---
 sklearn/utils/tests/test_estimator_checks.py | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/sklearn/utils/tests/test_estimator_checks.py b/sklearn/utils/tests/test_estimator_checks.py
index 50b4ebd514a30..f5d8e68486414 100644
--- a/sklearn/utils/tests/test_estimator_checks.py
+++ b/sklearn/utils/tests/test_estimator_checks.py
@@ -19,6 +19,7 @@
 from sklearn.decomposition import NMF
 from sklearn.linear_model import MultiTaskElasticNet
 from sklearn.svm import SVC
+from sklearn.neighbors import KNeighborsRegressor
 from sklearn.utils.validation import check_X_y, check_array
 
 
@@ -259,3 +260,10 @@ def test_check_estimator_pairwise():
     # attribute set
     est = SVC(kernel='precomputed')
     check_estimator(est)
+
+def test_check_estimator_metric_and_kernel():
+    # check that check_estimator works for estimator that is based on
+    # a metric as well as a kernel
+
+    est = KNeighborsRegressor()
+    check_estimator(est)

From 273d8ee97ee49dd3cf3f4d205cc6118f16f60828 Mon Sep 17 00:00:00 2001
From: gkjohns <gkjohns@gmail.com>
Date: Tue, 19 Sep 2017 21:10:11 -0400
Subject: [PATCH 14/51] add extra line, PEP8

---
 sklearn/utils/tests/test_estimator_checks.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/sklearn/utils/tests/test_estimator_checks.py b/sklearn/utils/tests/test_estimator_checks.py
index f5d8e68486414..b15f893cebd85 100644
--- a/sklearn/utils/tests/test_estimator_checks.py
+++ b/sklearn/utils/tests/test_estimator_checks.py
@@ -261,6 +261,7 @@ def test_check_estimator_pairwise():
     est = SVC(kernel='precomputed')
     check_estimator(est)
 
+
 def test_check_estimator_metric_and_kernel():
     # check that check_estimator works for estimator that is based on
     # a metric as well as a kernel

From 68bacdb37d279c5be289def718f04075993977e3 Mon Sep 17 00:00:00 2001
From: gkjohns <gkjohns@gmail.com>
Date: Wed, 20 Sep 2017 12:43:47 -0400
Subject: [PATCH 15/51] add check to ensure test_check_estimator_pairwise
 actually checks a pairwise estimator

---
 sklearn/utils/tests/test_estimator_checks.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/sklearn/utils/tests/test_estimator_checks.py b/sklearn/utils/tests/test_estimator_checks.py
index b15f893cebd85..b5c55eaa09600 100644
--- a/sklearn/utils/tests/test_estimator_checks.py
+++ b/sklearn/utils/tests/test_estimator_checks.py
@@ -8,6 +8,7 @@
 from sklearn.utils.testing import (assert_raises_regex, assert_true,
                                    assert_equal, ignore_warnings)
 from sklearn.utils.estimator_checks import check_estimator
+from sklearn.utils.estimator_checks import is_pairwise
 from sklearn.utils.estimator_checks import set_random_state
 from sklearn.utils.estimator_checks import set_checking_parameters
 from sklearn.utils.estimator_checks import check_estimators_unfitted
@@ -259,6 +260,7 @@ def test_check_estimator_pairwise():
     # check that check_estimator() works on estimator with _pairwise
     # attribute set
     est = SVC(kernel='precomputed')
+    assert(is_pairwise(est))
     check_estimator(est)
 
 

From 142eab4e1fdb98308cad42f1d597af0b20ed2578 Mon Sep 17 00:00:00 2001
From: gkjohns <gkjohns@gmail.com>
Date: Thu, 28 Sep 2017 01:54:41 -0400
Subject: [PATCH 16/51] alter gram_matrix_if_pairwise to account for pairwise
 metrics

---
 sklearn/base.py                              | 20 ++++++++++++++++++++
 sklearn/utils/estimator_checks.py            | 12 ++++++++++--
 sklearn/utils/tests/test_estimator_checks.py |  4 ++--
 3 files changed, 32 insertions(+), 4 deletions(-)

diff --git a/sklearn/base.py b/sklearn/base.py
index 135016613f138..fd166d9874efd 100644
--- a/sklearn/base.py
+++ b/sklearn/base.py
@@ -578,3 +578,23 @@ def is_pairwise(estimator):
         True if _pairwise is set to True and False otherwise.
     """
     return bool(getattr(estimator, "_pairwise", False))
+
+
+def is_pairwise_metric(estimator):
+    """Returns True if estimator has a _pairwise attribute set to True.
+
+    Parameters
+    ----------
+    estimator : object
+        Estimator object to test.
+
+    Returns
+    -------
+    out : bool
+        True if _pairwise is set to True and False otherwise.
+    """
+    metric = getattr(estimator,  "metric", None)
+    precomputed_metric = metric == 'precomputed'
+    pairwise = is_pairwise(estimator)
+
+    return precomputed_metric and pairwise
diff --git a/sklearn/utils/estimator_checks.py b/sklearn/utils/estimator_checks.py
index 3179f8658d29c..3f08b3a72639e 100644
--- a/sklearn/utils/estimator_checks.py
+++ b/sklearn/utils/estimator_checks.py
@@ -37,9 +37,10 @@
 
 from sklearn.base import (clone, TransformerMixin, ClusterMixin,
                           BaseEstimator, is_classifier, is_regressor,
-                          is_pairwise)
+                          is_pairwise, is_pairwise_metric)
 from sklearn.metrics import accuracy_score, adjusted_rand_score, f1_score
 
+from sklearn.covariance import LedoitWolf
 from sklearn.random_projection import BaseRandomProjection
 from sklearn.feature_selection import SelectKBest
 from sklearn.svm.base import BaseLibSVM
@@ -50,6 +51,7 @@
 from sklearn.exceptions import SkipTestWarning
 from sklearn.model_selection import train_test_split
 from sklearn.metrics.pairwise import rbf_kernel, linear_kernel
+from sklearn.metrics.pairwise import pairwise_distances
 
 from sklearn.utils import shuffle
 from sklearn.utils.fixes import signature
@@ -358,8 +360,13 @@ def _is_32bit():
 
 
 def gram_matrix_if_pairwise(X, estimator, kernel=linear_kernel):
+
+    if is_pairwise_metric(estimator):
+        return pairwise_distances(X, metric='mahalanobis')
+
     if is_pairwise(estimator):
         return kernel(X, X)
+
     return X
 
 
@@ -1332,7 +1339,7 @@ def check_classifiers_classes(name, classifier_orig):
 @ignore_warnings(category=(DeprecationWarning, FutureWarning))
 def check_regressors_int(name, regressor_orig):
     X, _ = _boston_subset()
-    X = X[:50]
+    X = gram_matrix_if_pairwise(X[:50], regressor_orig)
     rnd = np.random.RandomState(0)
     y = rnd.randint(3, size=X.shape[0])
     y = multioutput_estimator_convert_y_2d(regressor_orig, y)
@@ -1360,6 +1367,7 @@ def check_regressors_int(name, regressor_orig):
 @ignore_warnings(category=(DeprecationWarning, FutureWarning))
 def check_regressors_train(name, regressor_orig):
     X, y = _boston_subset()
+    X = gram_matrix_if_pairwise(X, regressor_orig)
     y = StandardScaler().fit_transform(y.reshape(-1, 1))  # X is already scaled
     y = y.ravel()
     regressor = clone(regressor_orig)
diff --git a/sklearn/utils/tests/test_estimator_checks.py b/sklearn/utils/tests/test_estimator_checks.py
index b5c55eaa09600..005f876462a0d 100644
--- a/sklearn/utils/tests/test_estimator_checks.py
+++ b/sklearn/utils/tests/test_estimator_checks.py
@@ -264,9 +264,9 @@ def test_check_estimator_pairwise():
     check_estimator(est)
 
 
-def test_check_estimator_metric_and_kernel():
+def test_check_estimator_pairwise_metric():
     # check that check_estimator works for estimator that is based on
     # a metric as well as a kernel
 
-    est = KNeighborsRegressor()
+    est = KNeighborsRegressor(metric='precomputed')
     check_estimator(est)

From c06e404d47525c6a34aa8d9c9dcbaf3a30e86625 Mon Sep 17 00:00:00 2001
From: gkjohns <gkjohns@gmail.com>
Date: Thu, 28 Sep 2017 02:33:41 -0400
Subject: [PATCH 17/51] make test for 2d y features work

---
 sklearn/utils/estimator_checks.py            | 1 +
 sklearn/utils/tests/test_estimator_checks.py | 4 +++-
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/sklearn/utils/estimator_checks.py b/sklearn/utils/estimator_checks.py
index 3f08b3a72639e..d51e5708b34ed 100644
--- a/sklearn/utils/estimator_checks.py
+++ b/sklearn/utils/estimator_checks.py
@@ -651,6 +651,7 @@ def check_fit2d_1feature(name, estimator_orig):
     # informative message
     rnd = np.random.RandomState(0)
     X = 3 * rnd.uniform(size=(10, 1))
+    X = gram_matrix_if_pairwise(X, estimator_orig)
     y = X[:, 0].astype(np.int)
     estimator = clone(estimator_orig)
     y = multioutput_estimator_convert_y_2d(estimator, y)
diff --git a/sklearn/utils/tests/test_estimator_checks.py b/sklearn/utils/tests/test_estimator_checks.py
index 005f876462a0d..457d0b4dc6932 100644
--- a/sklearn/utils/tests/test_estimator_checks.py
+++ b/sklearn/utils/tests/test_estimator_checks.py
@@ -8,7 +8,7 @@
 from sklearn.utils.testing import (assert_raises_regex, assert_true,
                                    assert_equal, ignore_warnings)
 from sklearn.utils.estimator_checks import check_estimator
-from sklearn.utils.estimator_checks import is_pairwise
+from sklearn.utils.estimator_checks import is_pairwise, is_pairwise_metric
 from sklearn.utils.estimator_checks import set_random_state
 from sklearn.utils.estimator_checks import set_checking_parameters
 from sklearn.utils.estimator_checks import check_estimators_unfitted
@@ -259,6 +259,7 @@ def __init__(self):
 def test_check_estimator_pairwise():
     # check that check_estimator() works on estimator with _pairwise
     # attribute set
+
     est = SVC(kernel='precomputed')
     assert(is_pairwise(est))
     check_estimator(est)
@@ -269,4 +270,5 @@ def test_check_estimator_pairwise_metric():
     # a metric as well as a kernel
 
     est = KNeighborsRegressor(metric='precomputed')
+    assert(is_pairwise_metric(est))
     check_estimator(est)

From 7c7f3c400b243f47aa45ae496a6e3fb0cbbf5ec8 Mon Sep 17 00:00:00 2001
From: gkjohns <gkjohns@gmail.com>
Date: Thu, 28 Sep 2017 02:36:47 -0400
Subject: [PATCH 18/51] refactor is_pairwise_metric()

---
 sklearn/base.py | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/sklearn/base.py b/sklearn/base.py
index fd166d9874efd..815c1471149e4 100644
--- a/sklearn/base.py
+++ b/sklearn/base.py
@@ -594,7 +594,6 @@ def is_pairwise_metric(estimator):
         True if _pairwise is set to True and False otherwise.
     """
     metric = getattr(estimator,  "metric", None)
-    precomputed_metric = metric == 'precomputed'
-    pairwise = is_pairwise(estimator)
 
-    return precomputed_metric and pairwise
+    return  metric == 'precomputed' and is_pairwise(estimator)
+

From 7d53c90233acda31c65aaf3eff688bf85e5de1da Mon Sep 17 00:00:00 2001
From: gkjohns <gkjohns@gmail.com>
Date: Thu, 28 Sep 2017 02:37:51 -0400
Subject: [PATCH 19/51] remove extra line

---
 sklearn/base.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/sklearn/base.py b/sklearn/base.py
index 815c1471149e4..cec252508b622 100644
--- a/sklearn/base.py
+++ b/sklearn/base.py
@@ -596,4 +596,3 @@ def is_pairwise_metric(estimator):
     metric = getattr(estimator,  "metric", None)
 
     return  metric == 'precomputed' and is_pairwise(estimator)
-

From 9fd424e61a3153b453fd22324f26d91ad2c00e28 Mon Sep 17 00:00:00 2001
From: gkjohns <gkjohns@gmail.com>
Date: Thu, 28 Sep 2017 02:53:50 -0400
Subject: [PATCH 20/51] fix grammar in docstring

---
 sklearn/utils/tests/test_estimator_checks.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sklearn/utils/tests/test_estimator_checks.py b/sklearn/utils/tests/test_estimator_checks.py
index 457d0b4dc6932..393a09c438e23 100644
--- a/sklearn/utils/tests/test_estimator_checks.py
+++ b/sklearn/utils/tests/test_estimator_checks.py
@@ -267,7 +267,7 @@ def test_check_estimator_pairwise():
 
 def test_check_estimator_pairwise_metric():
     # check that check_estimator works for estimator that is based on
-    # a metric as well as a kernel
+    # a precomputed metric
 
     est = KNeighborsRegressor(metric='precomputed')
     assert(is_pairwise_metric(est))

From e10fafd5f2707aa7fdf363c34bccf6353829729b Mon Sep 17 00:00:00 2001
From: gkjohns <gkjohns@gmail.com>
Date: Thu, 28 Sep 2017 16:51:46 -0400
Subject: [PATCH 21/51] fix gram_matrix_if_pairwise to accept flat 1-D X vector
 in python 3

---
 sklearn/utils/estimator_checks.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/sklearn/utils/estimator_checks.py b/sklearn/utils/estimator_checks.py
index d51e5708b34ed..f9262c0402050 100644
--- a/sklearn/utils/estimator_checks.py
+++ b/sklearn/utils/estimator_checks.py
@@ -361,6 +361,9 @@ def _is_32bit():
 
 def gram_matrix_if_pairwise(X, estimator, kernel=linear_kernel):
 
+    if len(X.shape) ==  1:
+        X = X.reshape(-1, 1)
+
     if is_pairwise_metric(estimator):
         return pairwise_distances(X, metric='mahalanobis')
 

From 85bed8ac7c27d2c865c17e4d663e301d6176ec97 Mon Sep 17 00:00:00 2001
From: gkjohns <gkjohns@gmail.com>
Date: Sun, 1 Oct 2017 23:26:15 -0400
Subject: [PATCH 22/51] remove extra spaces

---
 sklearn/base.py                   | 2 +-
 sklearn/utils/estimator_checks.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/sklearn/base.py b/sklearn/base.py
index cec252508b622..d7d800d945b50 100644
--- a/sklearn/base.py
+++ b/sklearn/base.py
@@ -595,4 +595,4 @@ def is_pairwise_metric(estimator):
     """
     metric = getattr(estimator,  "metric", None)
 
-    return  metric == 'precomputed' and is_pairwise(estimator)
+    return metric == 'precomputed' and is_pairwise(estimator)
diff --git a/sklearn/utils/estimator_checks.py b/sklearn/utils/estimator_checks.py
index f9262c0402050..ae49a91ba1adf 100644
--- a/sklearn/utils/estimator_checks.py
+++ b/sklearn/utils/estimator_checks.py
@@ -361,7 +361,7 @@ def _is_32bit():
 
 def gram_matrix_if_pairwise(X, estimator, kernel=linear_kernel):
 
-    if len(X.shape) ==  1:
+    if len(X.shape) == 1:
         X = X.reshape(-1, 1)
 
     if is_pairwise_metric(estimator):

From 3b9dd25c862a96ee548df1d290cb87827497f7b9 Mon Sep 17 00:00:00 2001
From: gkjohns <gkjohns@gmail.com>
Date: Fri, 6 Oct 2017 14:35:02 -0400
Subject: [PATCH 23/51] remove unused import in utils/estimator_checks.py

---
 sklearn/utils/estimator_checks.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sklearn/utils/estimator_checks.py b/sklearn/utils/estimator_checks.py
index ae49a91ba1adf..320c93940eac6 100644
--- a/sklearn/utils/estimator_checks.py
+++ b/sklearn/utils/estimator_checks.py
@@ -40,7 +40,6 @@
                           is_pairwise, is_pairwise_metric)
 from sklearn.metrics import accuracy_score, adjusted_rand_score, f1_score
 
-from sklearn.covariance import LedoitWolf
 from sklearn.random_projection import BaseRandomProjection
 from sklearn.feature_selection import SelectKBest
 from sklearn.svm.base import BaseLibSVM
@@ -654,6 +653,7 @@ def check_fit2d_1feature(name, estimator_orig):
     # informative message
     rnd = np.random.RandomState(0)
     X = 3 * rnd.uniform(size=(10, 1))
+    print(name, X)
     X = gram_matrix_if_pairwise(X, estimator_orig)
     y = X[:, 0].astype(np.int)
     estimator = clone(estimator_orig)

From 1e9886ee91558217ea26c94078adff01386e1f7b Mon Sep 17 00:00:00 2001
From: gkjohns <gkjohns@gmail.com>
Date: Fri, 6 Oct 2017 17:29:17 -0400
Subject: [PATCH 24/51] manually create distance matrix for
 gram_matrix_if_pairwise()

---
 sklearn/utils/estimator_checks.py | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/sklearn/utils/estimator_checks.py b/sklearn/utils/estimator_checks.py
index 320c93940eac6..b5284ca262d61 100644
--- a/sklearn/utils/estimator_checks.py
+++ b/sklearn/utils/estimator_checks.py
@@ -364,7 +364,15 @@ def gram_matrix_if_pairwise(X, estimator, kernel=linear_kernel):
         X = X.reshape(-1, 1)
 
     if is_pairwise_metric(estimator):
-        return pairwise_distances(X, metric='mahalanobis')
+        # pairwise_distance() fails for certain versions of scipy
+        n_obs = X.shape[0]
+        X_std = (X - X.mean(axis=0)) / X.std(axis=0)
+        X_out = np.zeros(shape=(n_obs, n_obs))
+        for i in range(n_obs):
+            for j in range(n_obs):
+                dist = np.sum((X_std[i] - X_std[j]) ** 2) ** .5
+                X_out[i,j] = dist
+        return X_out
 
     if is_pairwise(estimator):
         return kernel(X, X)
@@ -653,7 +661,6 @@ def check_fit2d_1feature(name, estimator_orig):
     # informative message
     rnd = np.random.RandomState(0)
     X = 3 * rnd.uniform(size=(10, 1))
-    print(name, X)
     X = gram_matrix_if_pairwise(X, estimator_orig)
     y = X[:, 0].astype(np.int)
     estimator = clone(estimator_orig)

From 78654a11d7f2337bd465ad4404b344aa0d59eadb Mon Sep 17 00:00:00 2001
From: gkjohns <gkjohns@gmail.com>
Date: Fri, 6 Oct 2017 17:49:00 -0400
Subject: [PATCH 25/51] PEP8, add a space

---
 sklearn/utils/estimator_checks.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sklearn/utils/estimator_checks.py b/sklearn/utils/estimator_checks.py
index b5284ca262d61..99b572d59829d 100644
--- a/sklearn/utils/estimator_checks.py
+++ b/sklearn/utils/estimator_checks.py
@@ -371,7 +371,7 @@ def gram_matrix_if_pairwise(X, estimator, kernel=linear_kernel):
         for i in range(n_obs):
             for j in range(n_obs):
                 dist = np.sum((X_std[i] - X_std[j]) ** 2) ** .5
-                X_out[i,j] = dist
+                X_out[i, j] = dist
         return X_out
 
     if is_pairwise(estimator):

From 3777255be1d933795d804213bc8591558917a78d Mon Sep 17 00:00:00 2001
From: gkjohns <gkjohns@gmail.com>
Date: Fri, 6 Oct 2017 18:18:06 -0400
Subject: [PATCH 26/51] remove unused import

---
 sklearn/utils/estimator_checks.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/sklearn/utils/estimator_checks.py b/sklearn/utils/estimator_checks.py
index 99b572d59829d..50c67872aa888 100644
--- a/sklearn/utils/estimator_checks.py
+++ b/sklearn/utils/estimator_checks.py
@@ -50,7 +50,6 @@
 from sklearn.exceptions import SkipTestWarning
 from sklearn.model_selection import train_test_split
 from sklearn.metrics.pairwise import rbf_kernel, linear_kernel
-from sklearn.metrics.pairwise import pairwise_distances
 
 from sklearn.utils import shuffle
 from sklearn.utils.fixes import signature

From 9b92c7b2acdf342674cad64928bce019dcd7744a Mon Sep 17 00:00:00 2001
From: gkjohns <gkjohns@gmail.com>
Date: Tue, 17 Oct 2017 21:10:26 -0400
Subject: [PATCH 27/51] make check_classifiers_train() check shape for pairwise
 estimators, fix docstring for is_pairwise_metric()

---
 sklearn/base.py                   | 2 +-
 sklearn/utils/estimator_checks.py | 8 +++++++-
 2 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/sklearn/base.py b/sklearn/base.py
index d7d800d945b50..193c688c15083 100644
--- a/sklearn/base.py
+++ b/sklearn/base.py
@@ -581,7 +581,7 @@ def is_pairwise(estimator):
 
 
 def is_pairwise_metric(estimator):
-    """Returns True if estimator has a _pairwise attribute set to True.
+    """Returns True if estimator accepts pairwise metric.
 
     Parameters
     ----------
diff --git a/sklearn/utils/estimator_checks.py b/sklearn/utils/estimator_checks.py
index 50c67872aa888..e25ed77c42968 100644
--- a/sklearn/utils/estimator_checks.py
+++ b/sklearn/utils/estimator_checks.py
@@ -1181,7 +1181,13 @@ def check_classifiers_train(name, classifier_orig):
             assert_greater(accuracy_score(y, y_pred), 0.83)
 
         # raises error on malformed input for predict
-        if not is_pairwise(classifier):
+        if is_pairwise(classifier):
+            with assert_raises(ValueError, msg="The classifier {} does not"
+                               " raise an error when the number of features "
+                               "in predict is not equal to (n_test_samples,"
+                               "n_training_samples)".format(name)):
+                classifier.predict(X.reshape(-1, 1))
+        else:
             with assert_raises(ValueError, msg="The classifier {} does not"
                                " raise an error when the number of features "
                                "in predict is different from the number of"

From 687204f1846236e1b3e5f8867379672bd374f7d4 Mon Sep 17 00:00:00 2001
From: gkjohns <gkjohns@gmail.com>
Date: Tue, 17 Oct 2017 21:23:08 -0400
Subject: [PATCH 28/51] use pairwise_distances() to create distance matrix

---
 sklearn/utils/estimator_checks.py | 14 +++-----------
 1 file changed, 3 insertions(+), 11 deletions(-)

diff --git a/sklearn/utils/estimator_checks.py b/sklearn/utils/estimator_checks.py
index e25ed77c42968..62f68c902a82a 100644
--- a/sklearn/utils/estimator_checks.py
+++ b/sklearn/utils/estimator_checks.py
@@ -49,7 +49,8 @@
 from sklearn.exceptions import DataConversionWarning
 from sklearn.exceptions import SkipTestWarning
 from sklearn.model_selection import train_test_split
-from sklearn.metrics.pairwise import rbf_kernel, linear_kernel
+from sklearn.metrics.pairwise import (rbf_kernel, linear_kernel,
+                                      pairwise_distances)
 
 from sklearn.utils import shuffle
 from sklearn.utils.fixes import signature
@@ -363,16 +364,7 @@ def gram_matrix_if_pairwise(X, estimator, kernel=linear_kernel):
         X = X.reshape(-1, 1)
 
     if is_pairwise_metric(estimator):
-        # pairwise_distance() fails for certain versions of scipy
-        n_obs = X.shape[0]
-        X_std = (X - X.mean(axis=0)) / X.std(axis=0)
-        X_out = np.zeros(shape=(n_obs, n_obs))
-        for i in range(n_obs):
-            for j in range(n_obs):
-                dist = np.sum((X_std[i] - X_std[j]) ** 2) ** .5
-                X_out[i, j] = dist
-        return X_out
-
+        return pairwise_distances(X, metric='mahalanobis')
     if is_pairwise(estimator):
         return kernel(X, X)
 

From cb095fc3da94043d578da44ce33464111331ee45 Mon Sep 17 00:00:00 2001
From: gkjohns <gkjohns@gmail.com>
Date: Wed, 18 Oct 2017 18:39:08 -0400
Subject: [PATCH 29/51] rename gram_matrix_if_pairwise() to maybe_pairwise().
 refactor check_sample_weights_pandas_series() and is_pairwise_metric()

---
 sklearn/base.py                   |  2 +-
 sklearn/utils/estimator_checks.py | 51 +++++++++++++++----------------
 2 files changed, 25 insertions(+), 28 deletions(-)

diff --git a/sklearn/base.py b/sklearn/base.py
index 98302af60429a..2640550ee8963 100644
--- a/sklearn/base.py
+++ b/sklearn/base.py
@@ -594,4 +594,4 @@ def is_pairwise_metric(estimator):
     """
     metric = getattr(estimator,  "metric", None)
 
-    return metric == 'precomputed' and is_pairwise(estimator)
+    return bool(metric == 'precomputed')
diff --git a/sklearn/utils/estimator_checks.py b/sklearn/utils/estimator_checks.py
index 2958854b73e74..da030d3f4ea65 100644
--- a/sklearn/utils/estimator_checks.py
+++ b/sklearn/utils/estimator_checks.py
@@ -358,7 +358,7 @@ def _is_32bit():
     return struct.calcsize('P') * 8 == 32
 
 
-def gram_matrix_if_pairwise(X, estimator, kernel=linear_kernel):
+def maybe_pairwise(X, estimator, kernel=linear_kernel):
 
     if len(X.shape) == 1:
         X = X.reshape(-1, 1)
@@ -380,7 +380,7 @@ def check_estimator_sparse_data(name, estimator_orig):
     rng = np.random.RandomState(0)
     X = rng.rand(40, 10)
     X[X < .8] = 0
-    X = gram_matrix_if_pairwise(X, estimator_orig)
+    X = maybe_pairwise(X, estimator_orig)
     X_csr = sparse.csr_matrix(X)
     y = (4 * rng.rand(40)).astype(np.int)
     # catch deprecation warnings
@@ -427,11 +427,8 @@ def check_sample_weights_pandas_series(name, estimator_orig):
     if has_fit_parameter(estimator, "sample_weight"):
         try:
             import pandas as pd
-            X = pd.DataFrame([[1, 1], [1, 2], [1, 3], [2, 1], [2, 2], [2, 3]])
-            # if _pairwise, feed estimator a pandas dataframe of the gram
-            # matrix
-            if is_pairwise(estimator_orig):
-                X = pd.DataFrame(rbf_kernel(X.values, X.values))
+            X = [[1, 1], [1, 2], [1, 3], [2, 1], [2, 2], [2, 3]]
+            X = pd.DataFrame(maybe_pairwise(X, estimator_orig))
             y = pd.Series([1, 1, 1, 2, 2, 2])
             weights = pd.Series([1] * 6)
             try:
@@ -452,7 +449,7 @@ def check_sample_weights_list(name, estimator_orig):
     if has_fit_parameter(estimator_orig, "sample_weight"):
         estimator = clone(estimator_orig)
         rnd = np.random.RandomState(0)
-        X = gram_matrix_if_pairwise(rnd.uniform(size=(10, 3)), estimator_orig)
+        X = maybe_pairwise(rnd.uniform(size=(10, 3)), estimator_orig)
         y = np.arange(10) % 3
         y = multioutput_estimator_convert_y_2d(estimator, y)
         sample_weight = [3] * 10
@@ -464,7 +461,7 @@ def check_sample_weights_list(name, estimator_orig):
 def check_dtype_object(name, estimator_orig):
     # check that estimators treat dtype object as numeric if possible
     rng = np.random.RandomState(0)
-    X = gram_matrix_if_pairwise(rng.rand(40, 10), estimator_orig)
+    X = maybe_pairwise(rng.rand(40, 10), estimator_orig)
     X = X.astype(object)
     y = (X[:, 0] * 4).astype(np.int)
     estimator = clone(estimator_orig)
@@ -512,7 +509,7 @@ def check_dict_unchanged(name, estimator_orig):
     else:
         X = 2 * rnd.uniform(size=(20, 3))
 
-    X = gram_matrix_if_pairwise(X, estimator_orig)
+    X = maybe_pairwise(X, estimator_orig)
 
     y = X[:, 0].astype(np.int)
     estimator = clone(estimator_orig)
@@ -551,7 +548,7 @@ def check_dont_overwrite_parameters(name, estimator_orig):
     estimator = clone(estimator_orig)
     rnd = np.random.RandomState(0)
     X = 3 * rnd.uniform(size=(20, 3))
-    X = gram_matrix_if_pairwise(X, estimator_orig)
+    X = maybe_pairwise(X, estimator_orig)
     y = X[:, 0].astype(np.int)
     y = multioutput_estimator_convert_y_2d(estimator, y)
 
@@ -598,7 +595,7 @@ def check_fit2d_predict1d(name, estimator_orig):
     # check by fitting a 2d array and predicting with a 1d array
     rnd = np.random.RandomState(0)
     X = 3 * rnd.uniform(size=(20, 3))
-    X = gram_matrix_if_pairwise(X, estimator_orig)
+    X = maybe_pairwise(X, estimator_orig)
     y = X[:, 0].astype(np.int)
     estimator = clone(estimator_orig)
     y = multioutput_estimator_convert_y_2d(estimator, y)
@@ -652,7 +649,7 @@ def check_fit2d_1feature(name, estimator_orig):
     # informative message
     rnd = np.random.RandomState(0)
     X = 3 * rnd.uniform(size=(10, 1))
-    X = gram_matrix_if_pairwise(X, estimator_orig)
+    X = maybe_pairwise(X, estimator_orig)
     y = X[:, 0].astype(np.int)
     estimator = clone(estimator_orig)
     y = multioutput_estimator_convert_y_2d(estimator, y)
@@ -825,7 +822,7 @@ def check_pipeline_consistency(name, estimator_orig):
     X, y = make_blobs(n_samples=30, centers=[[0, 0, 0], [1, 1, 1]],
                       random_state=0, n_features=2, cluster_std=0.1)
     X -= X.min()
-    X = gram_matrix_if_pairwise(X, estimator_orig, kernel=rbf_kernel)
+    X = maybe_pairwise(X, estimator_orig, kernel=rbf_kernel)
     estimator = clone(estimator_orig)
     y = multioutput_estimator_convert_y_2d(estimator, y)
     set_random_state(estimator)
@@ -850,7 +847,7 @@ def check_fit_score_takes_y(name, estimator_orig):
     # in fit and score so they can be used in pipelines
     rnd = np.random.RandomState(0)
     X = rnd.uniform(size=(10, 3))
-    X = gram_matrix_if_pairwise(X, estimator_orig)
+    X = maybe_pairwise(X, estimator_orig)
     y = np.arange(10) % 3
     estimator = clone(estimator_orig)
     y = multioutput_estimator_convert_y_2d(estimator, y)
@@ -876,7 +873,7 @@ def check_fit_score_takes_y(name, estimator_orig):
 def check_estimators_dtypes(name, estimator_orig):
     rnd = np.random.RandomState(0)
     X_train_32 = 3 * rnd.uniform(size=(20, 5)).astype(np.float32)
-    X_train_32 = gram_matrix_if_pairwise(X_train_32, estimator_orig)
+    X_train_32 = maybe_pairwise(X_train_32, estimator_orig)
     X_train_64 = X_train_32.astype(np.float64)
     X_train_int_64 = X_train_32.astype(np.int64)
     X_train_int_32 = X_train_32.astype(np.int32)
@@ -922,7 +919,7 @@ def check_estimators_empty_data_messages(name, estimator_orig):
 def check_estimators_nan_inf(name, estimator_orig):
     # Checks that Estimator X's do not contain NaN or inf.
     rnd = np.random.RandomState(0)
-    X_train_finite = gram_matrix_if_pairwise(rnd.uniform(size=(10, 3)),
+    X_train_finite = maybe_pairwise(rnd.uniform(size=(10, 3)),
                                              estimator_orig)
     X_train_nan = rnd.uniform(size=(10, 3))
     X_train_nan[0, 0] = np.nan
@@ -1000,7 +997,7 @@ def check_estimators_pickle(name, estimator_orig):
 
     # some estimators can't do features less than 0
     X -= X.min()
-    X = gram_matrix_if_pairwise(X, estimator_orig, kernel=rbf_kernel)
+    X = maybe_pairwise(X, estimator_orig, kernel=rbf_kernel)
 
     estimator = clone(estimator_orig)
 
@@ -1170,7 +1167,7 @@ def check_classifiers_train(name, classifier_orig):
         classifier = clone(classifier_orig)
         if name in ['BernoulliNB', 'MultinomialNB', 'ComplementNB']:
             X -= X.min()
-        X = gram_matrix_if_pairwise(X, classifier_orig)
+        X = maybe_pairwise(X, classifier_orig)
         set_random_state(classifier)
         # raises error on malformed input for fit
         with assert_raises(ValueError, msg="The classifer {} does not"
@@ -1254,7 +1251,7 @@ def check_estimators_fit_returns_self(name, estimator_orig):
     X, y = make_blobs(random_state=0, n_samples=9, n_features=4)
     # some want non-negative input
     X -= X.min()
-    X = gram_matrix_if_pairwise(X, estimator_orig)
+    X = maybe_pairwise(X, estimator_orig)
 
     estimator = clone(estimator_orig)
     y = multioutput_estimator_convert_y_2d(estimator, y)
@@ -1302,7 +1299,7 @@ def check_supervised_y_2d(name, estimator_orig):
         # These only work on 2d, so this test makes no sense
         return
     rnd = np.random.RandomState(0)
-    X = gram_matrix_if_pairwise(rnd.uniform(size=(10, 3)), estimator_orig)
+    X = maybe_pairwise(rnd.uniform(size=(10, 3)), estimator_orig)
     y = np.arange(10) % 3
     estimator = clone(estimator_orig)
     set_random_state(estimator)
@@ -1336,7 +1333,7 @@ def check_classifiers_classes(name, classifier_orig):
     # We need to make sure that we have non negative data, for things
     # like NMF
     X -= X.min() - .1
-    X = gram_matrix_if_pairwise(X, classifier_orig)
+    X = maybe_pairwise(X, classifier_orig)
     y_names = np.array(["one", "two", "three"])[y]
 
     for y_names in [y_names, y_names.astype('O')]:
@@ -1368,7 +1365,7 @@ def check_classifiers_classes(name, classifier_orig):
 @ignore_warnings(category=(DeprecationWarning, FutureWarning))
 def check_regressors_int(name, regressor_orig):
     X, _ = _boston_subset()
-    X = gram_matrix_if_pairwise(X[:50], regressor_orig)
+    X = maybe_pairwise(X[:50], regressor_orig)
     rnd = np.random.RandomState(0)
     y = rnd.randint(3, size=X.shape[0])
     y = multioutput_estimator_convert_y_2d(regressor_orig, y)
@@ -1396,7 +1393,7 @@ def check_regressors_int(name, regressor_orig):
 @ignore_warnings(category=(DeprecationWarning, FutureWarning))
 def check_regressors_train(name, regressor_orig):
     X, y = _boston_subset()
-    X = gram_matrix_if_pairwise(X, regressor_orig)
+    X = maybe_pairwise(X, regressor_orig)
     y = StandardScaler().fit_transform(y.reshape(-1, 1))  # X is already scaled
     y = y.ravel()
     regressor = clone(regressor_orig)
@@ -1562,7 +1559,7 @@ def check_estimators_overwrite_params(name, estimator_orig):
     X, y = make_blobs(random_state=0, n_samples=9)
     # some want non-negative input
     X -= X.min()
-    X = gram_matrix_if_pairwise(X, estimator_orig, kernel=rbf_kernel)
+    X = maybe_pairwise(X, estimator_orig, kernel=rbf_kernel)
     estimator = clone(estimator_orig)
     y = multioutput_estimator_convert_y_2d(estimator, y)
 
@@ -1637,7 +1634,7 @@ def check_sparsify_coefficients(name, estimator_orig):
 @ignore_warnings(category=DeprecationWarning)
 def check_classifier_data_not_an_array(name, estimator_orig):
     X = np.array([[3, 0], [0, 1], [0, 2], [1, 1], [1, 2], [2, 1]])
-    X = gram_matrix_if_pairwise(X, estimator_orig)
+    X = maybe_pairwise(X, estimator_orig)
     y = [1, 1, 1, 2, 2, 2]
     y = multioutput_estimator_convert_y_2d(estimator_orig, y)
     check_estimators_data_not_an_array(name, estimator_orig, X, y)
@@ -1646,7 +1643,7 @@ def check_classifier_data_not_an_array(name, estimator_orig):
 @ignore_warnings(category=DeprecationWarning)
 def check_regressor_data_not_an_array(name, estimator_orig):
     X, y = _boston_subset(n_samples=50)
-    X = gram_matrix_if_pairwise(X, estimator_orig)
+    X = maybe_pairwise(X, estimator_orig)
     y = multioutput_estimator_convert_y_2d(estimator_orig, y)
     check_estimators_data_not_an_array(name, estimator_orig, X, y)
 

From eef2accbfc33b148c39c92b97f884e7f1dd544cb Mon Sep 17 00:00:00 2001
From: gkjohns <gkjohns@gmail.com>
Date: Thu, 19 Oct 2017 00:45:01 -0400
Subject: [PATCH 30/51] cast X to numpy array in
 check_sample_weights_pandas_series() so maybe_pairwise() has access to shape
 property

---
 sklearn/utils/estimator_checks.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sklearn/utils/estimator_checks.py b/sklearn/utils/estimator_checks.py
index da030d3f4ea65..7bbd39fe60d17 100644
--- a/sklearn/utils/estimator_checks.py
+++ b/sklearn/utils/estimator_checks.py
@@ -427,7 +427,7 @@ def check_sample_weights_pandas_series(name, estimator_orig):
     if has_fit_parameter(estimator, "sample_weight"):
         try:
             import pandas as pd
-            X = [[1, 1], [1, 2], [1, 3], [2, 1], [2, 2], [2, 3]]
+            X = np.array([[1, 1], [1, 2], [1, 3], [2, 1], [2, 2], [2, 3]])
             X = pd.DataFrame(maybe_pairwise(X, estimator_orig))
             y = pd.Series([1, 1, 1, 2, 2, 2])
             weights = pd.Series([1] * 6)

From 0bdb9366aaa95a5f8f6ecb15552ffa2f6ea48424 Mon Sep 17 00:00:00 2001
From: gkjohns <gkjohns@gmail.com>
Date: Thu, 19 Oct 2017 01:07:30 -0400
Subject: [PATCH 31/51] PEP8 fix indentation

---
 sklearn/utils/estimator_checks.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sklearn/utils/estimator_checks.py b/sklearn/utils/estimator_checks.py
index 7bbd39fe60d17..fdee87b20ee3c 100644
--- a/sklearn/utils/estimator_checks.py
+++ b/sklearn/utils/estimator_checks.py
@@ -920,7 +920,7 @@ def check_estimators_nan_inf(name, estimator_orig):
     # Checks that Estimator X's do not contain NaN or inf.
     rnd = np.random.RandomState(0)
     X_train_finite = maybe_pairwise(rnd.uniform(size=(10, 3)),
-                                             estimator_orig)
+                                    estimator_orig)
     X_train_nan = rnd.uniform(size=(10, 3))
     X_train_nan[0, 0] = np.nan
     X_train_inf = rnd.uniform(size=(10, 3))

From 84d86156b18e8a4c08109bd269ad9f644f25c9b2 Mon Sep 17 00:00:00 2001
From: gkjohns <gkjohns@gmail.com>
Date: Fri, 20 Oct 2017 16:25:27 -0400
Subject: [PATCH 32/51] make is_pairwise helper functions private. Remove them
 from base. Consolidate pairwise tests into single test

---
 sklearn/base.py                              | 34 -------------
 sklearn/utils/estimator_checks.py            | 50 ++++++++++++++++----
 sklearn/utils/tests/test_estimator_checks.py | 12 ++---
 3 files changed, 44 insertions(+), 52 deletions(-)

diff --git a/sklearn/base.py b/sklearn/base.py
index 2640550ee8963..4b7055086d7ba 100644
--- a/sklearn/base.py
+++ b/sklearn/base.py
@@ -561,37 +561,3 @@ def is_regressor(estimator):
         True if estimator is a regressor and False otherwise.
     """
     return getattr(estimator, "_estimator_type", None) == "regressor"
-
-
-def is_pairwise(estimator):
-    """Returns True if estimator has a _pairwise attribute set to True.
-
-    Parameters
-    ----------
-    estimator : object
-        Estimator object to test.
-
-    Returns
-    -------
-    out : bool
-        True if _pairwise is set to True and False otherwise.
-    """
-    return bool(getattr(estimator, "_pairwise", False))
-
-
-def is_pairwise_metric(estimator):
-    """Returns True if estimator accepts pairwise metric.
-
-    Parameters
-    ----------
-    estimator : object
-        Estimator object to test.
-
-    Returns
-    -------
-    out : bool
-        True if _pairwise is set to True and False otherwise.
-    """
-    metric = getattr(estimator,  "metric", None)
-
-    return bool(metric == 'precomputed')
diff --git a/sklearn/utils/estimator_checks.py b/sklearn/utils/estimator_checks.py
index fdee87b20ee3c..d5dd89c5552ca 100644
--- a/sklearn/utils/estimator_checks.py
+++ b/sklearn/utils/estimator_checks.py
@@ -36,8 +36,8 @@
 
 
 from sklearn.base import (clone, TransformerMixin, ClusterMixin,
-                          BaseEstimator, is_classifier, is_regressor,
-                          is_pairwise, is_pairwise_metric)
+                          BaseEstimator, is_classifier, is_regressor)
+
 from sklearn.metrics import accuracy_score, adjusted_rand_score, f1_score
 
 from sklearn.random_projection import BaseRandomProjection
@@ -357,15 +357,48 @@ def _is_32bit():
     """Detect if process is 32bit Python."""
     return struct.calcsize('P') * 8 == 32
 
+def _is_pairwise(estimator):
+    """Returns True if estimator has a _pairwise attribute set to True.
+
+    Parameters
+    ----------
+    estimator : object
+        Estimator object to test.
+
+    Returns
+    -------
+    out : bool
+        True if _pairwise is set to True and False otherwise.
+    """
+    return bool(getattr(estimator, "_pairwise", False))
+
+
+def _is_pairwise_metric(estimator):
+    """Returns True if estimator accepts pairwise metric.
+
+    Parameters
+    ----------
+    estimator : object
+        Estimator object to test.
+
+    Returns
+    -------
+    out : bool
+        True if _pairwise is set to True and False otherwise.
+    """
+    metric = getattr(estimator,  "metric", None)
+
+    return bool(metric == 'precomputed')
 
 def maybe_pairwise(X, estimator, kernel=linear_kernel):
 
     if len(X.shape) == 1:
         X = X.reshape(-1, 1)
 
-    if is_pairwise_metric(estimator):
+    if _is_pairwise_metric(estimator):
+        # workaround for this function
         return pairwise_distances(X, metric='mahalanobis')
-    if is_pairwise(estimator):
+    if _is_pairwise(estimator):
         return kernel(X, X)
 
     return X
@@ -373,8 +406,7 @@ def maybe_pairwise(X, estimator, kernel=linear_kernel):
 
 def check_estimator_sparse_data(name, estimator_orig):
 
-    # Sparse precomputed kernels aren't supported
-    if is_pairwise(estimator_orig):
+    if _is_pairwise(estimator_orig):
         return
 
     rng = np.random.RandomState(0)
@@ -1189,7 +1221,7 @@ def check_classifiers_train(name, classifier_orig):
             assert_greater(accuracy_score(y, y_pred), 0.83)
 
         # raises error on malformed input for predict
-        if is_pairwise(classifier):
+        if _is_pairwise(classifier):
             with assert_raises(ValueError, msg="The classifier {} does not"
                                " raise an error when the number of features "
                                "in predict is not equal to (n_test_samples,"
@@ -1216,7 +1248,7 @@ def check_classifiers_train(name, classifier_orig):
                     assert_array_equal(np.argmax(decision, axis=1), y_pred)
 
                 # raises error on malformed input for decision_function
-                if not is_pairwise(classifier):
+                if not _is_pairwise(classifier):
                     with assert_raises(ValueError, msg="The classifier {} does"
                                        " not raise an error when the number "
                                        "of features in decision_function is "
@@ -1472,7 +1504,7 @@ def check_class_weight_classifiers(name, classifier_orig):
                                                             random_state=0)
 
         # can't use gram_if_pairwise() here, setting up gram matrix manually
-        if is_pairwise(classifier_orig):
+        if _is_pairwise(classifier_orig):
             X_test = rbf_kernel(X_test, X_train)
             X_train = rbf_kernel(X_train, X_train)
 
diff --git a/sklearn/utils/tests/test_estimator_checks.py b/sklearn/utils/tests/test_estimator_checks.py
index 393a09c438e23..2323f8a634eb2 100644
--- a/sklearn/utils/tests/test_estimator_checks.py
+++ b/sklearn/utils/tests/test_estimator_checks.py
@@ -8,7 +8,6 @@
 from sklearn.utils.testing import (assert_raises_regex, assert_true,
                                    assert_equal, ignore_warnings)
 from sklearn.utils.estimator_checks import check_estimator
-from sklearn.utils.estimator_checks import is_pairwise, is_pairwise_metric
 from sklearn.utils.estimator_checks import set_random_state
 from sklearn.utils.estimator_checks import set_checking_parameters
 from sklearn.utils.estimator_checks import check_estimators_unfitted
@@ -258,17 +257,12 @@ def __init__(self):
 
 def test_check_estimator_pairwise():
     # check that check_estimator() works on estimator with _pairwise
-    # attribute set
+    # kernel or  metric
 
+    # test precomputed kernel
     est = SVC(kernel='precomputed')
-    assert(is_pairwise(est))
     check_estimator(est)
 
-
-def test_check_estimator_pairwise_metric():
-    # check that check_estimator works for estimator that is based on
-    # a precomputed metric
-
+    # test precomputed metric
     est = KNeighborsRegressor(metric='precomputed')
-    assert(is_pairwise_metric(est))
     check_estimator(est)

From f7b76d9a4245ea19c1df32ed72a2fb529b76e7e7 Mon Sep 17 00:00:00 2001
From: gkjohns <gkjohns@gmail.com>
Date: Mon, 23 Oct 2017 15:57:55 -0400
Subject: [PATCH 33/51] make check_estimator_sparse_data() acknowledge 'Sparse'
 as well as 'sparse'

---
 sklearn/neighbors/regression.py   | 2 +-
 sklearn/utils/estimator_checks.py | 5 +----
 2 files changed, 2 insertions(+), 5 deletions(-)

diff --git a/sklearn/neighbors/regression.py b/sklearn/neighbors/regression.py
index bd2ffb9b82489..461e14b433956 100644
--- a/sklearn/neighbors/regression.py
+++ b/sklearn/neighbors/regression.py
@@ -139,7 +139,7 @@ def predict(self, X):
         y : array of int, shape = [n_samples] or [n_samples, n_outputs]
             Target values
         """
-        X = check_array(X, accept_sparse='csr')
+        X = check_array(X, accept_sparse=False)
 
         neigh_dist, neigh_ind = self.kneighbors(X)
 
diff --git a/sklearn/utils/estimator_checks.py b/sklearn/utils/estimator_checks.py
index d5dd89c5552ca..d118dec321c92 100644
--- a/sklearn/utils/estimator_checks.py
+++ b/sklearn/utils/estimator_checks.py
@@ -406,9 +406,6 @@ def maybe_pairwise(X, estimator, kernel=linear_kernel):
 
 def check_estimator_sparse_data(name, estimator_orig):
 
-    if _is_pairwise(estimator_orig):
-        return
-
     rng = np.random.RandomState(0)
     X = rng.rand(40, 10)
     X[X < .8] = 0
@@ -438,7 +435,7 @@ def check_estimator_sparse_data(name, estimator_orig):
                 probs = estimator.predict_proba(X)
                 assert_equal(probs.shape, (X.shape[0], 4))
         except TypeError as e:
-            if 'sparse' not in repr(e):
+            if 'sparse' not in repr(e).lower():
                 print("Estimator %s doesn't seem to fail gracefully on "
                       "sparse data: error message state explicitly that "
                       "sparse input is not supported if this is not the case."

From 42a508d27f8b19a3faa516c87e06f0b946b66469 Mon Sep 17 00:00:00 2001
From: gkjohns <gkjohns@gmail.com>
Date: Tue, 24 Oct 2017 10:57:05 -0400
Subject: [PATCH 34/51] remove kneighbors tests for sparse matrices, not
 supported

---
 sklearn/neighbors/tests/test_neighbors.py | 43 -----------------------
 1 file changed, 43 deletions(-)

diff --git a/sklearn/neighbors/tests/test_neighbors.py b/sklearn/neighbors/tests/test_neighbors.py
index 052c83c71d2e7..fc7faac599879 100644
--- a/sklearn/neighbors/tests/test_neighbors.py
+++ b/sklearn/neighbors/tests/test_neighbors.py
@@ -484,29 +484,6 @@ def test_RadiusNeighborsClassifier_multioutput():
         assert_array_almost_equal(y_pred_mo, y_pred_so)
 
 
-def test_kneighbors_classifier_sparse(n_samples=40,
-                                      n_features=5,
-                                      n_test_pts=10,
-                                      n_neighbors=5,
-                                      random_state=0):
-    # Test k-NN classifier on sparse matrices
-    # Like the above, but with various types of sparse matrices
-    rng = np.random.RandomState(random_state)
-    X = 2 * rng.rand(n_samples, n_features) - 1
-    X *= X > .2
-    y = ((X ** 2).sum(axis=1) < .5).astype(np.int)
-
-    for sparsemat in SPARSE_TYPES:
-        knn = neighbors.KNeighborsClassifier(n_neighbors=n_neighbors,
-                                             algorithm='auto')
-        knn.fit(sparsemat(X), y)
-        epsilon = 1e-5 * (2 * rng.rand(1, n_features) - 1)
-        for sparsev in SPARSE_TYPES + (np.asarray,):
-            X_eps = sparsev(X[:n_test_pts] + epsilon)
-            y_pred = knn.predict(X_eps)
-            assert_array_equal(y_pred, y[:n_test_pts])
-
-
 def test_KNeighborsClassifier_multioutput():
     # Test k-NN classifier on multioutput data
     rng = check_random_state(0)
@@ -716,26 +693,6 @@ def test_RadiusNeighborsRegressor_multioutput(n_samples=40,
         assert_true(np.all(np.abs(y_pred - y_target) < 0.3))
 
 
-def test_kneighbors_regressor_sparse(n_samples=40,
-                                     n_features=5,
-                                     n_test_pts=10,
-                                     n_neighbors=5,
-                                     random_state=0):
-    # Test radius-based regression on sparse matrices
-    # Like the above, but with various types of sparse matrices
-    rng = np.random.RandomState(random_state)
-    X = 2 * rng.rand(n_samples, n_features) - 1
-    y = ((X ** 2).sum(axis=1) < .25).astype(np.int)
-
-    for sparsemat in SPARSE_TYPES:
-        knn = neighbors.KNeighborsRegressor(n_neighbors=n_neighbors,
-                                            algorithm='auto')
-        knn.fit(sparsemat(X), y)
-        for sparsev in SPARSE_OR_DENSE:
-            X2 = sparsev(X)
-            assert_true(np.mean(knn.predict(X2).round() == y) > 0.95)
-
-
 def test_neighbors_iris():
     # Sanity checks on the iris dataset
     # Puts three points of each label in the plane and performs a

From adf110a76d25bda20ee611b65ee803eab1a64791 Mon Sep 17 00:00:00 2001
From: gkjohns <gkjohns@gmail.com>
Date: Tue, 24 Oct 2017 16:12:37 -0400
Subject: [PATCH 35/51] bring tests for kneighbors on sparse data back, check
 for ValueError

---
 sklearn/neighbors/regression.py           |  6 ++++
 sklearn/neighbors/tests/test_neighbors.py | 44 +++++++++++++++++++++++
 2 files changed, 50 insertions(+)

diff --git a/sklearn/neighbors/regression.py b/sklearn/neighbors/regression.py
index 461e14b433956..81628f4877979 100644
--- a/sklearn/neighbors/regression.py
+++ b/sklearn/neighbors/regression.py
@@ -9,6 +9,7 @@
 # License: BSD 3 clause (C) INRIA, University of Amsterdam
 
 import numpy as np
+from scipy.sparse import issparse
 
 from .base import _get_weights, _check_weights, NeighborsBase, KNeighborsMixin
 from .base import RadiusNeighborsMixin, SupervisedFloatMixin
@@ -139,6 +140,11 @@ def predict(self, X):
         y : array of int, shape = [n_samples] or [n_samples, n_outputs]
             Target values
         """
+        if issparse(X):
+            raise ValueError(
+                "Sparse matricies not supported for prediction. "
+                "Densify your matrix."
+            )
         X = check_array(X, accept_sparse=False)
 
         neigh_dist, neigh_ind = self.kneighbors(X)
diff --git a/sklearn/neighbors/tests/test_neighbors.py b/sklearn/neighbors/tests/test_neighbors.py
index fc7faac599879..d4a1fe373cedd 100644
--- a/sklearn/neighbors/tests/test_neighbors.py
+++ b/sklearn/neighbors/tests/test_neighbors.py
@@ -484,6 +484,29 @@ def test_RadiusNeighborsClassifier_multioutput():
         assert_array_almost_equal(y_pred_mo, y_pred_so)
 
 
+def test_kneighbors_classifier_sparse(n_samples=40,
+                                      n_features=5,
+                                      n_test_pts=10,
+                                      n_neighbors=5,
+                                      random_state=0):
+    # Test k-NN classifier on sparse matrices
+    # Like the above, but with various types of sparse matrices
+    rng = np.random.RandomState(random_state)
+    X = 2 * rng.rand(n_samples, n_features) - 1
+    X *= X > .2
+    y = ((X ** 2).sum(axis=1) < .5).astype(np.int)
+
+    for sparsemat in SPARSE_TYPES:
+        knn = neighbors.KNeighborsClassifier(n_neighbors=n_neighbors,
+                                             algorithm='auto')
+        knn.fit(sparsemat(X), y)
+        epsilon = 1e-5 * (2 * rng.rand(1, n_features) - 1)
+        for sparsev in SPARSE_TYPES + (np.asarray,):
+            X_eps = sparsev(X[:n_test_pts] + epsilon)
+            y_pred = knn.predict(X_eps)
+            assert_array_equal(y_pred, y[:n_test_pts])
+
+
 def test_KNeighborsClassifier_multioutput():
     # Test k-NN classifier on multioutput data
     rng = check_random_state(0)
@@ -693,6 +716,27 @@ def test_RadiusNeighborsRegressor_multioutput(n_samples=40,
         assert_true(np.all(np.abs(y_pred - y_target) < 0.3))
 
 
+def test_kneighbors_regressor_sparse(n_samples=40,
+                                     n_features=5,
+                                     n_test_pts=10,
+                                     n_neighbors=5,
+                                     random_state=0):
+    # Test radius-based regression on sparse matrices
+    # Like the above, but with various types of sparse matrices
+    rng = np.random.RandomState(random_state)
+    X = 2 * rng.rand(n_samples, n_features) - 1
+    y = ((X ** 2).sum(axis=1) < .25).astype(np.int)
+
+    for sparsemat in SPARSE_TYPES:
+        knn = neighbors.KNeighborsRegressor(n_neighbors=n_neighbors,
+                                            algorithm='auto')
+        knn.fit(sparsemat(X), y)
+        for sparsev in SPARSE_OR_DENSE:
+            X2 = sparsev(X)
+            assert_raises(ValueError, knn.predict, csr_matrix(X2))
+            # assert_true(np.mean(knn.predict(X2).round() == y) > 0.95)
+
+
 def test_neighbors_iris():
     # Sanity checks on the iris dataset
     # Puts three points of each label in the plane and performs a

From 35ae37288bc1c73b7447365d5a155ee0508368b9 Mon Sep 17 00:00:00 2001
From: gkjohns <gkjohns@gmail.com>
Date: Tue, 24 Oct 2017 17:23:08 -0400
Subject: [PATCH 36/51] fix check_estimator_sparse_data() test to account for
 TypeError and ValueError for sparse matrices

---
 sklearn/utils/estimator_checks.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sklearn/utils/estimator_checks.py b/sklearn/utils/estimator_checks.py
index d118dec321c92..1f4b21b8f0119 100644
--- a/sklearn/utils/estimator_checks.py
+++ b/sklearn/utils/estimator_checks.py
@@ -434,7 +434,7 @@ def check_estimator_sparse_data(name, estimator_orig):
             if hasattr(estimator, 'predict_proba'):
                 probs = estimator.predict_proba(X)
                 assert_equal(probs.shape, (X.shape[0], 4))
-        except TypeError as e:
+        except (TypeError, ValueError) as e:
             if 'sparse' not in repr(e).lower():
                 print("Estimator %s doesn't seem to fail gracefully on "
                       "sparse data: error message state explicitly that "

From 688376fb3168d96f8d6ba86b3a8dd2b94b8decb8 Mon Sep 17 00:00:00 2001
From: gkjohns <gkjohns@gmail.com>
Date: Tue, 24 Oct 2017 19:00:48 -0400
Subject: [PATCH 37/51] PEP8 add newlines

---
 sklearn/utils/estimator_checks.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/sklearn/utils/estimator_checks.py b/sklearn/utils/estimator_checks.py
index 1f4b21b8f0119..3354fda2ac5fd 100644
--- a/sklearn/utils/estimator_checks.py
+++ b/sklearn/utils/estimator_checks.py
@@ -357,6 +357,7 @@ def _is_32bit():
     """Detect if process is 32bit Python."""
     return struct.calcsize('P') * 8 == 32
 
+
 def _is_pairwise(estimator):
     """Returns True if estimator has a _pairwise attribute set to True.
 
@@ -390,6 +391,7 @@ def _is_pairwise_metric(estimator):
 
     return bool(metric == 'precomputed')
 
+
 def maybe_pairwise(X, estimator, kernel=linear_kernel):
 
     if len(X.shape) == 1:

From a3e7b64c6ad9eb26425bfbcdd06dc844a4919ffa Mon Sep 17 00:00:00 2001
From: gkjohns <gkjohns@gmail.com>
Date: Wed, 25 Oct 2017 14:25:39 -0400
Subject: [PATCH 38/51] add Y argument (the input vector again) to
 pairwise_distances() to hopefully mitigate weird Travis error for
 maybe_pairwise()

---
 sklearn/utils/estimator_checks.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/sklearn/utils/estimator_checks.py b/sklearn/utils/estimator_checks.py
index 3354fda2ac5fd..5ee13455c674b 100644
--- a/sklearn/utils/estimator_checks.py
+++ b/sklearn/utils/estimator_checks.py
@@ -398,8 +398,7 @@ def maybe_pairwise(X, estimator, kernel=linear_kernel):
         X = X.reshape(-1, 1)
 
     if _is_pairwise_metric(estimator):
-        # workaround for this function
-        return pairwise_distances(X, metric='mahalanobis')
+        return pairwise_distances(X, X, metric='mahalanobis')
     if _is_pairwise(estimator):
         return kernel(X, X)
 

From 2b131f38dbac70c8f25ffdf6e13d0b0fb6802dc4 Mon Sep 17 00:00:00 2001
From: gkjohns <gkjohns@gmail.com>
Date: Fri, 27 Oct 2017 10:34:55 -0400
Subject: [PATCH 39/51] use euclidean distance in maybe_pairwise() to try and
 mitigate weird travis ci error

---
 sklearn/utils/estimator_checks.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sklearn/utils/estimator_checks.py b/sklearn/utils/estimator_checks.py
index 5ee13455c674b..da5f6dd0b898f 100644
--- a/sklearn/utils/estimator_checks.py
+++ b/sklearn/utils/estimator_checks.py
@@ -398,7 +398,7 @@ def maybe_pairwise(X, estimator, kernel=linear_kernel):
         X = X.reshape(-1, 1)
 
     if _is_pairwise_metric(estimator):
-        return pairwise_distances(X, X, metric='mahalanobis')
+        return pairwise_distances(X, metric='euclidean')
     if _is_pairwise(estimator):
         return kernel(X, X)
 

From 834918ca80bf5a1ce6c5df5db86d01a116f274b2 Mon Sep 17 00:00:00 2001
From: gkjohns <gkjohns@gmail.com>
Date: Mon, 30 Oct 2017 09:52:01 -0400
Subject: [PATCH 40/51] change maybe_pairwise() name to
 pairwise_estimator_convert_X()

---
 sklearn/utils/estimator_checks.py | 46 +++++++++++++++----------------
 1 file changed, 23 insertions(+), 23 deletions(-)

diff --git a/sklearn/utils/estimator_checks.py b/sklearn/utils/estimator_checks.py
index a4b51dc233313..60bfe520b6b33 100644
--- a/sklearn/utils/estimator_checks.py
+++ b/sklearn/utils/estimator_checks.py
@@ -392,7 +392,7 @@ def _is_pairwise_metric(estimator):
     return bool(metric == 'precomputed')
 
 
-def maybe_pairwise(X, estimator, kernel=linear_kernel):
+def pairwise_estimator_convert_X(X, estimator, kernel=linear_kernel):
 
     if len(X.shape) == 1:
         X = X.reshape(-1, 1)
@@ -410,7 +410,7 @@ def check_estimator_sparse_data(name, estimator_orig):
     rng = np.random.RandomState(0)
     X = rng.rand(40, 10)
     X[X < .8] = 0
-    X = maybe_pairwise(X, estimator_orig)
+    X = pairwise_estimator_convert_X(X, estimator_orig)
     X_csr = sparse.csr_matrix(X)
     y = (4 * rng.rand(40)).astype(np.int)
     # catch deprecation warnings
@@ -458,7 +458,7 @@ def check_sample_weights_pandas_series(name, estimator_orig):
         try:
             import pandas as pd
             X = np.array([[1, 1], [1, 2], [1, 3], [2, 1], [2, 2], [2, 3]])
-            X = pd.DataFrame(maybe_pairwise(X, estimator_orig))
+            X = pd.DataFrame(pairwise_estimator_convert_X(X, estimator_orig))
             y = pd.Series([1, 1, 1, 2, 2, 2])
             weights = pd.Series([1] * 6)
             try:
@@ -479,7 +479,7 @@ def check_sample_weights_list(name, estimator_orig):
     if has_fit_parameter(estimator_orig, "sample_weight"):
         estimator = clone(estimator_orig)
         rnd = np.random.RandomState(0)
-        X = maybe_pairwise(rnd.uniform(size=(10, 3)), estimator_orig)
+        X = pairwise_estimator_convert_X(rnd.uniform(size=(10, 3)), estimator_orig)
         y = np.arange(10) % 3
         y = multioutput_estimator_convert_y_2d(estimator, y)
         sample_weight = [3] * 10
@@ -491,7 +491,7 @@ def check_sample_weights_list(name, estimator_orig):
 def check_dtype_object(name, estimator_orig):
     # check that estimators treat dtype object as numeric if possible
     rng = np.random.RandomState(0)
-    X = maybe_pairwise(rng.rand(40, 10), estimator_orig)
+    X = pairwise_estimator_convert_X(rng.rand(40, 10), estimator_orig)
     X = X.astype(object)
     y = (X[:, 0] * 4).astype(np.int)
     estimator = clone(estimator_orig)
@@ -539,7 +539,7 @@ def check_dict_unchanged(name, estimator_orig):
     else:
         X = 2 * rnd.uniform(size=(20, 3))
 
-    X = maybe_pairwise(X, estimator_orig)
+    X = pairwise_estimator_convert_X(X, estimator_orig)
 
     y = X[:, 0].astype(np.int)
     estimator = clone(estimator_orig)
@@ -578,7 +578,7 @@ def check_dont_overwrite_parameters(name, estimator_orig):
     estimator = clone(estimator_orig)
     rnd = np.random.RandomState(0)
     X = 3 * rnd.uniform(size=(20, 3))
-    X = maybe_pairwise(X, estimator_orig)
+    X = pairwise_estimator_convert_X(X, estimator_orig)
     y = X[:, 0].astype(np.int)
     y = multioutput_estimator_convert_y_2d(estimator, y)
 
@@ -625,7 +625,7 @@ def check_fit2d_predict1d(name, estimator_orig):
     # check by fitting a 2d array and predicting with a 1d array
     rnd = np.random.RandomState(0)
     X = 3 * rnd.uniform(size=(20, 3))
-    X = maybe_pairwise(X, estimator_orig)
+    X = pairwise_estimator_convert_X(X, estimator_orig)
     y = X[:, 0].astype(np.int)
     estimator = clone(estimator_orig)
     y = multioutput_estimator_convert_y_2d(estimator, y)
@@ -679,7 +679,7 @@ def check_fit2d_1feature(name, estimator_orig):
     # informative message
     rnd = np.random.RandomState(0)
     X = 3 * rnd.uniform(size=(10, 1))
-    X = maybe_pairwise(X, estimator_orig)
+    X = pairwise_estimator_convert_X(X, estimator_orig)
     y = X[:, 0].astype(np.int)
     estimator = clone(estimator_orig)
     y = multioutput_estimator_convert_y_2d(estimator, y)
@@ -852,7 +852,7 @@ def check_pipeline_consistency(name, estimator_orig):
     X, y = make_blobs(n_samples=30, centers=[[0, 0, 0], [1, 1, 1]],
                       random_state=0, n_features=2, cluster_std=0.1)
     X -= X.min()
-    X = maybe_pairwise(X, estimator_orig, kernel=rbf_kernel)
+    X = pairwise_estimator_convert_X(X, estimator_orig, kernel=rbf_kernel)
     estimator = clone(estimator_orig)
     y = multioutput_estimator_convert_y_2d(estimator, y)
     set_random_state(estimator)
@@ -877,7 +877,7 @@ def check_fit_score_takes_y(name, estimator_orig):
     # in fit and score so they can be used in pipelines
     rnd = np.random.RandomState(0)
     X = rnd.uniform(size=(10, 3))
-    X = maybe_pairwise(X, estimator_orig)
+    X = pairwise_estimator_convert_X(X, estimator_orig)
     y = np.arange(10) % 3
     estimator = clone(estimator_orig)
     y = multioutput_estimator_convert_y_2d(estimator, y)
@@ -903,7 +903,7 @@ def check_fit_score_takes_y(name, estimator_orig):
 def check_estimators_dtypes(name, estimator_orig):
     rnd = np.random.RandomState(0)
     X_train_32 = 3 * rnd.uniform(size=(20, 5)).astype(np.float32)
-    X_train_32 = maybe_pairwise(X_train_32, estimator_orig)
+    X_train_32 = pairwise_estimator_convert_X(X_train_32, estimator_orig)
     X_train_64 = X_train_32.astype(np.float64)
     X_train_int_64 = X_train_32.astype(np.int64)
     X_train_int_32 = X_train_32.astype(np.int32)
@@ -949,7 +949,7 @@ def check_estimators_empty_data_messages(name, estimator_orig):
 def check_estimators_nan_inf(name, estimator_orig):
     # Checks that Estimator X's do not contain NaN or inf.
     rnd = np.random.RandomState(0)
-    X_train_finite = maybe_pairwise(rnd.uniform(size=(10, 3)),
+    X_train_finite = pairwise_estimator_convert_X(rnd.uniform(size=(10, 3)),
                                     estimator_orig)
     X_train_nan = rnd.uniform(size=(10, 3))
     X_train_nan[0, 0] = np.nan
@@ -1027,7 +1027,7 @@ def check_estimators_pickle(name, estimator_orig):
 
     # some estimators can't do features less than 0
     X -= X.min()
-    X = maybe_pairwise(X, estimator_orig, kernel=rbf_kernel)
+    X = pairwise_estimator_convert_X(X, estimator_orig, kernel=rbf_kernel)
 
     estimator = clone(estimator_orig)
 
@@ -1202,7 +1202,7 @@ def check_classifiers_train(name, classifier_orig):
         classifier = clone(classifier_orig)
         if name in ['BernoulliNB', 'MultinomialNB', 'ComplementNB']:
             X -= X.min()
-        X = maybe_pairwise(X, classifier_orig)
+        X = pairwise_estimator_convert_X(X, classifier_orig)
         set_random_state(classifier)
         # raises error on malformed input for fit
         with assert_raises(ValueError, msg="The classifer {} does not"
@@ -1286,7 +1286,7 @@ def check_estimators_fit_returns_self(name, estimator_orig):
     X, y = make_blobs(random_state=0, n_samples=9, n_features=4)
     # some want non-negative input
     X -= X.min()
-    X = maybe_pairwise(X, estimator_orig)
+    X = pairwise_estimator_convert_X(X, estimator_orig)
 
     estimator = clone(estimator_orig)
     y = multioutput_estimator_convert_y_2d(estimator, y)
@@ -1334,7 +1334,7 @@ def check_supervised_y_2d(name, estimator_orig):
         # These only work on 2d, so this test makes no sense
         return
     rnd = np.random.RandomState(0)
-    X = maybe_pairwise(rnd.uniform(size=(10, 3)), estimator_orig)
+    X = pairwise_estimator_convert_X(rnd.uniform(size=(10, 3)), estimator_orig)
     y = np.arange(10) % 3
     estimator = clone(estimator_orig)
     set_random_state(estimator)
@@ -1368,7 +1368,7 @@ def check_classifiers_classes(name, classifier_orig):
     # We need to make sure that we have non negative data, for things
     # like NMF
     X -= X.min() - .1
-    X = maybe_pairwise(X, classifier_orig)
+    X = pairwise_estimator_convert_X(X, classifier_orig)
     y_names = np.array(["one", "two", "three"])[y]
 
     for y_names in [y_names, y_names.astype('O')]:
@@ -1400,7 +1400,7 @@ def check_classifiers_classes(name, classifier_orig):
 @ignore_warnings(category=(DeprecationWarning, FutureWarning))
 def check_regressors_int(name, regressor_orig):
     X, _ = _boston_subset()
-    X = maybe_pairwise(X[:50], regressor_orig)
+    X = pairwise_estimator_convert_X(X[:50], regressor_orig)
     rnd = np.random.RandomState(0)
     y = rnd.randint(3, size=X.shape[0])
     y = multioutput_estimator_convert_y_2d(regressor_orig, y)
@@ -1428,7 +1428,7 @@ def check_regressors_int(name, regressor_orig):
 @ignore_warnings(category=(DeprecationWarning, FutureWarning))
 def check_regressors_train(name, regressor_orig):
     X, y = _boston_subset()
-    X = maybe_pairwise(X, regressor_orig)
+    X = pairwise_estimator_convert_X(X, regressor_orig)
     y = StandardScaler().fit_transform(y.reshape(-1, 1))  # X is already scaled
     y = y.ravel()
     regressor = clone(regressor_orig)
@@ -1594,7 +1594,7 @@ def check_estimators_overwrite_params(name, estimator_orig):
     X, y = make_blobs(random_state=0, n_samples=9)
     # some want non-negative input
     X -= X.min()
-    X = maybe_pairwise(X, estimator_orig, kernel=rbf_kernel)
+    X = pairwise_estimator_convert_X(X, estimator_orig, kernel=rbf_kernel)
     estimator = clone(estimator_orig)
     y = multioutput_estimator_convert_y_2d(estimator, y)
 
@@ -1669,7 +1669,7 @@ def check_sparsify_coefficients(name, estimator_orig):
 @ignore_warnings(category=DeprecationWarning)
 def check_classifier_data_not_an_array(name, estimator_orig):
     X = np.array([[3, 0], [0, 1], [0, 2], [1, 1], [1, 2], [2, 1]])
-    X = maybe_pairwise(X, estimator_orig)
+    X = pairwise_estimator_convert_X(X, estimator_orig)
     y = [1, 1, 1, 2, 2, 2]
     y = multioutput_estimator_convert_y_2d(estimator_orig, y)
     check_estimators_data_not_an_array(name, estimator_orig, X, y)
@@ -1678,7 +1678,7 @@ def check_classifier_data_not_an_array(name, estimator_orig):
 @ignore_warnings(category=DeprecationWarning)
 def check_regressor_data_not_an_array(name, estimator_orig):
     X, y = _boston_subset(n_samples=50)
-    X = maybe_pairwise(X, estimator_orig)
+    X = pairwise_estimator_convert_X(X, estimator_orig)
     y = multioutput_estimator_convert_y_2d(estimator_orig, y)
     check_estimators_data_not_an_array(name, estimator_orig, X, y)
 

From 5032f3591bf7f98900ddba53956105d44b3079b1 Mon Sep 17 00:00:00 2001
From: gkjohns <gkjohns@gmail.com>
Date: Mon, 30 Oct 2017 11:20:09 -0400
Subject: [PATCH 41/51] change test_kneighbors_regressor_sparse() to only check
 for error on precomputed sparse X. Re-allow KNeighborsRegressor to predict
 sparse X (that's not precomputed)

---
 sklearn/neighbors/regression.py           | 8 ++++----
 sklearn/neighbors/tests/test_neighbors.py | 7 +++++--
 2 files changed, 9 insertions(+), 6 deletions(-)

diff --git a/sklearn/neighbors/regression.py b/sklearn/neighbors/regression.py
index 81628f4877979..9fc65303dac8d 100644
--- a/sklearn/neighbors/regression.py
+++ b/sklearn/neighbors/regression.py
@@ -140,12 +140,12 @@ def predict(self, X):
         y : array of int, shape = [n_samples] or [n_samples, n_outputs]
             Target values
         """
-        if issparse(X):
+        if issparse(X) and self.metric == 'precomputed':
             raise ValueError(
-                "Sparse matricies not supported for prediction. "
-                "Densify your matrix."
+                "Sparse matricies not supported for prediction with "
+                "precomputed kernels. Densify your matrix."
             )
-        X = check_array(X, accept_sparse=False)
+        X = check_array(X, accept_sparse='csr')
 
         neigh_dist, neigh_ind = self.kneighbors(X)
 
diff --git a/sklearn/neighbors/tests/test_neighbors.py b/sklearn/neighbors/tests/test_neighbors.py
index d4a1fe373cedd..31e0a8c7e4d39 100644
--- a/sklearn/neighbors/tests/test_neighbors.py
+++ b/sklearn/neighbors/tests/test_neighbors.py
@@ -733,8 +733,11 @@ def test_kneighbors_regressor_sparse(n_samples=40,
         knn.fit(sparsemat(X), y)
         for sparsev in SPARSE_OR_DENSE:
             X2 = sparsev(X)
-            assert_raises(ValueError, knn.predict, csr_matrix(X2))
-            # assert_true(np.mean(knn.predict(X2).round() == y) > 0.95)
+            # sparse precomputed distance matrices not supported for prediction
+            if knn.metric == 'precomputed':
+                assert_raises(ValueError, knn.predict, csr_matrix(X2))
+            else:
+                assert_true(np.mean(knn.predict(X2).round() == y) > 0.95)
 
 
 def test_neighbors_iris():

From 892771f3af303d114f12d3a5ad43094546979b78 Mon Sep 17 00:00:00 2001
From: gkjohns <gkjohns@gmail.com>
Date: Mon, 30 Oct 2017 11:22:07 -0400
Subject: [PATCH 42/51] PEP8 fix line length

---
 sklearn/utils/estimator_checks.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/sklearn/utils/estimator_checks.py b/sklearn/utils/estimator_checks.py
index 60bfe520b6b33..176daeecc2ebd 100644
--- a/sklearn/utils/estimator_checks.py
+++ b/sklearn/utils/estimator_checks.py
@@ -479,7 +479,8 @@ def check_sample_weights_list(name, estimator_orig):
     if has_fit_parameter(estimator_orig, "sample_weight"):
         estimator = clone(estimator_orig)
         rnd = np.random.RandomState(0)
-        X = pairwise_estimator_convert_X(rnd.uniform(size=(10, 3)), estimator_orig)
+        X = pairwise_estimator_convert_X(rnd.uniform(size=(10, 3)),
+                                         estimator_orig)
         y = np.arange(10) % 3
         y = multioutput_estimator_convert_y_2d(estimator, y)
         sample_weight = [3] * 10

From ad658394f93fc84b02439965f4468c24a3c56392 Mon Sep 17 00:00:00 2001
From: gkjohns <gkjohns@gmail.com>
Date: Mon, 30 Oct 2017 11:53:55 -0400
Subject: [PATCH 43/51] PEP8 again

---
 sklearn/utils/estimator_checks.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sklearn/utils/estimator_checks.py b/sklearn/utils/estimator_checks.py
index 176daeecc2ebd..93a903c5997f3 100644
--- a/sklearn/utils/estimator_checks.py
+++ b/sklearn/utils/estimator_checks.py
@@ -951,7 +951,7 @@ def check_estimators_nan_inf(name, estimator_orig):
     # Checks that Estimator X's do not contain NaN or inf.
     rnd = np.random.RandomState(0)
     X_train_finite = pairwise_estimator_convert_X(rnd.uniform(size=(10, 3)),
-                                    estimator_orig)
+                                                  estimator_orig)
     X_train_nan = rnd.uniform(size=(10, 3))
     X_train_nan[0, 0] = np.nan
     X_train_inf = rnd.uniform(size=(10, 3))

From b56899b1ed5ffdcd84a1555e83f7c6aeb5e370d8 Mon Sep 17 00:00:00 2001
From: gkjohns <gkjohns@gmail.com>
Date: Tue, 31 Oct 2017 20:18:37 -0400
Subject: [PATCH 44/51] change check_classifiers_train() test to check pairwise
 estimators for decision_function and predict_proba

---
 sklearn/utils/estimator_checks.py | 29 ++++++++++++++++++++++-------
 1 file changed, 22 insertions(+), 7 deletions(-)

diff --git a/sklearn/utils/estimator_checks.py b/sklearn/utils/estimator_checks.py
index 93a903c5997f3..d0543afa3e919 100644
--- a/sklearn/utils/estimator_checks.py
+++ b/sklearn/utils/estimator_checks.py
@@ -1227,7 +1227,7 @@ def check_classifiers_train(name, classifier_orig):
         # raises error on malformed input for predict
         if _is_pairwise(classifier):
             with assert_raises(ValueError, msg="The classifier {} does not"
-                               " raise an error when the number of features "
+                               " raise an error when shape of X"
                                "in predict is not equal to (n_test_samples,"
                                "n_training_samples)".format(name)):
                 classifier.predict(X.reshape(-1, 1))
@@ -1252,7 +1252,14 @@ def check_classifiers_train(name, classifier_orig):
                     assert_array_equal(np.argmax(decision, axis=1), y_pred)
 
                 # raises error on malformed input for decision_function
-                if not _is_pairwise(classifier):
+                if _is_pairwise(classifier):
+                    with assert_raises(ValueError, msg="The classifier {} does"
+                                       " not raise an error when the  "
+                                       "shape of X in decision_function is "
+                                       "not equal to (n_test_samples, "
+                                       "n_training_samples) in fit.".format(name)):
+                        classifier.decision_function(X.reshape(-1, 1))
+                else:
                     with assert_raises(ValueError, msg="The classifier {} does"
                                        " not raise an error when the number "
                                        "of features in decision_function is "
@@ -1269,11 +1276,19 @@ def check_classifiers_train(name, classifier_orig):
             # check that probas for all classes sum to one
             assert_allclose(np.sum(y_prob, axis=1), np.ones(n_samples))
             # raises error on malformed input for predict_proba
-            with assert_raises(ValueError, msg="The classifier {} does not"
-                               " raise an error when the number of features "
-                               "in predict_proba is different from the number "
-                               "of features in fit.".format(name)):
-                classifier.predict_proba(X.T)
+            if _is_pairwise(classifier_orig):
+                with assert_raises(ValueError, msg="The classifier {} does not"
+                                   " raise an error when the shape of X"
+                                   "in predict_proba is not equal to "
+                                   "(n_test_samples, n_training_samples)."\
+                                   .format(name)):
+                    classifier.predict_proba(X.reshape(-1, 1))
+            else:
+                with assert_raises(ValueError, msg="The classifier {} does not"
+                                   " raise an error when the number of features "
+                                   "in predict_proba is different from the number "
+                                   "of features in fit.".format(name)):
+                    classifier.predict_proba(X.T)
             if hasattr(classifier, "predict_log_proba"):
                 # predict_log_proba is a transformation of predict_proba
                 y_log_prob = classifier.predict_log_proba(X)

From e055fb83b19c65a4e1aa33959709e14e45fb7fa2 Mon Sep 17 00:00:00 2001
From: gkjohns <gkjohns@gmail.com>
Date: Tue, 31 Oct 2017 20:59:07 -0400
Subject: [PATCH 45/51] PEP8 line length fix

---
 sklearn/utils/estimator_checks.py | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/sklearn/utils/estimator_checks.py b/sklearn/utils/estimator_checks.py
index d0543afa3e919..7355f0179b06b 100644
--- a/sklearn/utils/estimator_checks.py
+++ b/sklearn/utils/estimator_checks.py
@@ -1257,7 +1257,8 @@ def check_classifiers_train(name, classifier_orig):
                                        " not raise an error when the  "
                                        "shape of X in decision_function is "
                                        "not equal to (n_test_samples, "
-                                       "n_training_samples) in fit.".format(name)):
+                                       "n_training_samples) in fit."
+                                       .format(name)):
                         classifier.decision_function(X.reshape(-1, 1))
                 else:
                     with assert_raises(ValueError, msg="The classifier {} does"
@@ -1280,14 +1281,15 @@ def check_classifiers_train(name, classifier_orig):
                 with assert_raises(ValueError, msg="The classifier {} does not"
                                    " raise an error when the shape of X"
                                    "in predict_proba is not equal to "
-                                   "(n_test_samples, n_training_samples)."\
+                                   "(n_test_samples, n_training_samples)."
                                    .format(name)):
                     classifier.predict_proba(X.reshape(-1, 1))
             else:
                 with assert_raises(ValueError, msg="The classifier {} does not"
-                                   " raise an error when the number of features "
-                                   "in predict_proba is different from the number "
-                                   "of features in fit.".format(name)):
+                                   " raise an error when the number of "
+                                   "features in predict_proba is different "
+                                   "from the number of features in fit."
+                                   .format(name)):
                     classifier.predict_proba(X.T)
             if hasattr(classifier, "predict_log_proba"):
                 # predict_log_proba is a transformation of predict_proba

From efeb0671261bc41c98e2566d5251171b4045dc6b Mon Sep 17 00:00:00 2001
From: gkjohns <gkjohns@gmail.com>
Date: Wed, 1 Nov 2017 12:21:23 -0400
Subject: [PATCH 46/51] update whats_new with changes to estimator checks

---
 doc/whats_new/v0.20.rst | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/doc/whats_new/v0.20.rst b/doc/whats_new/v0.20.rst
index 0897f331ebda0..9faef68812ea7 100644
--- a/doc/whats_new/v0.20.rst
+++ b/doc/whats_new/v0.20.rst
@@ -181,3 +181,9 @@ Cluster
 - Deprecate ``pooling_func`` unused parameter in
   :class:`cluster.AgglomerativeClustering`. :issue:`9875` by :user:`Kumar Ashutosh
   <thechargedneutron>`.
+
+Changes to estimator checks
+---------------------------
+
+- Pairwise Estimators
+

From 3116c23d4363c2b7b06d158395769285ec53d362 Mon Sep 17 00:00:00 2001
From: gkjohns <gkjohns@gmail.com>
Date: Sun, 5 Nov 2017 18:47:43 -0500
Subject: [PATCH 47/51] add change details to whats_new

---
 doc/whats_new/v0.20.rst | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/doc/whats_new/v0.20.rst b/doc/whats_new/v0.20.rst
index 9faef68812ea7..812f8daacdcca 100644
--- a/doc/whats_new/v0.20.rst
+++ b/doc/whats_new/v0.20.rst
@@ -185,5 +185,6 @@ Cluster
 Changes to estimator checks
 ---------------------------
 
-- Pairwise Estimators
-
+- Allow tests in :func:`estimator_checks.check_estimator` to test functions
+  that accept pairwise data.
+  :issue:`9701` by :user:`Andreas Mueller <amueller>`

From 42fa8f413a886828f1d9ce8fce0ef1055b901ae3 Mon Sep 17 00:00:00 2001
From: gkjohns <gkjohns@gmail.com>
Date: Mon, 6 Nov 2017 11:59:48 -0500
Subject: [PATCH 48/51] remove unused lines in
 estimator_checks.pairwise_estimator_convert_X()

---
 sklearn/utils/estimator_checks.py | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/sklearn/utils/estimator_checks.py b/sklearn/utils/estimator_checks.py
index 7355f0179b06b..40fcb1fdd069f 100644
--- a/sklearn/utils/estimator_checks.py
+++ b/sklearn/utils/estimator_checks.py
@@ -394,9 +394,6 @@ def _is_pairwise_metric(estimator):
 
 def pairwise_estimator_convert_X(X, estimator, kernel=linear_kernel):
 
-    if len(X.shape) == 1:
-        X = X.reshape(-1, 1)
-
     if _is_pairwise_metric(estimator):
         return pairwise_distances(X, metric='euclidean')
     if _is_pairwise(estimator):

From 69d78764e735723204aa2754fcc7e5fc0695791f Mon Sep 17 00:00:00 2001
From: gkjohns <gkjohns@gmail.com>
Date: Mon, 6 Nov 2017 12:11:09 -0500
Subject: [PATCH 49/51] remove assert_raises() for precomputed metric in
 test_kneighbors_regressor_sparse(). Already checked using
 test_check_estimator_pairwise()

---
 doc/whats_new/v0.20.rst                   | 2 +-
 sklearn/neighbors/regression.py           | 2 +-
 sklearn/neighbors/tests/test_neighbors.py | 6 +-----
 3 files changed, 3 insertions(+), 7 deletions(-)

diff --git a/doc/whats_new/v0.20.rst b/doc/whats_new/v0.20.rst
index 812f8daacdcca..13734bb828660 100644
--- a/doc/whats_new/v0.20.rst
+++ b/doc/whats_new/v0.20.rst
@@ -187,4 +187,4 @@ Changes to estimator checks
 
 - Allow tests in :func:`estimator_checks.check_estimator` to test functions
   that accept pairwise data.
-  :issue:`9701` by :user:`Andreas Mueller <amueller>`
+  :issue:`9701` by :user:`Kyle Johnson <gkjohns>`
diff --git a/sklearn/neighbors/regression.py b/sklearn/neighbors/regression.py
index 9fc65303dac8d..b13f16cfd399e 100644
--- a/sklearn/neighbors/regression.py
+++ b/sklearn/neighbors/regression.py
@@ -142,7 +142,7 @@ def predict(self, X):
         """
         if issparse(X) and self.metric == 'precomputed':
             raise ValueError(
-                "Sparse matricies not supported for prediction with "
+                "Sparse matrices not supported for prediction with "
                 "precomputed kernels. Densify your matrix."
             )
         X = check_array(X, accept_sparse='csr')
diff --git a/sklearn/neighbors/tests/test_neighbors.py b/sklearn/neighbors/tests/test_neighbors.py
index 31e0a8c7e4d39..052c83c71d2e7 100644
--- a/sklearn/neighbors/tests/test_neighbors.py
+++ b/sklearn/neighbors/tests/test_neighbors.py
@@ -733,11 +733,7 @@ def test_kneighbors_regressor_sparse(n_samples=40,
         knn.fit(sparsemat(X), y)
         for sparsev in SPARSE_OR_DENSE:
             X2 = sparsev(X)
-            # sparse precomputed distance matrices not supported for prediction
-            if knn.metric == 'precomputed':
-                assert_raises(ValueError, knn.predict, csr_matrix(X2))
-            else:
-                assert_true(np.mean(knn.predict(X2).round() == y) > 0.95)
+            assert_true(np.mean(knn.predict(X2).round() == y) > 0.95)
 
 
 def test_neighbors_iris():

From 44f4dd657de8d9163a8aa00a730fd7d0007d95b6 Mon Sep 17 00:00:00 2001
From: gkjohns <gkjohns@gmail.com>
Date: Thu, 9 Nov 2017 16:53:43 -0500
Subject: [PATCH 50/51] check if test data is sparse, check for ValueError
 instead of accuracy if yes

---
 sklearn/neighbors/tests/test_neighbors.py | 14 +++++++++++++-
 1 file changed, 13 insertions(+), 1 deletion(-)

diff --git a/sklearn/neighbors/tests/test_neighbors.py b/sklearn/neighbors/tests/test_neighbors.py
index 052c83c71d2e7..89baada19fff4 100644
--- a/sklearn/neighbors/tests/test_neighbors.py
+++ b/sklearn/neighbors/tests/test_neighbors.py
@@ -2,7 +2,7 @@
 
 import numpy as np
 from scipy.sparse import (bsr_matrix, coo_matrix, csc_matrix, csr_matrix,
-                          dok_matrix, lil_matrix)
+                          dok_matrix, lil_matrix, issparse)
 
 from sklearn import metrics
 from sklearn import neighbors, datasets
@@ -731,10 +731,22 @@ def test_kneighbors_regressor_sparse(n_samples=40,
         knn = neighbors.KNeighborsRegressor(n_neighbors=n_neighbors,
                                             algorithm='auto')
         knn.fit(sparsemat(X), y)
+
+        knn_pre = neighbors.KNeighborsRegressor(n_neighbors=n_neighbors,
+                                                metric='precomputed')
+        knn_pre.fit(pairwise_distances(X, metric='euclidean'), y)
+
         for sparsev in SPARSE_OR_DENSE:
             X2 = sparsev(X)
             assert_true(np.mean(knn.predict(X2).round() == y) > 0.95)
 
+            X2_pre = sparsev(pairwise_distances(X, metric='euclidean'))
+            if issparse(sparsev(X2_pre)):
+                assert_raises(ValueError, knn_pre.predict, X2_pre)
+            else:
+                assert_true(\
+                    np.mean(knn_pre.predict(X2_pre).round() == y) > 0.95)
+
 
 def test_neighbors_iris():
     # Sanity checks on the iris dataset

From b5ede88436d69866ec648448de6a9aba18b0a4f5 Mon Sep 17 00:00:00 2001
From: gkjohns <gkjohns@gmail.com>
Date: Thu, 9 Nov 2017 16:55:09 -0500
Subject: [PATCH 51/51] remove redundant backslash

---
 sklearn/neighbors/tests/test_neighbors.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sklearn/neighbors/tests/test_neighbors.py b/sklearn/neighbors/tests/test_neighbors.py
index 89baada19fff4..ceb53412018b8 100644
--- a/sklearn/neighbors/tests/test_neighbors.py
+++ b/sklearn/neighbors/tests/test_neighbors.py
@@ -744,7 +744,7 @@ def test_kneighbors_regressor_sparse(n_samples=40,
             if issparse(sparsev(X2_pre)):
                 assert_raises(ValueError, knn_pre.predict, X2_pre)
             else:
-                assert_true(\
+                assert_true(
                     np.mean(knn_pre.predict(X2_pre).round() == y) > 0.95)