Commit b0f4926

migrate SVDD code style to Black (scikit-learn#18948)
ensure SVDD passes numpydoc validation (scikit-learn#20463); check for SVDD in `test_sparse.py:check_svm_model_equal` to avoid calling `.predict_proba`
1 parent fd43605 commit b0f4926

File tree

5 files changed: +88 additions, −59 deletions

sklearn/svm/__init__.py

Lines changed: 1 addition & 2 deletions
@@ -10,8 +10,7 @@
 # of their respective owners.
 # License: BSD 3 clause (C) INRIA 2010

-from ._classes import SVC, NuSVC, SVR, NuSVR, OneClassSVM, LinearSVC, \
-    LinearSVR, SVDD
+from ._classes import SVC, NuSVC, SVR, NuSVR, OneClassSVM, LinearSVC, LinearSVR, SVDD
 from ._bounds import l1_min_c

 __all__ = [

sklearn/svm/_base.py

Lines changed: 1 addition & 2 deletions
@@ -27,8 +27,7 @@
 from ..exceptions import NotFittedError


-LIBSVM_IMPL = ['c_svc', 'nu_svc', 'one_class', 'epsilon_svr', 'nu_svr',
-               'svdd_l1']
+LIBSVM_IMPL = ["c_svc", "nu_svc", "one_class", "epsilon_svr", "nu_svr", "svdd_l1"]


 def _one_vs_one_coef(dual_coef, n_support, support_vectors):
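
Note: the `_impl` string each estimator declares (`"svdd_l1"` for `SVDD`) is looked up in this list to obtain the integer solver code handed to the libsvm bindings. A minimal sketch of that lookup, assuming the `LIBSVM_IMPL.index(self._impl)` pattern used elsewhere in `_base.py` (the exact call site is not part of this diff):

# Sketch: resolving an implementation string to a libsvm solver code.
LIBSVM_IMPL = ["c_svc", "nu_svc", "one_class", "epsilon_svr", "nu_svr", "svdd_l1"]

solver_type = LIBSVM_IMPL.index("svdd_l1")
print(solver_type)  # 5 -- the code passed down to the libsvm bindings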

sklearn/svm/_classes.py

Lines changed: 53 additions & 27 deletions
@@ -1932,16 +1932,6 @@ class SVDD(OutlierMixin, BaseLibSVM):
     support_vectors_ : ndarray of shape (n_SV, n_features)
         Support vectors.

-    Examples
-    --------
-    >>> from sklearn.svm import SVDD
-    >>> X = [[0], [0.44], [0.45], [0.46], [1]]
-    >>> clf = SVDD(gamma='auto').fit(X)
-    >>> clf.predict(X)
-    array([-1, 1, 1, 1, -1])
-    >>> clf.score_samples(X)
-    array([0.5298..., 0.8047..., 0.8056..., 0.8061..., 0.4832...])
-
     See Also
     --------
     OneClassSVM : Support vector method for outlier detection via a separating
@@ -1958,47 +1948,82 @@ class SVDD(OutlierMixin, BaseLibSVM):
          to support vector data description (SVDD)." Technical
          Report, Department of Computer Science, National Taiwan
          University.
+
+    Examples
+    --------
+    >>> from sklearn.svm import SVDD
+    >>> X = [[0], [0.44], [0.45], [0.46], [1]]
+    >>> clf = SVDD(gamma='auto').fit(X)
+    >>> clf.predict(X)
+    array([-1, 1, 1, 1, -1])
+    >>> clf.score_samples(X)
+    array([0.5298..., 0.8047..., 0.8056..., 0.8061..., 0.4832...])
     """

-    _impl = 'svdd_l1'
+    _impl = "svdd_l1"

-    def __init__(self, *, kernel='rbf', degree=3, gamma='scale',
-                 coef0=0.0, tol=1e-3, nu=0.5, shrinking=True, cache_size=200,
-                 verbose=False, max_iter=-1):
+    def __init__(
+        self,
+        *,
+        kernel="rbf",
+        degree=3,
+        gamma="scale",
+        coef0=0.0,
+        tol=1e-3,
+        nu=0.5,
+        shrinking=True,
+        cache_size=200,
+        verbose=False,
+        max_iter=-1,
+    ):

         super().__init__(
-            kernel=kernel, degree=degree, gamma=gamma, coef0=coef0,
-            tol=tol, C=0., nu=nu, epsilon=0., shrinking=shrinking,
-            probability=False, cache_size=cache_size, class_weight=None,
-            verbose=verbose, max_iter=max_iter, random_state=None)
+            kernel=kernel,
+            degree=degree,
+            gamma=gamma,
+            coef0=coef0,
+            tol=tol,
+            C=0.0,
+            nu=nu,
+            epsilon=0.0,
+            shrinking=shrinking,
+            probability=False,
+            cache_size=cache_size,
+            class_weight=None,
+            verbose=verbose,
+            max_iter=max_iter,
+            random_state=None,
+        )

     def fit(self, X, y=None, sample_weight=None, **params):
-        """Learns the soft minimum volume hypersphere around the sample X.
+        """Learn a soft minimum-volume hypersphere around the sample X.

         Parameters
         ----------
         X : {array-like, sparse matrix} of shape (n_samples, n_features)
             Set of samples, where n_samples is the number of samples and
             n_features is the number of features.

+        y : Ignored
+            Not used, present for API consistency by convention.
+
         sample_weight : array-like of shape (n_samples,), default=None
             Per-sample weights. Rescale C per sample. Higher weights
             force the classifier to put more emphasis on these points.

-        y : Ignored
-            not used, present for API consistency by convention.
+        **params : dict
+            Additional fit parameters.

         Returns
         -------
         self : object
+            Fitted estimator.

         Notes
         -----
         If X is not a C-ordered contiguous array it is copied.
-
         """
-        super().fit(X, np.ones(_num_samples(X)),
-                    sample_weight=sample_weight, **params)
+        super().fit(X, np.ones(_num_samples(X)), sample_weight=sample_weight, **params)
         self.offset_ = -self._intercept_
         return self
@@ -2056,8 +2081,9 @@ def predict(self, X):

     def _more_tags(self):
         return {
-            '_xfail_checks': {
-                'check_sample_weights_invariance':
-                'zero sample_weight is not equivalent to removing samples',
+            "_xfail_checks": {
+                "check_sample_weights_invariance": (
+                    "zero sample_weight is not equivalent to removing samples"
+                ),
             }
         }
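
For reference, the relocated docstring example runs as a small standalone script; a sketch assuming this branch's `SVDD` is installed (the class is not in mainline scikit-learn releases):

import numpy as np
from sklearn.svm import SVDD  # branch-only estimator

X = np.array([[0.0], [0.44], [0.45], [0.46], [1.0]])
clf = SVDD(gamma="auto").fit(X)  # y is ignored; fit() substitutes np.ones internally

print(clf.predict(X))        # expected: [-1  1  1  1 -1]
print(clf.score_samples(X))  # approx. [0.5298 0.8047 0.8056 0.8061 0.4832]
print(clf.offset_)           # equals -clf._intercept_, set at the end of fit()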

sklearn/svm/tests/test_sparse.py

Lines changed: 11 additions & 7 deletions
@@ -74,6 +74,8 @@ def check_svm_model_equal(dense_svm, sparse_svm, X_train, y_train, X_test):
     )
     if isinstance(dense_svm, svm.OneClassSVM):
         msg = "cannot use sparse input in 'OneClassSVM' trained on dense data"
+    elif isinstance(dense_svm, svm.SVDD):
+        msg = "cannot use sparse input in 'SVDD' trained on dense data"
     else:
         assert_array_almost_equal(
             dense_svm.predict_proba(X_test_dense), sparse_svm.predict_proba(X_test), 4
@@ -336,20 +338,22 @@ def test_sparse_oneclasssvm(datasets_index, kernel):


 def test_sparse_svdd():
-    """Check that sparse SVDD gives the same result as dense SVDD
-    """
+    """Check that sparse SVDD gives the same result as dense SVDD"""
     # many class dataset:
     X_blobs, _ = make_blobs(n_samples=100, centers=10, random_state=0)
     X_blobs = sparse.csr_matrix(X_blobs)

-    datasets = [[X_sp, None, T], [X2_sp, None, T2],
-                [X_blobs[:80], None, X_blobs[80:]],
-                [iris.data, None, iris.data]]
+    datasets = [
+        [X_sp, None, T],
+        [X2_sp, None, T2],
+        [X_blobs[:80], None, X_blobs[80:]],
+        [iris.data, None, iris.data],
+    ]
     kernels = ["linear", "poly", "rbf", "sigmoid"]
     for dataset in datasets:
         for kernel in kernels:
-            clf = svm.SVDD(gamma='scale', kernel=kernel)
-            sp_clf = svm.SVDD(gamma='scale', kernel=kernel)
+            clf = svm.SVDD(gamma="scale", kernel=kernel)
+            sp_clf = svm.SVDD(gamma="scale", kernel=kernel)
             check_svm_model_equal(clf, sp_clf, *dataset)
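The dense/sparse parity that `check_svm_model_equal` enforces can be shown in isolation; a sketch under the same branch-only `SVDD` assumption, with illustrative data rather than the test's fixtures:

import numpy as np
from scipy import sparse
from sklearn.svm import SVDD  # branch-only estimator

rng = np.random.RandomState(0)
X = rng.rand(50, 3)
X_sparse = sparse.csr_matrix(X)

dense_clf = SVDD(gamma="scale", kernel="rbf").fit(X)
sparse_clf = SVDD(gamma="scale", kernel="rbf").fit(X_sparse)

# Predictions should agree regardless of input sparsity.
np.testing.assert_array_equal(dense_clf.predict(X), sparse_clf.predict(X_sparse))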
sklearn/svm/tests/test_svm.py

Lines changed: 22 additions & 21 deletions
@@ -364,16 +364,16 @@ def test_oneclass_fit_params_is_deprecated():

 def test_svdd():
     # Test the output of libsvm for the SVDD problem with default parameters
-    clf = svm.SVDD(gamma='scale')
+    clf = svm.SVDD(gamma="scale")
     clf.fit(X)
     pred = clf.predict(T)

     assert_array_equal(pred, [+1, -1, -1])
-    assert pred.dtype == np.dtype('intp')
+    assert pred.dtype == np.dtype("intp")
     assert_array_almost_equal(clf.intercept_, [0.2817], decimal=3)
-    assert_array_almost_equal(clf.dual_coef_,
-                              [[0.7500, 0.7499, 0.7499, 0.7500]],
-                              decimal=3)
+    assert_array_almost_equal(
+        clf.dual_coef_, [[0.7500, 0.7499, 0.7499, 0.7500]], decimal=3
+    )
     assert not hasattr(clf, "coef_")
@@ -397,15 +397,15 @@ def test_svdd_decision_function():
     X_outliers = rnd.uniform(low=-4, high=4, size=(20, 2))

     # fit the model
-    clf = svm.SVDD(gamma='scale', nu=0.1,
-                   kernel="poly", degree=2, coef0=1.0).fit(X_train)
+    clf = svm.SVDD(gamma="scale", nu=0.1, kernel="poly", degree=2, coef0=1.0)
+    clf.fit(X_train)

     # predict and validate things
     y_pred_test = clf.predict(X_test)
-    assert np.mean(y_pred_test == 1) > .9
+    assert np.mean(y_pred_test == 1) > 0.9

     y_pred_outliers = clf.predict(X_outliers)
-    assert np.mean(y_pred_outliers == -1) > .65
+    assert np.mean(y_pred_outliers == -1) > 0.65

     dec_func_test = clf.decision_function(X_test)
     assert_array_equal((dec_func_test > 0).ravel(), y_pred_test == 1)
@@ -436,28 +436,30 @@ def test_svdd_score_samples():
     X_train = np.r_[X + 2, X - 2]

     # Evaluate the scores on a small uniform 2-d mesh
-    xx, yy = np.meshgrid(np.linspace(-5, 5, num=26),
-                         np.linspace(-5, 5, num=26))
+    xx, yy = np.meshgrid(np.linspace(-5, 5, num=26), np.linspace(-5, 5, num=26))
     X_test = np.c_[xx.ravel(), yy.ravel()]

     # Fit the model for at least 10% support vectors
-    clf = svm.SVDD(nu=0.1, kernel="poly", gamma='scale', degree=2, coef0=1.0)
+    clf = svm.SVDD(nu=0.1, kernel="poly", gamma="scale", degree=2, coef0=1.0)
     clf.fit(X_train)

     # Check score_samples() implementation
-    assert_array_almost_equal(clf.score_samples(X_test),
-                              clf.decision_function(X_test) + clf.offset_)
+    assert_array_almost_equal(
+        clf.score_samples(X_test), clf.decision_function(X_test) + clf.offset_
+    )

     # Test the gamma="scale": use .var() for scaling (c.f. issue #12741)
     gamma = 1.0 / (X.shape[1] * X_train.var())

     assert_almost_equal(clf._gamma, gamma)

     # Compute the kernel matrices
-    k_zx = polynomial_kernel(X_train[clf.support_], X_test,
-                             gamma=gamma, degree=clf.degree, coef0=clf.coef0)
-    k_xx = polynomial_kernel(X_test, gamma=gamma,
-                             degree=clf.degree, coef0=clf.coef0).diagonal()
+    k_zx = polynomial_kernel(
+        X_train[clf.support_], X_test, gamma=gamma, degree=clf.degree, coef0=clf.coef0
+    )
+    k_xx = polynomial_kernel(
+        X_test, gamma=gamma, degree=clf.degree, coef0=clf.coef0
+    ).diagonal()

     # Compute the sample scores = decision scores without `-\rho`
     scores_ = np.dot(clf.dual_coef_, k_zx - k_xx[np.newaxis] / 2).ravel()
@@ -497,8 +499,7 @@ def test_oneclass_and_svdd():
     assert_array_almost_equal(svdd.intercept_, svdd_intercept, decimal=3)

     # Evaluate the decision function on a uniformly spaced 2-d mesh
-    xx, yy = np.meshgrid(np.linspace(-5, 5, num=101),
-                         np.linspace(-5, 5, num=101))
+    xx, yy = np.meshgrid(np.linspace(-5, 5, num=101), np.linspace(-5, 5, num=101))
     mesh = np.c_[xx.ravel(), yy.ravel()]

     svdd_df = svdd.decision_function(mesh)
@@ -1114,7 +1115,7 @@ def test_immutable_coef_property():
         svm.SVR(kernel="linear").fit(iris.data, iris.target),
         svm.NuSVR(kernel="linear").fit(iris.data, iris.target),
         svm.OneClassSVM(kernel="linear").fit(iris.data),
-        svm.SVDD(kernel='linear').fit(iris.data),
+        svm.SVDD(kernel="linear").fit(iris.data),
     ]
     for clf in svms:
         with pytest.raises(AttributeError):
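
The invariant behind `test_svdd_score_samples` is worth stating explicitly: `score_samples(X)` equals `decision_function(X) + offset_`, i.e. the decision scores before the `-rho` shift (per the test's own comment). A standalone sketch under the same branch-only `SVDD` assumption, with illustrative data:

import numpy as np
from sklearn.svm import SVDD  # branch-only estimator

rng = np.random.RandomState(42)
X_train = rng.randn(40, 2)
clf = SVDD(nu=0.1, kernel="poly", gamma="scale", degree=2, coef0=1.0).fit(X_train)

X_test = rng.randn(10, 2)
np.testing.assert_allclose(
    clf.score_samples(X_test), clf.decision_function(X_test) + clf.offset_
)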
