

[WIP] Make SVC tests independent of SV ordering #12849

Open
wants to merge 1 commit into main
100 changes: 79 additions & 21 deletions sklearn/svm/tests/test_sparse.py
@@ -2,7 +2,7 @@

import numpy as np
from numpy.testing import (assert_array_almost_equal, assert_array_equal,
                           assert_equal)
                           assert_equal, assert_allclose)
from scipy import sparse

from sklearn import datasets, svm, linear_model, base
@@ -13,6 +13,7 @@
from sklearn.utils.testing import (assert_raises, assert_warns,
                                   assert_raise_message, ignore_warnings,
                                   skip_if_32bit)
from sklearn.utils import shuffle


# test sample 1
@@ -135,20 +136,77 @@ def kfunc(x, y):
    assert_array_equal(clf_lin.predict(X_sp), clf_mylin.predict(X_sp))


def test_svc_iris():
    # Test the sparse SVC with the iris dataset
    for k in ('linear', 'poly', 'rbf'):
        sp_clf = svm.SVC(gamma='scale', kernel=k).fit(iris.data, iris.target)
        clf = svm.SVC(gamma='scale', kernel=k).fit(iris.data.toarray(),
                                                   iris.target)

        assert_array_almost_equal(clf.support_vectors_,
                                  sp_clf.support_vectors_.toarray())
        assert_array_almost_equal(clf.dual_coef_, sp_clf.dual_coef_.toarray())
        assert_array_almost_equal(
            clf.predict(iris.data.toarray()), sp_clf.predict(iris.data))
        if k == 'linear':
            assert_array_almost_equal(clf.coef_, sp_clf.coef_.toarray())


def _toarray(a):
    if sparse.issparse(a):
        return a.toarray()
    return a
Comment on lines +139 to +142

Member

would np.asarray(a) not work?

Contributor

This was the initial proposal in #12732.

scikit-learn still needs to be able to deal with inputs where the feature matrix has duplicates; it is just that the original quadratic optimization problem then admits infinitely many solutions.

The test for such a case should be that training succeeds and predictions can be made.
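
A minimal sketch of what such a smoke test could look like (not part of this diff; the dataset and test name are made up for illustration):

```python
import numpy as np
from scipy import sparse
from sklearn import svm


def test_svc_fit_predict_with_duplicate_samples():
    # Build a tiny dataset in which every sample appears twice, so the
    # underlying quadratic program admits infinitely many equivalent solutions.
    rng = np.random.RandomState(0)
    X = rng.randn(20, 3)
    y = (X[:, 0] + X[:, 1] > 0).astype(int)
    X_dup = np.vstack([X, X])
    y_dup = np.concatenate([y, y])

    # Training should succeed and predictions should be possible for both
    # dense and sparse inputs, even though the solution is not unique.
    for X_train in (X_dup, sparse.csr_matrix(X_dup)):
        clf = svm.SVC(gamma='scale').fit(X_train, y_dup)
        assert clf.predict(X_train).shape == (X_dup.shape[0],)
```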

Member Author

Summarizing some IRL discussion:

We could have a test that checks that the predictions (on a random test set) are the same if (see the sketch below):

  • SVC is trained on both the dense and the sparse version of the same training dataset with duplicates;
  • SVC is trained on datasets with and without duplicates, but with a 0.5 sample weight for the training points that are duplicated twice.

And then update the sparse / dense coef_ comparison test to use data without duplicated data-points.
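
A rough sketch of the two checks described above (hypothetical test name and synthetic data, not part of this PR; a tight tol is used so both fits converge to essentially the same solution):

```python
import numpy as np
from scipy import sparse
from sklearn import svm


def test_svc_predictions_invariant_to_duplication():
    rng = np.random.RandomState(42)
    X = rng.randn(30, 4)
    y = (X[:, 0] - X[:, 2] > 0).astype(int)
    X_test = rng.randn(10, 4)

    # Training set in which every sample appears twice.
    X_dup = np.vstack([X, X])
    y_dup = np.concatenate([y, y])

    params = dict(gamma='scale', tol=1e-12)

    # 1) Dense vs. sparse fits on the same duplicated training data should
    #    yield the same predictions on a held-out test set.
    clf_dense = svm.SVC(**params).fit(X_dup, y_dup)
    clf_sparse = svm.SVC(**params).fit(sparse.csr_matrix(X_dup), y_dup)
    np.testing.assert_array_equal(clf_dense.predict(X_test),
                                  clf_sparse.predict(X_test))

    # 2) Duplicated data with 0.5 sample weights vs. unique data with unit
    #    weights: each distinct point carries the same total weight, so the
    #    decision functions (and hence the predictions) should agree.
    clf_unique = svm.SVC(**params).fit(X, y)
    clf_half_weight = svm.SVC(**params).fit(
        X_dup, y_dup, sample_weight=0.5 * np.ones(len(y_dup)))
    np.testing.assert_array_equal(clf_unique.predict(X_test),
                                  clf_half_weight.predict(X_test))
```

Comparing predictions rather than dual_coef_ or support_vectors_ sidesteps the non-uniqueness of the support vector set in the presence of duplicates.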



def _assert_svc_equal(svc1, svc2, atol=1e-7):
    # Check that two support vector machines parametrize equivalent decision
    # functions by comparing dual coefficients and support vectors.
    assert svc1.get_params() == svc2.get_params()

    sv1 = _toarray(svc1.support_vectors_)
    dc1 = _toarray(svc1.dual_coef_)
    sv2 = _toarray(svc2.support_vectors_)
    dc2 = _toarray(svc2.dual_coef_)
    assert dc1.shape == dc2.shape
Contributor

Actually, the sets of support vectors may differ. If the input contains duplicate samples and such a point happens to be a support vector, then support_vectors_ may contain a single entry or multiple entries for it, depending on the history of accumulated floating point errors.

    assert sv1.shape == sv2.shape

    # Consider each OvO binary classification problem in turn.
    ovo_dim, n_support_vectors = dc1.shape
    for ovo_idx in range(ovo_dim):
        # The ordering of the support vectors is arbitrary. Furthermore, if
        # samples are duplicated with different class labels, they can be
        # selected as duplicated support vectors with distinct dual
        # coefficients.

        # Therefore to check that the 2 SVMs parametrize the same decision
        # function, we concatenate the dual coef with the matching support
        # vector coordinates for all the support vectors of each classifier so
        # that we can check that there are matching pairs (dual_coef,
        # support_vector) in the two models by computing pairwise distances:
        dc_sv1 = np.hstack([dc1[ovo_idx].reshape(-1, 1), sv1])
        dc_sv2 = np.hstack([dc2[ovo_idx].reshape(-1, 1), sv2])

        for sv1_idx in range(n_support_vectors):
            sqdists = np.sum((dc_sv2 - dc_sv1[sv1_idx]) ** 2, axis=1)
            sv2_idx = sqdists.argmin()
            assert np.sqrt(sqdists[sv2_idx]) < atol

    # For the linear kernel, also check that the aggregated coefficients of the
    # linear decision function in the original feature space match.
    if svc1.kernel == "linear":
        max_absdiff = np.abs(_toarray(svc1.coef_) - _toarray(svc2.coef_)).max()
        assert max_absdiff < atol


@pytest.mark.parametrize("kernel", ["linear", "poly", "rbf"])
def test_svc_iris(kernel, svc_tol=1e-12, atol=1e-7):
    # The optimization result is not deterministic when the order of the iris
    # samples is permuted, because iris contains duplicated samples that may
    # or may not be selected as support vectors depending on the training set
    # order. However, the resulting decision function should be independent of
    # the training set ordering.
    # Order invariance is only guaranteed if the model has properly converged,
    # hence the small tol value.
    iris2_data, iris2_target = shuffle(iris.data, iris.target,
                                       random_state=0)
    params = {
        "gamma": 1.,
        "kernel": kernel,
        "tol": svc_tol,
        "C": 0.01,
    }
    sp_clf = svm.SVC(**params).fit(iris.data, iris.target)
    clf = svm.SVC(**params).fit(iris.data.toarray(), iris.target)
    sp_clf2 = svm.SVC(**params).fit(iris2_data, iris2_target)
    clf2 = svm.SVC(**params).fit(iris2_data.toarray(), iris2_target)

    _assert_svc_equal(clf, sp_clf, atol=atol)
    _assert_svc_equal(clf, clf2, atol=atol)
    _assert_svc_equal(sp_clf, sp_clf2, atol=atol)


def test_sparse_decision_function():
@@ -288,7 +346,8 @@ def test_sparse_oneclasssvm(datasets_index, kernel):
    check_svm_model_equal(clf, sp_clf, *dataset)


def test_sparse_realdata():
@pytest.mark.parametrize("C", [0.01, 1, 100])
def test_sparse_20newsgroups_subset(C, atol=1e-7):
    # Test on a subset from the 20newsgroups dataset.
    # This catches some bugs if input is not correctly converted into
    # sparse format or weights are not correctly initialized.
@@ -310,11 +369,10 @@ def test_sparse_realdata():
                  3., 0., 0., 2., 2., 1., 3., 1., 1., 0., 1., 2., 1.,
                  1., 3.])

    clf = svm.SVC(kernel='linear').fit(X.toarray(), y)
    sp_clf = svm.SVC(kernel='linear').fit(sparse.coo_matrix(X), y)

    assert_array_equal(clf.support_vectors_, sp_clf.support_vectors_.toarray())
    assert_array_equal(clf.dual_coef_, sp_clf.dual_coef_.toarray())
    params = dict(kernel='linear', C=C, tol=1e-12)
    clf = svm.SVC(**params).fit(X.toarray(), y)
    sp_clf = svm.SVC(**params).fit(sparse.coo_matrix(X), y)
    _assert_svc_equal(clf, sp_clf, atol=atol)


def test_sparse_svc_clone_with_callable_kernel():
21 changes: 11 additions & 10 deletions sklearn/svm/tests/test_svm.py
@@ -92,7 +92,7 @@ def test_libsvm_iris():
def test_precomputed():
    # SVC with a precomputed kernel.
    # We test it with a toy dataset and with iris.
    clf = svm.SVC(kernel='precomputed')
    clf = svm.SVC(kernel='precomputed', tol=1e-10)
    # Gram matrix for train data (square matrix)
    # (we use just a linear kernel)
    K = np.dot(X, np.array(X).T)
@@ -120,9 +120,10 @@ def test_precomputed():

    # same as before, but using a callable function instead of the kernel
    # matrix. kernel is just a linear kernel
    def kfunc(x, y):
        return np.dot(x, y.T)

    kfunc = lambda x, y: np.dot(x, y.T)
    clf = svm.SVC(gamma='scale', kernel=kfunc)
    clf = svm.SVC(gamma='scale', kernel=kfunc, tol=1e-10)
    clf.fit(X, Y)
    pred = clf.predict(T)

@@ -133,15 +134,15 @@ def test_precomputed():

    # test a precomputed kernel with the iris dataset
    # and check parameters against a linear SVC
    clf = svm.SVC(kernel='precomputed')
    clf2 = svm.SVC(kernel='linear')
    clf = svm.SVC(kernel='precomputed', tol=1e-10)
    clf2 = svm.SVC(kernel='linear', tol=1e-10)
    K = np.dot(iris.data, iris.data.T)
    clf.fit(K, iris.target)
    clf2.fit(iris.data, iris.target)
    pred = clf.predict(K)
    assert_array_almost_equal(clf.support_, clf2.support_)
    assert_array_almost_equal(clf.dual_coef_, clf2.dual_coef_)
    assert_array_almost_equal(clf.intercept_, clf2.intercept_)
    assert_array_almost_equal(clf.dual_coef_, clf2.dual_coef_, decimal=4)
    assert_array_almost_equal(clf.intercept_, clf2.intercept_, decimal=4)
    assert_almost_equal(np.mean(pred == iris.target), .99, decimal=2)

    # Gram matrix for test data but compute KT[i,j]
@@ -154,7 +155,7 @@ def test_precomputed():
    pred = clf.predict(K)
    assert_almost_equal(np.mean(pred == iris.target), .99, decimal=2)

    clf = svm.SVC(gamma='scale', kernel=kfunc)
    clf = svm.SVC(gamma='scale', kernel=kfunc, tol=1e-10)
    clf.fit(iris.data, iris.target)
    assert_almost_equal(np.mean(pred == iris.target), .99, decimal=2)

@@ -429,7 +430,7 @@ def test_weight():
def test_sample_weights():
    # Test weights on individual samples
    # TODO: check on NuSVR, OneClass, etc.
    clf = svm.SVC(gamma="scale")
    clf = svm.SVC(gamma="scale", tol=1e-10)
    clf.fit(X, Y)
    assert_array_equal(clf.predict([X[2]]), [1.])

@@ -438,7 +439,7 @@ def test_sample_weights():
    assert_array_equal(clf.predict([X[2]]), [2.])

    # test that rescaling all samples is the same as changing C
    clf = svm.SVC(gamma="scale")
    clf = svm.SVC(gamma="scale", tol=1e-10)
    clf.fit(X, Y)
    dual_coef_no_weight = clf.dual_coef_
    clf.set_params(C=100)