From 2e291648c88c646b780bade32e4b9c68e8efe23a Mon Sep 17 00:00:00 2001 From: Vathsala Achar Date: Sat, 13 May 2017 19:50:36 +0100 Subject: [PATCH 1/3] Simplifying margin plotting in SVM examples (#8501) * updated to use contour levels on decision function * separating unbalanced class now uses a red line to show the change in the decision boundary when the classes are weighted * corrected the target variable from Y to y --- examples/svm/plot_custom_kernel.py | 12 ++-- examples/svm/plot_separating_hyperplane.py | 56 +++++++++---------- .../plot_separating_hyperplane_unbalanced.py | 48 ++++++++++------ examples/svm/plot_svm_kernels.py | 6 +- examples/svm/plot_svm_margin.py | 6 +- examples/svm/plot_svm_nonlinear.py | 6 +- 6 files changed, 74 insertions(+), 60 deletions(-) diff --git a/examples/svm/plot_custom_kernel.py b/examples/svm/plot_custom_kernel.py index 28641cd35f8cb..803b92b63dc5a 100644 --- a/examples/svm/plot_custom_kernel.py +++ b/examples/svm/plot_custom_kernel.py @@ -17,26 +17,26 @@ iris = datasets.load_iris() X = iris.data[:, :2] # we only take the first two features. We could # avoid this ugly slicing by using a two-dim dataset -Y = iris.target +y = iris.target -def my_kernel(X, Y): +def my_kernel(X, y): """ We create a custom kernel: (2 0) - k(X, Y) = X ( ) Y.T + k(X, y) = X ( ) y.T (0 1) """ M = np.array([[2, 0], [0, 1.0]]) - return np.dot(np.dot(X, M), Y.T) + return np.dot(np.dot(X, M), y.T) h = .02 # step size in the mesh # we create an instance of SVM and fit out data. clf = svm.SVC(kernel=my_kernel) -clf.fit(X, Y) +clf.fit(X, y) # Plot the decision boundary. For that, we will assign a color to each # point in the mesh [x_min, x_max]x[y_min, y_max]. @@ -50,7 +50,7 @@ def my_kernel(X, Y): plt.pcolormesh(xx, yy, Z, cmap=plt.cm.Paired) # Plot also the training points -plt.scatter(X[:, 0], X[:, 1], c=Y, cmap=plt.cm.Paired, edgecolors='k') +plt.scatter(X[:, 0], X[:, 1], c=y, cmap=plt.cm.Paired, edgecolors='k') plt.title('3-Class classification using Support Vector Machine with custom' ' kernel') plt.axis('tight') diff --git a/examples/svm/plot_separating_hyperplane.py b/examples/svm/plot_separating_hyperplane.py index ff6f3fc8f31ad..e80e3e0c05e2b 100644 --- a/examples/svm/plot_separating_hyperplane.py +++ b/examples/svm/plot_separating_hyperplane.py @@ -12,37 +12,35 @@ import numpy as np import matplotlib.pyplot as plt from sklearn import svm +from sklearn.datasets import make_blobs + # we create 40 separable points -np.random.seed(0) -X = np.r_[np.random.randn(20, 2) - [2, 2], np.random.randn(20, 2) + [2, 2]] -Y = [0] * 20 + [1] * 20 +X, y = make_blobs(n_samples=40, centers=2, random_state=7) # fit the model clf = svm.SVC(kernel='linear') -clf.fit(X, Y) - -# get the separating hyperplane -w = clf.coef_[0] -a = -w[0] / w[1] -xx = np.linspace(-5, 5) -yy = a * xx - (clf.intercept_[0]) / w[1] - -# plot the parallels to the separating hyperplane that pass through the -# support vectors -b = clf.support_vectors_[0] -yy_down = a * xx + (b[1] - a * b[0]) -b = clf.support_vectors_[-1] -yy_up = a * xx + (b[1] - a * b[0]) - -# plot the line, the points, and the nearest vectors to the plane -plt.plot(xx, yy, 'k-') -plt.plot(xx, yy_down, 'k--') -plt.plot(xx, yy_up, 'k--') - -plt.scatter(clf.support_vectors_[:, 0], clf.support_vectors_[:, 1], - s=80, facecolors='none') -plt.scatter(X[:, 0], X[:, 1], c=Y, cmap=plt.cm.Paired) - -plt.axis('tight') -plt.show() +clf.fit(X, y) + +plt.scatter(X[:, 0], X[:, 1], c=y, s=30, cmap=plt.cm.Paired) + +# plot the decision function +ax = plt.gca() +xlim = ax.get_xlim() +ylim = ax.get_ylim() + +# create grid to evaluate model +xx = np.linspace(xlim[0], xlim[1], 30) +yy = np.linspace(ylim[0], ylim[1], 30) +YY, XX = np.meshgrid(yy, xx) +xy = np.vstack([XX.ravel(), YY.ravel()]).T +Z = clf.decision_function(xy).reshape(XX.shape) + +# plot decision boundary and margins +ax.contour(XX, YY, Z, colors='k', + levels=[-1, 0, 1], alpha=0.5, + linestyles=['--', '-', '--']) +# plot support vectors +ax.scatter(clf.support_vectors_[:, 0], + clf.support_vectors_[:, 1], + s=100, linewidth=1, facecolors='none'); diff --git a/examples/svm/plot_separating_hyperplane_unbalanced.py b/examples/svm/plot_separating_hyperplane_unbalanced.py index 438291dc5538d..dd94deae6bace 100644 --- a/examples/svm/plot_separating_hyperplane_unbalanced.py +++ b/examples/svm/plot_separating_hyperplane_unbalanced.py @@ -29,7 +29,7 @@ import numpy as np import matplotlib.pyplot as plt from sklearn import svm -#from sklearn.linear_model import SGDClassifier + # we create 40 separable points rng = np.random.RandomState(0) @@ -43,25 +43,41 @@ clf = svm.SVC(kernel='linear', C=1.0) clf.fit(X, y) -w = clf.coef_[0] -a = -w[0] / w[1] -xx = np.linspace(-5, 5) -yy = a * xx - clf.intercept_[0] / w[1] - - -# get the separating hyperplane using weighted classes +# fit the model and get the separating hyperplane using weighted classes wclf = svm.SVC(kernel='linear', class_weight={1: 10}) wclf.fit(X, y) -ww = wclf.coef_[0] -wa = -ww[0] / ww[1] -wyy = wa * xx - wclf.intercept_[0] / ww[1] - # plot separating hyperplanes and samples -h0 = plt.plot(xx, yy, 'k-', label='no weights') -h1 = plt.plot(xx, wyy, 'k--', label='with weights') plt.scatter(X[:, 0], X[:, 1], c=y, cmap=plt.cm.Paired, edgecolors='k') plt.legend() -plt.axis('tight') -plt.show() +# plot the decision functions for both classifiers +ax = plt.gca() +xlim = ax.get_xlim() +ylim = ax.get_ylim() + +# create grid to evaluate model +xx = np.linspace(xlim[0], xlim[1], 30) +yy = np.linspace(ylim[0], ylim[1], 30) +YY, XX = np.meshgrid(yy, xx) +xy = np.vstack([XX.ravel(), YY.ravel()]).T + +# get the separating hyperplane +Z = clf.decision_function(xy).reshape(XX.shape) + +# plot decision boundary and margins +a = ax.contour(XX, YY, Z, colors='k', + levels=[0], alpha=0.5, + linestyles=['-']) + +# get the separating hyperplane for weighted classes +Z = wclf.decision_function(xy).reshape(XX.shape) + +# plot decision boundary and margins for weighted classes +b = ax.contour(XX, YY, Z, colors='r', + levels=[0], alpha=0.5, + linestyles=['-']) + +plt.legend([a.collections[0], b.collections[0]], + ["non weighted", "weighted"], + loc="upper right"); diff --git a/examples/svm/plot_svm_kernels.py b/examples/svm/plot_svm_kernels.py index dbad4e0b725e2..fbc57c5a10a6f 100644 --- a/examples/svm/plot_svm_kernels.py +++ b/examples/svm/plot_svm_kernels.py @@ -41,7 +41,7 @@ (.2, -2.3), (0, -2.7), (1.3, 2.1)].T -Y = [0] * 8 + [1] * 8 +y = [0] * 8 + [1] * 8 # figure number fignum = 1 @@ -49,7 +49,7 @@ # fit the model for kernel in ('linear', 'poly', 'rbf'): clf = svm.SVC(kernel=kernel, gamma=2) - clf.fit(X, Y) + clf.fit(X, y) # plot the line, the points, and the nearest vectors to the plane plt.figure(fignum, figsize=(4, 3)) @@ -57,7 +57,7 @@ plt.scatter(clf.support_vectors_[:, 0], clf.support_vectors_[:, 1], s=80, facecolors='none', zorder=10, edgecolors='k') - plt.scatter(X[:, 0], X[:, 1], c=Y, zorder=10, cmap=plt.cm.Paired, + plt.scatter(X[:, 0], X[:, 1], c=y, zorder=10, cmap=plt.cm.Paired, edgecolors='k') plt.axis('tight') diff --git a/examples/svm/plot_svm_margin.py b/examples/svm/plot_svm_margin.py index 2fdc29c1b29bd..bded813c19c9d 100644 --- a/examples/svm/plot_svm_margin.py +++ b/examples/svm/plot_svm_margin.py @@ -29,7 +29,7 @@ # we create 40 separable points np.random.seed(0) X = np.r_[np.random.randn(20, 2) - [2, 2], np.random.randn(20, 2) + [2, 2]] -Y = [0] * 20 + [1] * 20 +y = [0] * 20 + [1] * 20 # figure number fignum = 1 @@ -38,7 +38,7 @@ for name, penalty in (('unreg', 1), ('reg', 0.05)): clf = svm.SVC(kernel='linear', C=penalty) - clf.fit(X, Y) + clf.fit(X, y) # get the separating hyperplane w = clf.coef_[0] @@ -63,7 +63,7 @@ plt.scatter(clf.support_vectors_[:, 0], clf.support_vectors_[:, 1], s=80, facecolors='none', zorder=10, edgecolors='k') - plt.scatter(X[:, 0], X[:, 1], c=Y, zorder=10, cmap=plt.cm.Paired, + plt.scatter(X[:, 0], X[:, 1], c=y, zorder=10, cmap=plt.cm.Paired, edgecolors='k') plt.axis('tight') diff --git a/examples/svm/plot_svm_nonlinear.py b/examples/svm/plot_svm_nonlinear.py index c453ef391a16c..41f081a9763ec 100644 --- a/examples/svm/plot_svm_nonlinear.py +++ b/examples/svm/plot_svm_nonlinear.py @@ -19,11 +19,11 @@ np.linspace(-3, 3, 500)) np.random.seed(0) X = np.random.randn(300, 2) -Y = np.logical_xor(X[:, 0] > 0, X[:, 1] > 0) +y = np.logical_xor(X[:, 0] > 0, X[:, 1] > 0) # fit the model clf = svm.NuSVC() -clf.fit(X, Y) +clf.fit(X, y) # plot the decision function for each datapoint on the grid Z = clf.decision_function(np.c_[xx.ravel(), yy.ravel()]) @@ -34,7 +34,7 @@ origin='lower', cmap=plt.cm.PuOr_r) contours = plt.contour(xx, yy, Z, levels=[0], linewidths=2, linetypes='--') -plt.scatter(X[:, 0], X[:, 1], s=30, c=Y, cmap=plt.cm.Paired, +plt.scatter(X[:, 0], X[:, 1], s=30, c=y, cmap=plt.cm.Paired, edgecolors='k') plt.xticks(()) plt.yticks(()) From cc2c1d7b640045712ce6f41a5bdb9368e7231472 Mon Sep 17 00:00:00 2001 From: Vathsala Achar Date: Wed, 7 Jun 2017 12:48:03 +0100 Subject: [PATCH 2/3] DOC Updates to SVM examples * Fixing flake8 issues * Altered make_blobs to move clusters to corners and be more compact * Reverted changes converting Y to y --- examples/svm/plot_custom_kernel.py | 12 ++++++------ examples/svm/plot_separating_hyperplane.py | 10 +++------- .../svm/plot_separating_hyperplane_unbalanced.py | 13 +++---------- examples/svm/plot_svm_kernels.py | 6 +++--- examples/svm/plot_svm_margin.py | 6 +++--- examples/svm/plot_svm_nonlinear.py | 6 +++--- 6 files changed, 21 insertions(+), 32 deletions(-) diff --git a/examples/svm/plot_custom_kernel.py b/examples/svm/plot_custom_kernel.py index 803b92b63dc5a..28641cd35f8cb 100644 --- a/examples/svm/plot_custom_kernel.py +++ b/examples/svm/plot_custom_kernel.py @@ -17,26 +17,26 @@ iris = datasets.load_iris() X = iris.data[:, :2] # we only take the first two features. We could # avoid this ugly slicing by using a two-dim dataset -y = iris.target +Y = iris.target -def my_kernel(X, y): +def my_kernel(X, Y): """ We create a custom kernel: (2 0) - k(X, y) = X ( ) y.T + k(X, Y) = X ( ) Y.T (0 1) """ M = np.array([[2, 0], [0, 1.0]]) - return np.dot(np.dot(X, M), y.T) + return np.dot(np.dot(X, M), Y.T) h = .02 # step size in the mesh # we create an instance of SVM and fit out data. clf = svm.SVC(kernel=my_kernel) -clf.fit(X, y) +clf.fit(X, Y) # Plot the decision boundary. For that, we will assign a color to each # point in the mesh [x_min, x_max]x[y_min, y_max]. @@ -50,7 +50,7 @@ def my_kernel(X, y): plt.pcolormesh(xx, yy, Z, cmap=plt.cm.Paired) # Plot also the training points -plt.scatter(X[:, 0], X[:, 1], c=y, cmap=plt.cm.Paired, edgecolors='k') +plt.scatter(X[:, 0], X[:, 1], c=Y, cmap=plt.cm.Paired, edgecolors='k') plt.title('3-Class classification using Support Vector Machine with custom' ' kernel') plt.axis('tight') diff --git a/examples/svm/plot_separating_hyperplane.py b/examples/svm/plot_separating_hyperplane.py index e80e3e0c05e2b..6df8dbfd9ae60 100644 --- a/examples/svm/plot_separating_hyperplane.py +++ b/examples/svm/plot_separating_hyperplane.py @@ -16,7 +16,7 @@ # we create 40 separable points -X, y = make_blobs(n_samples=40, centers=2, random_state=7) +X, y = make_blobs(n_samples=40, centers=2, random_state=12, cluster_std=0.35) # fit the model clf = svm.SVC(kernel='linear') @@ -37,10 +37,6 @@ Z = clf.decision_function(xy).reshape(XX.shape) # plot decision boundary and margins -ax.contour(XX, YY, Z, colors='k', - levels=[-1, 0, 1], alpha=0.5, - linestyles=['--', '-', '--']) +ax.contour(XX, YY, Z, colors='k', levels=[-1, 0, 1], alpha=0.5, linestyles=['--', '-', '--']) # plot support vectors -ax.scatter(clf.support_vectors_[:, 0], - clf.support_vectors_[:, 1], - s=100, linewidth=1, facecolors='none'); +ax.scatter(clf.support_vectors_[:, 0], clf.support_vectors_[:, 1], s=100, linewidth=1, facecolors='none') diff --git a/examples/svm/plot_separating_hyperplane_unbalanced.py b/examples/svm/plot_separating_hyperplane_unbalanced.py index dd94deae6bace..df0dac0ec2cc4 100644 --- a/examples/svm/plot_separating_hyperplane_unbalanced.py +++ b/examples/svm/plot_separating_hyperplane_unbalanced.py @@ -30,7 +30,6 @@ import matplotlib.pyplot as plt from sklearn import svm - # we create 40 separable points rng = np.random.RandomState(0) n_samples_1 = 1000 @@ -66,18 +65,12 @@ Z = clf.decision_function(xy).reshape(XX.shape) # plot decision boundary and margins -a = ax.contour(XX, YY, Z, colors='k', - levels=[0], alpha=0.5, - linestyles=['-']) +a = ax.contour(XX, YY, Z, colors='k', levels=[0], alpha=0.5, linestyles=['-']) # get the separating hyperplane for weighted classes Z = wclf.decision_function(xy).reshape(XX.shape) # plot decision boundary and margins for weighted classes -b = ax.contour(XX, YY, Z, colors='r', - levels=[0], alpha=0.5, - linestyles=['-']) +b = ax.contour(XX, YY, Z, colors='r', levels=[0], alpha=0.5, linestyles=['-']) -plt.legend([a.collections[0], b.collections[0]], - ["non weighted", "weighted"], - loc="upper right"); +plt.legend([a.collections[0], b.collections[0]], ["non weighted", "weighted"], loc="upper right") diff --git a/examples/svm/plot_svm_kernels.py b/examples/svm/plot_svm_kernels.py index fbc57c5a10a6f..dbad4e0b725e2 100644 --- a/examples/svm/plot_svm_kernels.py +++ b/examples/svm/plot_svm_kernels.py @@ -41,7 +41,7 @@ (.2, -2.3), (0, -2.7), (1.3, 2.1)].T -y = [0] * 8 + [1] * 8 +Y = [0] * 8 + [1] * 8 # figure number fignum = 1 @@ -49,7 +49,7 @@ # fit the model for kernel in ('linear', 'poly', 'rbf'): clf = svm.SVC(kernel=kernel, gamma=2) - clf.fit(X, y) + clf.fit(X, Y) # plot the line, the points, and the nearest vectors to the plane plt.figure(fignum, figsize=(4, 3)) @@ -57,7 +57,7 @@ plt.scatter(clf.support_vectors_[:, 0], clf.support_vectors_[:, 1], s=80, facecolors='none', zorder=10, edgecolors='k') - plt.scatter(X[:, 0], X[:, 1], c=y, zorder=10, cmap=plt.cm.Paired, + plt.scatter(X[:, 0], X[:, 1], c=Y, zorder=10, cmap=plt.cm.Paired, edgecolors='k') plt.axis('tight') diff --git a/examples/svm/plot_svm_margin.py b/examples/svm/plot_svm_margin.py index bded813c19c9d..2fdc29c1b29bd 100644 --- a/examples/svm/plot_svm_margin.py +++ b/examples/svm/plot_svm_margin.py @@ -29,7 +29,7 @@ # we create 40 separable points np.random.seed(0) X = np.r_[np.random.randn(20, 2) - [2, 2], np.random.randn(20, 2) + [2, 2]] -y = [0] * 20 + [1] * 20 +Y = [0] * 20 + [1] * 20 # figure number fignum = 1 @@ -38,7 +38,7 @@ for name, penalty in (('unreg', 1), ('reg', 0.05)): clf = svm.SVC(kernel='linear', C=penalty) - clf.fit(X, y) + clf.fit(X, Y) # get the separating hyperplane w = clf.coef_[0] @@ -63,7 +63,7 @@ plt.scatter(clf.support_vectors_[:, 0], clf.support_vectors_[:, 1], s=80, facecolors='none', zorder=10, edgecolors='k') - plt.scatter(X[:, 0], X[:, 1], c=y, zorder=10, cmap=plt.cm.Paired, + plt.scatter(X[:, 0], X[:, 1], c=Y, zorder=10, cmap=plt.cm.Paired, edgecolors='k') plt.axis('tight') diff --git a/examples/svm/plot_svm_nonlinear.py b/examples/svm/plot_svm_nonlinear.py index 41f081a9763ec..c453ef391a16c 100644 --- a/examples/svm/plot_svm_nonlinear.py +++ b/examples/svm/plot_svm_nonlinear.py @@ -19,11 +19,11 @@ np.linspace(-3, 3, 500)) np.random.seed(0) X = np.random.randn(300, 2) -y = np.logical_xor(X[:, 0] > 0, X[:, 1] > 0) +Y = np.logical_xor(X[:, 0] > 0, X[:, 1] > 0) # fit the model clf = svm.NuSVC() -clf.fit(X, y) +clf.fit(X, Y) # plot the decision function for each datapoint on the grid Z = clf.decision_function(np.c_[xx.ravel(), yy.ravel()]) @@ -34,7 +34,7 @@ origin='lower', cmap=plt.cm.PuOr_r) contours = plt.contour(xx, yy, Z, levels=[0], linewidths=2, linetypes='--') -plt.scatter(X[:, 0], X[:, 1], s=30, c=y, cmap=plt.cm.Paired, +plt.scatter(X[:, 0], X[:, 1], s=30, c=Y, cmap=plt.cm.Paired, edgecolors='k') plt.xticks(()) plt.yticks(()) From 11db8aa601ad4b8c32c5601599d73a1222cbb37b Mon Sep 17 00:00:00 2001 From: Vathsala Achar Date: Thu, 8 Jun 2017 17:23:03 +0100 Subject: [PATCH 3/3] Fixes for flake8 errors --- examples/svm/plot_separating_hyperplane.py | 6 ++++-- examples/svm/plot_separating_hyperplane_unbalanced.py | 3 ++- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/examples/svm/plot_separating_hyperplane.py b/examples/svm/plot_separating_hyperplane.py index 6df8dbfd9ae60..fafadb2d381d0 100644 --- a/examples/svm/plot_separating_hyperplane.py +++ b/examples/svm/plot_separating_hyperplane.py @@ -37,6 +37,8 @@ Z = clf.decision_function(xy).reshape(XX.shape) # plot decision boundary and margins -ax.contour(XX, YY, Z, colors='k', levels=[-1, 0, 1], alpha=0.5, linestyles=['--', '-', '--']) +ax.contour(XX, YY, Z, colors='k', levels=[-1, 0, 1], alpha=0.5, + linestyles=['--', '-', '--']) # plot support vectors -ax.scatter(clf.support_vectors_[:, 0], clf.support_vectors_[:, 1], s=100, linewidth=1, facecolors='none') +ax.scatter(clf.support_vectors_[:, 0], clf.support_vectors_[:, 1], s=100, + linewidth=1, facecolors='none') diff --git a/examples/svm/plot_separating_hyperplane_unbalanced.py b/examples/svm/plot_separating_hyperplane_unbalanced.py index df0dac0ec2cc4..cf3130a6ae5c5 100644 --- a/examples/svm/plot_separating_hyperplane_unbalanced.py +++ b/examples/svm/plot_separating_hyperplane_unbalanced.py @@ -73,4 +73,5 @@ # plot decision boundary and margins for weighted classes b = ax.contour(XX, YY, Z, colors='r', levels=[0], alpha=0.5, linestyles=['-']) -plt.legend([a.collections[0], b.collections[0]], ["non weighted", "weighted"], loc="upper right") +plt.legend([a.collections[0], b.collections[0]], ["non weighted", "weighted"], + loc="upper right")