support-vector-machine
November 13, 2024
[1]: import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
[2]: plt.rcParams['figure.figsize']=[19,8]
[4]: import warnings
warnings.filterwarnings('ignore')
[6]: from sklearn.datasets import load_iris
[7]: iris=load_iris()
[8]: dir(iris)
[8]: ['DESCR',
'data',
'data_module',
'feature_names',
'filename',
'frame',
'target',
'target_names']
[9]: iris_df=pd.DataFrame(data=iris.data,columns=iris.feature_names)
iris_df.head()
[9]: sepal length (cm) sepal width (cm) petal length (cm) petal width (cm)
0 5.1 3.5 1.4 0.2
1 4.9 3.0 1.4 0.2
2 4.7 3.2 1.3 0.2
3 4.6 3.1 1.5 0.2
4 5.0 3.6 1.4 0.2
[10]: iris_df['target'] = iris.target
1
[11]: iris_df.head()
[11]: sepal length (cm) sepal width (cm) petal length (cm) petal width (cm) \
0 5.1 3.5 1.4 0.2
1 4.9 3.0 1.4 0.2
2 4.7 3.2 1.3 0.2
3 4.6 3.1 1.5 0.2
4 5.0 3.6 1.4 0.2
target
0 0
1 0
2 0
3 0
4 0
[12]: iris_df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 150 entries, 0 to 149
Data columns (total 5 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 sepal length (cm) 150 non-null float64
1 sepal width (cm) 150 non-null float64
2 petal length (cm) 150 non-null float64
3 petal width (cm) 150 non-null float64
4 target 150 non-null int32
dtypes: float64(4), int32(1)
memory usage: 5.4 KB
[13]: iris.target_names
[13]: array(['setosa', 'versicolor', 'virginica'], dtype='<U10')
[14]: iris_df.duplicated().sum()
[14]: 1
[15]: iris_df.drop_duplicates(inplace=True)
[16]: iris_df.duplicated().sum()
[16]: 0
[18]: sns.countplot(data=iris_df, x='target')
plt.title("Count of target values")
2
plt.show()
[22]: iris_setosa = iris_df.loc[iris_df['target'] == 0, :]
iris_versicolor = iris_df.loc[iris_df['target'] == 1, :]
iris_virginica = iris_df.loc[iris_df['target'] == 2, :]
[26]: sns.scatterplot(data=iris_setosa, x='petal length (cm)', y='petal width (cm)',␣
↪s=150,)
sns.scatterplot(data=iris_versicolor, x='petal length (cm)', y='petal width␣
↪(cm)', s=150,)
sns.scatterplot(data=iris_virginica, x='petal length (cm)', y='petal width␣
↪(cm)', s=150,)
plt.legend(['Setosa', 'Versicolor', 'Virginica'], loc='lower right')
plt.xlabel('Petal Length (cm)')
plt.ylabel('Petal Width (cm)')
plt.show()
3
[32]: sns.scatterplot(data=iris_setosa, x='sepal length (cm)', y='sepal width (cm)',␣
↪s=150, label='Setosa')
sns.scatterplot(data=iris_versicolor, x='sepal length (cm)', y='sepal width␣
↪(cm)', s=150, label='Versicolor')
sns.scatterplot(data=iris_virginica, x='sepal length (cm)', y='sepal width␣
↪(cm)', s=150, label='Virginica')
plt.xlabel('Sepal Length (cm)')
plt.ylabel('Sepal Width (cm)')
plt.legend(loc='lower right')
plt.show()
[33]: sns.boxplot(iris_df)
plt.show()
4
[34]: # obtain the first quartile
Q1 = iris_df.quantile(0.25)
# obtain the third quartile
Q3 = iris_df.quantile(0.75)
# obtain the IQR
IQR= Q3-Q1
#print the IQR
print(IQR)
sepal length (cm) 1.3
sepal width (cm) 0.5
petal length (cm) 3.5
petal width (cm) 1.5
target 2.0
dtype: float64
[35]: ul =Q3 + 1.5*IQR
ll =Q1-1.5*IQR
[36]: iris_df= iris_df[~((iris_df <ll) |(iris_df> ul)).any(axis=1)]
[37]: sns.boxplot(iris_df)
plt.show()
5
[40]: X =iris_df.loc[:,:'petal width (cm)'].values
y =iris_df.loc[:, 'target'].values
[41]: y
[41]: array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2])
[42]: from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
X = scaler.fit_transform(X)
[43]: from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.
↪2,random_state=1)
[44]: from sklearn.svm import SVC # Support Vector Classifier
[45]: model = SVC(kernel='linear')  # support vector classifier with a linear kernel
model.fit(X_train, y_train)  # train on the training split; the fitted estimator is echoed as the cell output
[45]: SVC(kernel='linear')
[46]: model.score(X_train, y_train)
6
[46]: 0.9827586206896551
[47]: y_predict = model.predict(X_test)
[48]: y_predict
[48]: array([0, 2, 0, 2, 1, 0, 0, 2, 0, 1, 1, 1, 1, 2, 0, 2, 0, 0, 0, 1, 2, 1,
0, 0, 2, 2, 2, 2, 1])
[49]: y_test
[49]: array([0, 1, 0, 2, 1, 0, 0, 2, 0, 1, 1, 1, 1, 1, 0, 2, 0, 0, 0, 1, 2, 1,
0, 0, 2, 2, 2, 2, 1])
[50]: from sklearn.metrics import confusion_matrix
[51]: cm = confusion_matrix(y_test, y_predict)  # ground truth first: rows are true classes, columns are predicted classes
[52]: cm
[52]: array([[11, 0, 0],
[ 0, 8, 2],
[ 0, 0, 8]], dtype=int64)
[ ]: