AIM: Implement pruning of Decision tree.
CODE:
# Decision-tree pruning demo: label-encode a small CSV, fit a tree whose
# growth is capped by max_depth / max_leaf_nodes, report its accuracy on a
# held-out split and render the fitted tree to a PNG.
import pandas as pd
from sklearn.tree import DecisionTreeClassifier  # Import Decision Tree Classifier
from sklearn import preprocessing
from sklearn.model_selection import train_test_split  # Import train_test_split function
from sklearn import metrics  # Import scikit-learn metrics module for accuracy calculation
from six import StringIO
from IPython.display import Image
from sklearn.tree import export_graphviz
import pydotplus

# NOTE(review): header=None keeps the CSV's column-name line as data row 0,
# so it is label-encoded and split like any other record. The feature names
# passed to export_graphviz below are read back from that row. Confirm this
# is intended before changing it.
data = pd.read_csv("/content/covid19.csv", header=None)
print(data.shape)
print("records in the csv file-\n", data)

# Split dataset in features and target variable.
# Every column is label-encoded so the classifier receives integers only.
le = preprocessing.LabelEncoder()
data1 = data.apply(le.fit_transform)
X = data1.iloc[:, :-1]  # Features: every column except the last
y = data1.iloc[:, -1]  # Target variable: the last column
print(y)

# 70% training and 30% test
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=1
)

# Create Decision Tree classifier object; depth and leaf count are capped
# up front so the tree cannot grow past the limits.
clf = DecisionTreeClassifier(criterion="gini", max_depth=1, max_leaf_nodes=3)

# Train Decision Tree Classifier
clf = clf.fit(X_train, y_train)

# Predict the response for test dataset
y_pred = clf.predict(X_test)

# Model Accuracy, how often is the classifier correct?
print("Accuracy:", metrics.accuracy_score(y_test, y_pred))

# Export the fitted tree as Graphviz dot text and render it to an image.
dot_data = StringIO()
export_graphviz(
    clf,
    out_file=dot_data,
    filled=True,
    rounded=True,
    special_characters=True,
    feature_names=data.iloc[0, 0:5],
)
graph = pydotplus.graph_from_dot_data(dot_data.getvalue())
graph.write_png('records.png')
Image(graph.create_png())
OUTPUT:
(15, 6)
records in the csv file-
0 1 2 3 4 5
0 Age Country Fitness Stay Symtoms Safety
1 59 US POOR HOUSE NO YES
2 40 US POOR HOSPITAL YES NO
3 48 ITALY POOR HOSPITAL YES YES
4 55 US POOR HOSPITAL NO YES
5 62 ITALY POOR HOUSE NO YES
6 75 ITALY GOOD HOSPITAL YES NO
7 83 INDIA POOR HOUSE NO YES
8 90 US GOOD HOUSE NO NO
9 50 INDIA POOR HOSPITAL NO YES
10 57 SPAIN POOR HOUSE YES NO
11 46 INDIA GOOD HOSPITAL NO YES
12 33 ITALY POOR HOUSE YES NO
13 38 ITALY GOOD HOSPITAL YES NO
14 43 SPAIN GOOD HOUSE YES YES
Accuracy: 0.6
AIM: Implement Linear Regression.
CODE:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn import datasets

# Load the Iris measurements from the local CSV and display the frame.
df = pd.read_csv("iris.csv")
print(df)
Id SepalLengthCm SepalWidthCm PetalLengthCm PetalWidthCm \
0 1 5.1 3.5 1.4 0.2
1 2 4.9 3.0 1.4 0.2
2 3 4.7 3.2 1.3 0.2
3 4 4.6 3.1 1.5 0.2
4 5 5.0 3.6 1.4 0.2
145 146 6.7 3.0 5.2 2.3
146 147 6.3 2.5 5.0 1.9
147 148 6.5 3.0 5.2 2.0
148 149 6.2 3.4 5.4 2.3
149 150 5.9 3.0 5.1 1.8
Species
0 Iris-setosa
1 Iris-setosa
2 Iris-setosa
3 Iris-setosa
4 Iris-setosa
145 Iris-virginica
146 Iris-virginica
147 Iris-virginica
148 Iris-virginica
149 Iris-virginica
[150 rows x 6 columns]
# Summarise column names, non-null counts and dtypes of the frame.
df.info()
RangeIndex: 150 entries, 0 to 149
Data columns (total 6 columns):
# Column Non-Null Count Dtype
Id 150 non-null int64
SepalLengthCm 150 non-null float64
SepalWidthCm 150 non-null float64
PetalLengthCm 150 non-null float64
PetalWidthCm 150 non-null float64
Species 150 non-null object
dtypes: float64(4), int64(1), object(1)
memory usage: 7.2+ KB
# Count how many samples belong to each species.
df['Species'].value_counts()
Iris-setosa 50
Iris-versicolor 50
Iris-virginica 50
Name: Species, dtype: int64
# Histogram of sepal widths across all samples.
df['SepalWidthCm'].hist()

# Scatter sepal length against sepal width, one colour per species.
colors = ['red', 'orange', 'blue']
Species = ['Iris-virginica', 'Iris-setosa', 'Iris-versicolor']
for name, color in zip(Species, colors):
    subset = df[df['Species'] == name]  # rows belonging to this species only
    plt.scatter(subset['SepalLengthCm'], subset['SepalWidthCm'], c=color, label=name)
plt.xlabel("Sepal length")
plt.ylabel("Sepal width")
plt.legend()
45,
iriswirginica
© isversicolor
40
oe
Sepal with
# "Sepal length" -- axis label of the preceding figure; it was fused onto the
# first code line of this cell in the listing.

# Scatter petal length against petal width, one colour per species.
for name, color in zip(Species, colors):
    subset = df[df['Species'] == name]  # rows belonging to this species only
    plt.scatter(subset['PetalLengthCm'], subset['PetalWidthCm'], c=color, label=name)
plt.xlabel("Petal length")
plt.ylabel("Petal width")
plt.legend()
25{ © wisvirginica
@ eversicotor .
20
@-
Fo
os i
sss°
eco 8
00 =
i 2 3 3 3 6 7
Petal length
7
zo 1s 20 25
import matplotlib.pyplot as plt

# NOTE(review): x and y are not assigned anywhere in the visible listing.
# Presumably x = df['PetalWidthCm'] and y = df['PetalLengthCm'] -- confirm
# against the original notebook.
plt.scatter(x, y)
plt.xlabel('petal_width')
# NOTE(review): this second xlabel call replaces the label set on the line
# above; plt.ylabel was probably intended. Left as recorded so the code still
# matches the cell output shown in the document.
plt.xlabel('petal_length')
Text(0.5, 0, 'petal_length')
oo os re 1s 20 25
# "petal_length" -- axis label of the preceding figure; it was fused onto the
# first code line of this cell in the listing.
from sklearn.model_selection import train_test_split

# Hold out 30% of the samples for testing; random_state fixes the shuffle.
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.30, random_state=20)

import numpy as np

# Reshape the single feature into one column (n_samples, 1) before it is
# handed to the estimator's fit call.
x_train = np.array(x_train).reshape(-1, 1)
x_train
array({{0.2],
[0.4],
[0.2],
(2.1),
{0.4},
[2.2],
23],
23],
013),
{0.2},
{1.4},
(1.8),
[0.4],
131)
import numpy as np

# Same single-column reshape as applied to the training feature.
x_test = np.array(x_test).reshape(-1, 1)
x_test
array({(0.2],
{1.2},
[.3},
(1.6),
.],
1.3),
3],
from sklearn.linear_model import LinearRegression
# Fit an ordinary least-squares line to the training split.
lr = LinearRegression()
lr.fit(x_train, y_train)
+ LinearRegression
LinearRegression()
# Intercept of the fitted line. The listing shows only the word "intercept";
# the name c is inferred from its use in the prediction formula later on
# -- confirm against the original notebook.
c = lr.intercept_
c
1.035634950775612
# Slope(s) of the fitted line, one per feature.
m = lr.coef_
m
array([2.20592445])
# Predict the training targets by hand from the fitted slope and intercept
# (y = m * x + c), then flatten the column vector for display.
y_pred_train = m * x_train + c
y_pred_train.flatten()
array([1.47681984, 1.91800473, 1.47681984, 5.44748386, 1.91800473,
5.88866875, 6.1092612 , 1.91800473, 6.55044609, 3.24155941,
3.90333674, 4.34452163, 1.69741229, 1.47681984, 3.46215185,
3.90333674, 3.6827443 , 3.46215185, 4.34452163, 4.34452163,
3.90333674, 2.13859718, 4.34452163, 3.90333674, 1.2562274 ,
1.47681984, 4.34452163, 3.90333674, 4.12392919, 1.69741229,
1.47681984, 6.1092612 , 1.47681984, 1.47681984, 3.24155941,
5.00629897, 5.22689141, 5.44748386, 5.00629897, 5.66807631,
5.66807631, 3.6827443 , 5.00629897, 6.1092612 , 6.55044609,
1.47681984, 5.66807631, 1.47681984, 1.47681984, 1.47681984,
6.1092612 , 4.34452163, 5.00629897, 5.66807631, 5.00629897,
3.90333674, 1.47681984, 5.00629897, 1.47681984, 3.24155941,
5.88866875, 1.47681984, 4.34452163, 4.12392919, 5.00629897,
1.69741229, 1.47681984, 5.00629897, 6.55044609, 4.12392919,
3.6827443 , 3.24155941, 5.66807631, 6.32985364, 2.35918962,
6.1092612 , 1.69741229, 4.78570652, 1.47681984, 1.69741229,
5.22689141, 6.32985364, 5.00629897, 1.47681984, 1.2562274 ,
5.44748386, 4.56511408, 4.34452163, 5.44748386, 1.47681984,
6.1092612 , 1.69741229, 1.47681984, 5.22689141, 5.66807631,
1.91800473, 1.91800473, 1.69741229, 6.1092612 , 3.90333674,
1.47681984, 4.12392919, 5.00629897, 1.91800473, 3.90333674])
import matplotlib.pyplot as plt
# Training points with the fitted regression line drawn over them.
plt.scatter(x_train, y_train)
plt.plot(x_train, y_pred_train, color='red')
plt.xlabel('petal_width')
# NOTE(review): this second xlabel call replaces the label set on the line
# above; plt.ylabel was probably intended. Left as recorded so the code still
# matches the cell output shown in the document.
plt.xlabel('petal_length')
Text(0.5, 0, 'petal_length')
7
oo os Lo 1s 20 2's
petal_length
AIM: Implement an Artificial Neural Network by implementing the Back propagation
algorithm and test the same using appropriate data set.
CODE:
import numpy as np
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt

# Load dataset
data = load_iris()

# Get features and target
x = data.data
y = data.target

# Get dummy variable: one-hot encode the class labels so each class gets
# its own output column.
y = pd.get_dummies(y).values
y[:3]
array([[1, 0, 0],
       [1, 0, 0],
       [1, 0, 0]], dtype=uint8)
# Split data into train and test data.
# test_size is an integer here, i.e. an absolute number of test samples
# rather than a fraction.
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=20, random_state=4)

# Initialize variables
learning_rate = 0.1
iterations = 5000
# NOTE(review): .size counts every element of the one-hot target matrix,
# not the number of samples -- confirm this is the intended normaliser for
# the gradient step.
N = y_train.size

# number of input features
input_size = 4

# number of hidden layers neurons
hidden_size = 2

# number of neurons at the output layer
output_size = 3

results = pd.DataFrame(columns=["mse", "accuracy"])

# Initialize weights; the fixed seed makes the random draw repeatable.
np.random.seed(10)

# initializing weight for the hidden layer
W1 = np.random.normal(scale=0.5, size=(input_size, hidden_size))

# initializing weight for the output layer
W2 = np.random.normal(scale=0.5, size=(hidden_size, output_size))
def sigmoid(x):
    """Return the logistic sigmoid 1 / (1 + e^-x) of *x*, element-wise.

    The listing had ``np.exp(0)``, which ignores the input and returns a
    constant 0.5; the exponent must be ``-x``.
    """
    return 1 / (1 + np.exp(-x))
def mean_squared_error(y_pred, y_true):
    """Return the halved mean of squared differences between two arrays.

    The squared errors are summed over every element and divided by
    ``2 * y_pred.size``.
    """
    return ((y_pred - y_true) ** 2).sum() / (2 * y_pred.size)
def accuracy(y_pred, y_true):
    """Return the fraction of rows whose arg-max column matches.

    Both arguments are 2-D arrays with one row per sample; the predicted
    class of a row is the index of its largest value.
    """
    acc = y_pred.argmax(axis=1) == y_true.argmax(axis=1)
    return acc.mean()
# Train the two-layer network with full-batch gradient descent.
for itr in range(iterations):
    # feedforward propagation
    # on hidden layer
    Z1 = np.dot(x_train, W1)
    A1 = sigmoid(Z1)

    # on output layer
    Z2 = np.dot(A1, W2)
    A2 = sigmoid(Z2)

    # NOTE(review): results is re-created on every pass, so the frame
    # printed below only ever holds the current iteration's row (the
    # recorded output shows index 0 each time). Move this above the loop if
    # a full training history is wanted.
    results = pd.DataFrame()

    # Calculating error
    mse = mean_squared_error(A2, y_train)
    acc = accuracy(A2, y_train)
    new_data = pd.DataFrame({"mse": [mse], "accuracy": [acc]})

    # Concatenate the new DataFrame with the original DataFrame
    results = pd.concat([results, new_data], ignore_index=True)
    print(results)

    # backpropagation
    # Output-layer error scaled by the sigmoid derivative A2 * (1 - A2).
    E1 = A2 - y_train
    dW1 = E1 * A2 * (1 - A2)
    # Push the error back through W2, scaled by the hidden-layer derivative.
    E2 = np.dot(dW1, W2.T)
    dW2 = E2 * A1 * (1 - A1)

    # weight updates
    W2_update = np.dot(A1.T, dW1) / N
    W1_update = np.dot(x_train.T, dW2) / N
    W2 = W2 - learning_rate * W2_update
    W1 = W1 - learning_rate * W1_update
Streaming output truncated to the last 5000 lines.
mse accuracy
0 0.050388 0.976923
mse accuracy
0 0.050387 0.976923
mse accuracy
0 0.050387 0.976923
mse accuracy
0 0.050386 0.976923
mse accuracy
0 0.050385 0.976923
# feedforward: run the held-out samples through the trained weights and
# score the predictions.
Z1 = np.dot(x_test, W1)
A1 = sigmoid(Z1)
Z2 = np.dot(A1, W2)
A2 = sigmoid(Z2)
acc = accuracy(A2, y_test)
print("Accuracy: {}".format(acc))
Accuracy: 0.95