Machine Learning Lab Programs
Decision tree pruning (pre-pruning and post-pruning), linear regression, and backpropagation for an artificial neural network.
AIM: Implement pruning of a decision tree.

CODE:

import pandas as pd
from sklearn.tree import DecisionTreeClassifier, export_graphviz
from sklearn import preprocessing
from sklearn.model_selection import train_test_split  # train/test split helper
from sklearn import metrics  # scikit-learn metrics module for accuracy calculation
from six import StringIO
from IPython.display import Image
import pydotplus

data = pd.read_csv("/content/covid19.csv", header=None)
print(data.shape)
print("records in the csv file:\n", data)

# Label-encode every column, then split the dataset into features and target
le = preprocessing.LabelEncoder()
data1 = data.apply(le.fit_transform)
X = data1.iloc[:, :-1]  # features
y = data1.iloc[:, -1]   # target variable

# 70% training and 30% test
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=1)

# Create a pre-pruned decision tree classifier: the tree is grown no deeper
# than max_depth and with at most max_leaf_nodes leaves
clf = DecisionTreeClassifier(criterion="gini", max_depth=1, max_leaf_nodes=3)

# Train the decision tree classifier
clf = clf.fit(X_train, y_train)

# Predict the response for the test dataset
y_pred = clf.predict(X_test)

# Model accuracy: how often is the classifier correct?
print("Accuracy:", metrics.accuracy_score(y_test, y_pred))

# Render the pruned tree to a PNG
dot_data = StringIO()
export_graphviz(clf, out_file=dot_data, filled=True, rounded=True,
                special_characters=True, feature_names=data.iloc[0, 0:5])
graph = pydotplus.graph_from_dot_data(dot_data.getvalue())
graph.write_png('records.png')
Image(graph.create_png())

OUTPUT:

(15, 6)
records in the csv file:
      0        1        2         3        4       5
0   Age  Country  Fitness      Stay  Symtoms  Safety
1    59       US     POOR     HOUSE       NO     YES
2    40       US     POOR  HOSPITAL      YES      NO
3    48    ITALY     POOR  HOSPITAL      YES     YES
4    55       US     POOR  HOSPITAL       NO     YES
5    62    ITALY     POOR     HOUSE       NO     YES
6    75    ITALY     GOOD  HOSPITAL      YES      NO
7    83    INDIA     POOR     HOUSE       NO     YES
8    90       US     GOOD     HOUSE       NO      NO
9    50    INDIA     POOR  HOSPITAL       NO     YES
10   57    SPAIN     POOR     HOUSE      YES      NO
11   46    INDIA     GOOD  HOSPITAL       NO     YES
12   33    ITALY     POOR     HOUSE      YES      NO
13   38    ITALY     GOOD  HOSPITAL      YES      NO
14   43    SPAIN     GOOD     HOUSE      YES     YES

Accuracy: 0.6

[Decision tree diagram rendered to records.png]
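NOTE: The program above uses pre-pruning only: max_depth and max_leaf_nodes cap the tree while it is being grown. Post-pruning can be added with scikit-learn's minimal cost-complexity pruning, by growing the full tree, collecting the candidate ccp_alpha values, and keeping the alpha that scores best on held-out data. A minimal sketch, assuming the X_train, X_test, y_train, y_test split from the program above:

from sklearn.tree import DecisionTreeClassifier
from sklearn import metrics

# Candidate effective alphas from pruning the fully grown tree
# (assumes X_train, X_test, y_train, y_test from the pruning program above)
path = DecisionTreeClassifier(criterion="gini", random_state=1).cost_complexity_pruning_path(X_train, y_train)

best_alpha, best_acc = 0.0, 0.0
for alpha in path.ccp_alphas:
    pruned = DecisionTreeClassifier(criterion="gini", random_state=1, ccp_alpha=alpha)
    pruned.fit(X_train, y_train)
    acc = metrics.accuracy_score(y_test, pruned.predict(X_test))
    if acc > best_acc:
        best_alpha, best_acc = alpha, acc

print("Best ccp_alpha:", best_alpha, "test accuracy:", best_acc)

Larger ccp_alpha values prune more aggressively; ccp_alpha = 0 keeps the full tree.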
AIM: Implement linear regression.

CODE:

import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn import datasets

df = pd.read_csv("iris.csv")
print(df)

OUTPUT:

      Id  SepalLengthCm  SepalWidthCm  PetalLengthCm  PetalWidthCm         Species
0      1            5.1           3.5            1.4           0.2     Iris-setosa
1      2            4.9           3.0            1.4           0.2     Iris-setosa
2      3            4.7           3.2            1.3           0.2     Iris-setosa
3      4            4.6           3.1            1.5           0.2     Iris-setosa
4      5            5.0           3.6            1.4           0.2     Iris-setosa
..   ...            ...           ...            ...           ...             ...
145  146            6.7           3.0            5.2           2.3  Iris-virginica
146  147            6.3           2.5            5.0           1.9  Iris-virginica
147  148            6.5           3.0            5.2           2.0  Iris-virginica
148  149            6.2           3.4            5.4           2.3  Iris-virginica
149  150            5.9           3.0            5.1           1.8  Iris-virginica

[150 rows x 6 columns]

df.info()

RangeIndex: 150 entries, 0 to 149
Data columns (total 6 columns):
 #   Column         Non-Null Count  Dtype
---  ------         --------------  -----
 0   Id             150 non-null    int64
 1   SepalLengthCm  150 non-null    float64
 2   SepalWidthCm   150 non-null    float64
 3   PetalLengthCm  150 non-null    float64
 4   PetalWidthCm   150 non-null    float64
 5   Species        150 non-null    object
dtypes: float64(4), int64(1), object(1)
memory usage: 7.2+ KB

df['Species'].value_counts()

Iris-setosa        50
Iris-versicolor    50
Iris-virginica     50
Name: Species, dtype: int64

df['SepalWidthCm'].hist()

[Histogram of SepalWidthCm]

# Scatter plots of the three species
colors = ['red', 'orange', 'blue']
species = ['Iris-virginica', 'Iris-versicolor', 'Iris-setosa']

for i in range(3):
    x = df[df['Species'] == species[i]]
    plt.scatter(x['SepalLengthCm'], x['SepalWidthCm'], c=colors[i], label=species[i])
plt.xlabel("Sepal length")
plt.ylabel("Sepal width")
plt.legend()

[Scatter plot: sepal width vs. sepal length, coloured by species]

for i in range(3):
    x = df[df['Species'] == species[i]]
    plt.scatter(x['PetalLengthCm'], x['PetalWidthCm'], c=colors[i], label=species[i])
plt.xlabel("Petal length")
plt.ylabel("Petal width")
plt.legend()

[Scatter plot: petal width vs. petal length, coloured by species]

# Regress petal length on petal width
x = df['PetalWidthCm']
y = df['PetalLengthCm']

plt.scatter(x, y)
plt.xlabel('petal_width')
plt.ylabel('petal_length')

[Scatter plot: petal_length vs. petal_width]

from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.30, random_state=20)

x_train = np.array(x_train).reshape(-1, 1)
x_train

array([[0.2],
       [0.4],
       [0.2],
       [2.1],
       [0.4],
       [2.2],
       [2.3],
       [2.3],
       [0.3],
       [0.2],
       [1.4],
       [1.8],
       [0.4],
       [1.3],
       ...

x_test = np.array(x_test).reshape(-1, 1)
x_test

array([[0.2],
       [1.2],
       [1.3],
       [1.6],
       [1.3],
       ...

from sklearn.linear_model import LinearRegression
lr = LinearRegression()
lr.fit(x_train, y_train)

LinearRegression()

c = lr.intercept_
c

1.035634950775612

m = lr.coef_
m

array([2.20592445])

# Fitted values on the training set: y = m*x + c
y_pred_train = m * x_train + c
y_pred_train.flatten()

array([1.47681984, 1.91800473, 1.47681984, 5.44748386, 1.91800473,
       5.88866875, 6.1092612 , 1.91800473, 6.55044609, 3.24155941,
       3.90333674, 4.34452163, 1.69741229, 1.47681984, 3.46215185,
       3.90333674, 3.6827443 , 3.46215185, 4.34452163, 4.34452163,
       3.90333674, 2.13859718, 4.34452163, 3.90333674, 1.2562274 ,
       1.47681984, 4.34452163, 3.90333674, 4.12392919, 1.69741229,
       1.47681984, 6.1092612 , 1.47681984, 1.47681984, 3.24155941,
       5.00629897, 5.22689141, 5.44748386, 5.00629897, 5.66807631,
       5.66807631, 3.6827443 , 5.00629897, 6.1092612 , 6.55044609,
       1.47681984, 5.66807631, 1.47681984, 1.47681984, 1.47681984,
       6.1092612 , 4.34452163, 5.00629897, 5.66807631, 5.00629897,
       3.90333674, 1.47681984, 5.00629897, 1.47681984, 3.24155941,
       5.88866875, 1.47681984, 4.34452163, 4.12392919, 5.00629897,
       1.69741229, 1.47681984, 5.00629897, 6.55044609, 4.12392919,
       3.6827443 , 3.24155941, 5.66807631, 6.32985364, 2.35918962,
       6.1092612 , 1.69741229, 4.78570652, 1.47681984, 1.69741229,
       5.22689141, 6.32985364, 5.00629897, 1.47681984, 1.2562274 ,
       5.44748386, 4.56511408, 4.34452163, 5.44748386, 1.47681984,
       6.1092612 , 1.69741229, 1.47681984, 5.22689141, 5.66807631,
       1.91800473, 1.91800473, 1.69741229, 6.1092612 , 3.90333674,
       1.47681984, 4.12392919, 5.00629897, 1.91800473, 3.90333674])

# Plot the regression line over the training data
plt.scatter(x_train, y_train)
plt.plot(x_train, y_pred_train, color='red')
plt.xlabel('petal_width')
plt.ylabel('petal_length')

[Scatter plot of the training data with the fitted line in red]
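NOTE: The program above plots the fitted line but never scores it on the held-out 30%. A minimal sketch of that evaluation, assuming lr, x_test, and y_test from the program above (the printed numbers depend on the split):

from sklearn.metrics import mean_squared_error, r2_score

# Predict petal length for the held-out petal widths
# (assumes lr, x_test, y_test from the regression program above)
y_pred_test = lr.predict(x_test)

print("MSE:", mean_squared_error(y_test, y_pred_test))
print("R^2:", r2_score(y_test, y_pred_test))

An R^2 near 1 means the single-feature line explains most of the variance in petal length.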
AIM: Implement an artificial neural network using the backpropagation algorithm and test it on an appropriate data set.

CODE:

import numpy as np
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt

# Load dataset
data = load_iris()

# Get features and target
x = data.data
y = data.target

# One-hot encode the target
y = pd.get_dummies(y).values
y[:3]

array([[1, 0, 0],
       [1, 0, 0],
       [1, 0, 0]], dtype=uint8)

# Split data into train and test data (test_size=20 holds out 20 samples)
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=20, random_state=4)

# Initialize variables
learning_rate = 0.1
iterations = 5000
N = y_train.size

# number of input features
input_size = 4

# number of hidden layer neurons
hidden_size = 2

# number of neurons at the output layer
output_size = 3

results = pd.DataFrame(columns=["mse", "accuracy"])

# Initialize weights
np.random.seed(10)

# initializing weights for the hidden layer
W1 = np.random.normal(scale=0.5, size=(input_size, hidden_size))

# initializing weights for the output layer
W2 = np.random.normal(scale=0.5, size=(hidden_size, output_size))

def sigmoid(x):
    return 1 / (1 + np.exp(-x))

def mean_squared_error(y_pred, y_true):
    return ((y_pred - y_true) ** 2).sum() / (2 * y_pred.size)

def accuracy(y_pred, y_true):
    acc = y_pred.argmax(axis=1) == y_true.argmax(axis=1)
    return acc.mean()

for itr in range(iterations):
    # feedforward propagation
    # on hidden layer
    Z1 = np.dot(x_train, W1)
    A1 = sigmoid(Z1)

    # on output layer
    Z2 = np.dot(A1, W2)
    A2 = sigmoid(Z2)

    # Calculating error; results is reset so the print below shows
    # only the current iteration's metrics
    results = pd.DataFrame()
    mse = mean_squared_error(A2, y_train)
    acc = accuracy(A2, y_train)
    new_data = pd.DataFrame({"mse": [mse], "accuracy": [acc]})

    # Concatenate the new DataFrame with the results DataFrame
    results = pd.concat([results, new_data], ignore_index=True)
    print(results)

    # backpropagation
    E1 = A2 - y_train          # error at the output layer
    dW1 = E1 * A2 * (1 - A2)   # delta at the output layer (sigmoid derivative)
    E2 = np.dot(dW1, W2.T)     # error propagated back to the hidden layer
    dW2 = E2 * A1 * (1 - A1)   # delta at the hidden layer

    # weight updates
    W2_update = np.dot(A1.T, dW1) / N
    W1_update = np.dot(x_train.T, dW2) / N
    W2 = W2 - learning_rate * W2_update
    W1 = W1 - learning_rate * W1_update

OUTPUT (streaming output truncated to the last 5000 lines):

        mse  accuracy
0  0.050388  0.976923
        mse  accuracy
0  0.050387  0.976923
        mse  accuracy
0  0.050387  0.976923
        mse  accuracy
0  0.050386  0.976923
        mse  accuracy
0  0.050385  0.976923

# feedforward on the test set
Z1 = np.dot(x_test, W1)
A1 = sigmoid(Z1)
Z2 = np.dot(A1, W2)
A2 = sigmoid(Z2)

acc = accuracy(A2, y_test)
print("Accuracy: {}".format(acc))

Accuracy: 0.95
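NOTE: As a sanity check, the same task can be run through scikit-learn's built-in MLPClassifier, which trains a comparable one-hidden-layer network. A minimal sketch; the hyperparameters mirror the hand-written network above, and the resulting accuracy will generally differ from 0.95 because the optimizer and weight initialization differ:

from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier

data = load_iris()

# MLPClassifier takes integer class labels directly; no one-hot encoding needed
x_train, x_test, y_train, y_test = train_test_split(
    data.data, data.target, test_size=20, random_state=4)

# One hidden layer of 2 logistic units, mirroring the hand-written network
mlp = MLPClassifier(hidden_layer_sizes=(2,), activation='logistic',
                    learning_rate_init=0.1, max_iter=5000, random_state=10)
mlp.fit(x_train, y_train)
print("Test accuracy:", mlp.score(x_test, y_test))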
