#1. Implement A* Search algorithm.
class Graph:
    def __init__(self, adjac_lis):
        self.adjac_lis = adjac_lis

    def get_neighbors(self, v):
        return self.adjac_lis[v]

    # heuristic function: a constant 1 for every node (admissible for this graph)
    def h(self, n):
        H = {
            'A': 1,
            'B': 1,
            'C': 1,
            'D': 1,
            'F': 1,
        }
        return H[n]

    def a_star_algorithm(self, start, stop):
        open_lst = set([start])    # nodes discovered but not yet expanded
        closed_lst = set([])       # nodes already expanded
        cost = {start: 0}          # g(n): cheapest known cost from start
        adj_list = {start: start}  # parent pointers for path reconstruction

        while len(open_lst) > 0:
            # pick the open node with the lowest f(n) = g(n) + h(n)
            n = None
            for v in open_lst:
                if n is None or cost[v] + self.h(v) < cost[n] + self.h(n):
                    n = v

            if n is None:
                print('Path does not exist!')
                return None

            # goal reached: follow the parent pointers back to the start
            if n == stop:
                reconst_path = []
                while adj_list[n] != n:
                    reconst_path.append(n)
                    n = adj_list[n]
                reconst_path.append(start)
                reconst_path.reverse()
                print('Path found: {}'.format(reconst_path))
                return reconst_path

            for (m, weight) in self.get_neighbors(n):
                if m not in open_lst and m not in closed_lst:
                    open_lst.add(m)
                    adj_list[m] = n
                    cost[m] = cost[n] + weight
                else:
                    # a cheaper path to m was found: update it, and reopen m if it
                    # had already been closed
                    if cost[m] > cost[n] + weight:
                        cost[m] = cost[n] + weight
                        adj_list[m] = n
                        if m in closed_lst:
                            closed_lst.remove(m)
                            open_lst.add(m)

            open_lst.remove(n)
            closed_lst.add(n)

        print('Path does not exist!')
        return None
adjac_lis = {
    'A': [('B', 1), ('C', 3), ('D', 7)],
    'B': [('D', 5), ('F', 2)],
    'C': [('D', 12)],
    'F': [('D', 1)]
}
graph1 = Graph(adjac_lis)
graph1.a_star_algorithm('A', 'D')
Output:
Path found: ['A', 'B', 'F', 'D']
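As a quick sanity check, the edge weights along the returned path can be summed; a minimal sketch, assuming the same adjac_lis as above (the path_cost helper is not part of the original program):

# Hypothetical helper: sum the edge weights along a path in adjac_lis.
def path_cost(adjac_lis, path):
    total = 0
    for a, b in zip(path, path[1:]):
        # look up the weight of edge (a, b) in a's adjacency list
        total += dict(adjac_lis[a])[b]
    return total

print(path_cost(adjac_lis, ['A', 'B', 'F', 'D']))  # expected: 1 + 2 + 1 = 4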
#2. Implement AO* Search algorithm.
class Graph:
    def __init__(self, graph, heuristicNodeList, startNode):
        # instantiate a graph object with the graph topology, heuristic values, and start node
        self.graph = graph
        self.H = heuristicNodeList
        self.start = startNode
        self.parent = {}
        self.status = {}
        self.solutionGraph = {}

    def applyAOStar(self):  # starts the recursive AO* algorithm
        self.aoStar(self.start, False)

    def getNeighbors(self, v):  # gets the neighbors of a given node
        return self.graph.get(v, '')

    def getStatus(self, v):  # return the status of a given node
        return self.status.get(v, 0)

    def setStatus(self, v, val):  # set the status of a given node
        self.status[v] = val

    def getHeuristicNodeValue(self, n):
        return self.H.get(n, 0)  # return the heuristic value of a given node

    def setHeuristicNodeValue(self, n, value):
        self.H[n] = value  # set the revised heuristic value of a given node

    def printSolution(self):
        print("FOR GRAPH SOLUTION, TRAVERSE THE GRAPH FROM THE START NODE:", self.start)
        print("------------------------------------------------------------")
        print(self.solutionGraph)
        print("------------------------------------------------------------")

    def computeMinimumCostChildNodes(self, v):
        # computes the minimum cost over the AND/OR child-node sets of a given node v
        minimumCost = 0
        costToChildNodeListDict = {}
        costToChildNodeListDict[minimumCost] = []
        flag = True
        for nodeInfoTupleList in self.getNeighbors(v):  # iterate over each set of child node/s
            cost = 0
            nodeList = []
            for c, weight in nodeInfoTupleList:
                cost = cost + self.getHeuristicNodeValue(c) + weight
                nodeList.append(c)
            if flag == True:  # initialize the minimum cost with the first set of child node/s
                minimumCost = cost
                costToChildNodeListDict[minimumCost] = nodeList
                flag = False
            else:  # compare the current set's cost with the running minimum
                if minimumCost > cost:
                    minimumCost = cost
                    costToChildNodeListDict[minimumCost] = nodeList
        return minimumCost, costToChildNodeListDict[minimumCost]  # minimum cost and its child node/s

    def aoStar(self, v, backTracking):  # AO* for a node v and a backtracking status flag
        print("HEURISTIC VALUES :", self.H)
        print("SOLUTION GRAPH :", self.solutionGraph)
        print("PROCESSING NODE :", v)
        print("-----------------------------------------------------------------------------------------")
        if self.getStatus(v) >= 0:  # if status of node v >= 0, compute its minimum cost child nodes
            minimumCost, childNodeList = self.computeMinimumCostChildNodes(v)
            print(minimumCost, childNodeList)
            self.setHeuristicNodeValue(v, minimumCost)
            self.setStatus(v, len(childNodeList))

            solved = True  # check whether the minimum cost child nodes of v are all solved
            for childNode in childNodeList:
                self.parent[childNode] = v
                if self.getStatus(childNode) != -1:
                    solved = False

            if solved == True:
                # all minimum cost child nodes are solved: mark v as solved (-1)
                self.setStatus(v, -1)
                # record v's solved children, which may form part of the solution
                self.solutionGraph[v] = childNodeList
                if v != self.start:
                    # backtrack: re-evaluate the parent with the backtracking flag set to true
                    self.aoStar(self.parent[v], True)

            if backTracking == False:  # the current call is a forward (non-backtracking) step
                for childNode in childNodeList:  # for each minimum cost child node
                    self.setStatus(childNode, 0)  # mark the child as needing exploration
                    self.aoStar(childNode, False)  # explore it with the backtracking flag false
# for simplicity we'll consider the heuristic distances as given
print("Graph - 1")
h1 = {'A': 1, 'B': 6, 'C': 2, 'D': 12, 'E': 2, 'F': 1, 'G': 5, 'H': 7, 'I': 7, 'J': 1}
graph1 = {
    'A': [[('B', 1), ('C', 1)], [('D', 1)]],
    'B': [[('G', 1)], [('H', 1)]],
    'C': [[('J', 1)]],
    'D': [[('E', 1), ('F', 1)]],
    'G': [[('I', 1)]]
}
G1 = Graph(graph1, h1, 'A')
G1.applyAOStar()
G1.printSolution()
Output:
Graph - 1
HEURISTIC VALUES : {'A': 1, 'B': 6, 'C': 2, 'D': 12, 'E': 2, 'F': 1, 'G': 5, 'H': 7, 'I': 7, 'J': 1}
SOLUTION GRAPH : {}
PROCESSING NODE : A
-----------------------------------------------------------------------------------------
10 ['B', 'C']
HEURISTIC VALUES : {'A': 10, 'B': 6, 'C': 2, 'D': 12, 'E': 2, 'F': 1, 'G': 5, 'H': 7, 'I': 7, 'J': 1}
SOLUTION GRAPH : {}
PROCESSING NODE : B
-----------------------------------------------------------------------------------------
6 ['G']
HEURISTIC VALUES : {'A': 10, 'B': 6, 'C': 2, 'D': 12, 'E': 2, 'F': 1, 'G': 5, 'H': 7, 'I': 7, 'J': 1}
SOLUTION GRAPH : {}
PROCESSING NODE : A
-----------------------------------------------------------------------------------------
10 ['B', 'C']
HEURISTIC VALUES : {'A': 10, 'B': 6, 'C': 2, 'D': 12, 'E': 2, 'F': 1, 'G': 5, 'H': 7, 'I': 7, 'J': 1}
SOLUTION GRAPH : {}
PROCESSING NODE : G
-----------------------------------------------------------------------------------------
8 ['I']
HEURISTIC VALUES : {'A': 10, 'B': 6, 'C': 2, 'D': 12, 'E': 2, 'F': 1, 'G': 8, 'H': 7, 'I': 7, 'J': 1}
SOLUTION GRAPH : {}
PROCESSING NODE : B
-----------------------------------------------------------------------------------------
8 ['H']
HEURISTIC VALUES : {'A': 10, 'B': 8, 'C': 2, 'D': 12, 'E': 2, 'F': 1, 'G': 8, 'H': 7, 'I': 7, 'J': 1}
SOLUTION GRAPH : {}
PROCESSING NODE : A
-----------------------------------------------------------------------------------------
12 ['B', 'C']
HEURISTIC VALUES : {'A': 12, 'B': 8, 'C': 2, 'D': 12, 'E': 2, 'F': 1, 'G': 8, 'H': 7, 'I': 7, 'J': 1}
SOLUTION GRAPH : {}
PROCESSING NODE : I
-----------------------------------------------------------------------------------------
0 []
HEURISTIC VALUES : {'A': 12, 'B': 8, 'C': 2, 'D': 12, 'E': 2, 'F': 1, 'G': 8, 'H': 7, 'I': 0, 'J': 1}
SOLUTION GRAPH : {'I': []}
PROCESSING NODE : G
-----------------------------------------------------------------------------------------
1 ['I']
HEURISTIC VALUES : {'A': 12, 'B': 8, 'C': 2, 'D': 12, 'E': 2, 'F': 1, 'G': 1, 'H': 7, 'I': 0, 'J': 1}
SOLUTION GRAPH : {'I': [], 'G': ['I']}
PROCESSING NODE : B
-----------------------------------------------------------------------------------------
2 ['G']
HEURISTIC VALUES : {'A': 12, 'B': 2, 'C': 2, 'D': 12, 'E': 2, 'F': 1, 'G': 1, 'H': 7, 'I': 0, 'J': 1}
SOLUTION GRAPH : {'I': [], 'G': ['I'], 'B': ['G']}
PROCESSING NODE : A
-----------------------------------------------------------------------------------------
6 ['B', 'C']
HEURISTIC VALUES : {'A': 6, 'B': 2, 'C': 2, 'D': 12, 'E': 2, 'F': 1, 'G': 1, 'H': 7, 'I': 0, 'J': 1}
SOLUTION GRAPH : {'I': [], 'G': ['I'], 'B': ['G']}
PROCESSING NODE : C
-----------------------------------------------------------------------------------------
2 ['J']
HEURISTIC VALUES : {'A': 6, 'B': 2, 'C': 2, 'D': 12, 'E': 2, 'F': 1, 'G': 1, 'H': 7, 'I': 0, 'J': 1}
SOLUTION GRAPH : {'I': [], 'G': ['I'], 'B': ['G']}
PROCESSING NODE : A
-----------------------------------------------------------------------------------------
6 ['B', 'C']
HEURISTIC VALUES : {'A': 6, 'B': 2, 'C': 2, 'D': 12, 'E': 2, 'F': 1, 'G': 1, 'H': 7, 'I': 0, 'J': 1}
SOLUTION GRAPH : {'I': [], 'G': ['I'], 'B': ['G']}
PROCESSING NODE : J
-----------------------------------------------------------------------------------------
0 []
HEURISTIC VALUES : {'A': 6, 'B': 2, 'C': 2, 'D': 12, 'E': 2, 'F': 1, 'G': 1, 'H': 7, 'I': 0, 'J': 0}
SOLUTION GRAPH : {'I': [], 'G': ['I'], 'B': ['G'], 'J': []}
PROCESSING NODE : C
-----------------------------------------------------------------------------------------
1 ['J']
HEURISTIC VALUES : {'A': 6, 'B': 2, 'C': 1, 'D': 12, 'E': 2, 'F': 1, 'G': 1, 'H': 7, 'I': 0, 'J': 0}
SOLUTION GRAPH : {'I': [], 'G': ['I'], 'B': ['G'], 'J': [], 'C': ['J']}
PROCESSING NODE : A
-----------------------------------------------------------------------------------------
5 ['B', 'C']
FOR GRAPH SOLUTION, TRAVERSE THE GRAPH FROM THE START NODE: A
------------------------------------------------------------
{'I': [], 'G': ['I'], 'B': ['G'], 'J': [], 'C': ['J'], 'A': ['B', 'C']}
------------------------------------------------------------
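To read the result as a tree rather than a dict, one can walk solutionGraph from the start node; a minimal sketch (the traversal helper is an assumption, not part of the original program):

# Hypothetical traversal: print the AND/OR solution tree, indented by depth.
def printSolutionPath(solutionGraph, node, depth=0):
    print(' ' * depth, node)
    for child in solutionGraph.get(node, []):
        printSolutionPath(solutionGraph, child, depth + 2)

printSolutionPath(G1.solutionGraph, 'A')
# For the run above: A expands to B and C; B to G; G to I; C to J.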
#3. For a given set of training data examples stored in a .CSV file, implement and
#demonstrate the Candidate-Elimination algorithm to output a description of the set
#of all hypotheses consistent with the training examples.
import csv
import numpy as np

with open('dataset.csv', 'r') as f:
    reads = csv.reader(f)
    tmp_lst = np.array(list(reads))
    # print(tmp_lst)

# all the rows, all columns except the last
concept = np.array(tmp_lst[:, :-1])
# all the rows, only the last column (the target)
target = np.array(tmp_lst[:, -1])

# initialize the specific hypothesis with the first positive example
for i in range(len(target)):
    if target[i] == 'yes':
        specific_h = concept[i]
        break

# initialize the general boundary as a square grid of '?'
generic_h = [['?' for i in range(len(specific_h))] for i in range(len(specific_h))]
print(generic_h)

for i in range(len(target)):
    if target[i] == 'yes':
        # positive example: generalize the specific hypothesis
        for j in range(len(specific_h)):
            if specific_h[j] != concept[i][j]:
                specific_h[j] = '?'
                generic_h[j][j] = '?'
    else:
        # negative example: specialize the general hypotheses
        for j in range(len(specific_h)):
            if specific_h[j] != concept[i][j]:
                generic_h[j][j] = specific_h[j]
            else:
                generic_h[j][j] = '?'
    print("Step ", i + 1)
    print("The most generic is : ", generic_h)
    print("The most specific is : ", specific_h)
Output:
[['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?']]
Step 1
The most generic is : [['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?']]
The most specific is : ['sunny' 'warm' 'normal' 'strong' 'warm' 'same']
Step 2
The most generic is : [['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?']]
The most specific is : ['sunny' 'warm' '?' 'strong' 'warm' 'same']
Step 3
The most generic is : [['sunny', '?', '?', '?', '?', '?'], ['?', 'warm', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', 'same']]
The most specific is : ['sunny' 'warm' '?' 'strong' 'warm' 'same']
Step 4
The most generic is : [['sunny', '?', '?', '?', '?', '?'], ['?', 'warm', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?']]
The most specific is : ['sunny' 'warm' '?' 'strong' '?' '?']
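The final generic_h still contains rows that are entirely '?'; a minimal post-processing sketch that keeps only the informative general hypotheses (the final_general name is an assumption, not part of the original program):

# Hypothetical cleanup: drop fully general (all-'?') rows from generic_h.
final_general = [h for h in generic_h if h != ['?'] * len(specific_h)]
print("Final general hypotheses:", final_general)
# For the run above this leaves [['sunny', '?', '?', '?', '?', '?'], ['?', 'warm', '?', '?', '?', '?']]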
#4. Write a program to demonstrate the working of the decision tree based ID3 algorithm.
#Use an appropriate data set for building the decision tree and apply this knowledge to
#classify a new sample.
import math
import csv

def load_csv(filename):
    lines = csv.reader(open(filename, "r"))
    dataset = list(lines)
    headers = dataset.pop(0)
    return dataset, headers

class Node:
    def __init__(self, attribute):
        self.attribute = attribute
        self.children = []
        self.answer = ""

def subtables(data, col, delete):
    # partition the rows of data by the distinct values of the given column
    dic = {}
    coldata = [row[col] for row in data]
    attr = list(set(coldata))
    counts = [0] * len(attr)
    r = len(data)
    c = len(data[0])
    for x in range(len(attr)):
        for y in range(r):
            if data[y][col] == attr[x]:
                counts[x] += 1
    for x in range(len(attr)):
        dic[attr[x]] = [[0 for i in range(c)] for j in range(counts[x])]
        pos = 0
        for y in range(r):
            if data[y][col] == attr[x]:
                if delete:
                    del data[y][col]
                dic[attr[x]][pos] = data[y]
                pos += 1
    return attr, dic

def entropy(S):
    attr = list(set(S))
    if len(attr) == 1:  # all labels identical: zero entropy
        return 0
    counts = [0, 0]
    for i in range(2):
        counts[i] = sum([1 for x in S if attr[i] == x]) / (len(S) * 1.0)
    sums = 0
    for cnt in counts:
        sums += -1 * cnt * math.log(cnt, 2)
    return sums

def compute_gain(data, col):
    # information gain of splitting the data on the given column
    attr, dic = subtables(data, col, delete=False)
    total_size = len(data)
    entropies = [0] * len(attr)
    ratio = [0] * len(attr)
    total_entropy = entropy([row[-1] for row in data])
    for x in range(len(attr)):
        ratio[x] = len(dic[attr[x]]) / (total_size * 1.0)
        entropies[x] = entropy([row[-1] for row in dic[attr[x]]])
        total_entropy -= ratio[x] * entropies[x]
    return total_entropy

def build_tree(data, features):
    lastcol = [row[-1] for row in data]
    if (len(set(lastcol))) == 1:  # pure node: make it a leaf
        node = Node("")
        node.answer = lastcol[0]
        return node
    n = len(data[0]) - 1
    gains = [0] * n
    for col in range(n):
        gains[col] = compute_gain(data, col)
    split = gains.index(max(gains))  # attribute with the highest information gain
    node = Node(features[split])
    fea = features[:split] + features[split + 1:]
    attr, dic = subtables(data, split, delete=True)
    for x in range(len(attr)):
        child = build_tree(dic[attr[x]], fea)
        node.children.append((attr[x], child))
    return node

def print_tree(node, level):
    if node.answer != "":
        print(" " * level, node.answer)
        return
    print(" " * level, node.attribute)
    for value, n in node.children:
        print(" " * (level + 1), value)
        print_tree(n, level + 2)

def classify(node, x_test, features):
    if node.answer != "":
        print(node.answer)
        return
    pos = features.index(node.attribute)
    for value, n in node.children:
        if x_test[pos] == value:
            classify(n, x_test, features)

'''Main program'''
dataset, features = load_csv("dataset.csv")
node1 = build_tree(dataset, features)
print("The decision tree for the dataset using ID3 algorithm is")
print_tree(node1, 0)
Output:
The decision tree for the dataset using ID3 algorithm is
 outlook
  overcast
   yes
  sunny
   humidity
    normal
     yes
    high
     no
  rain
   windy
    weak
     yes
    strong
     no
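The problem statement also asks to classify a new sample, which the classify helper above supports; a minimal sketch (the test sample and its column order [outlook, temperature, humidity, windy] are assumptions based on the dataset):

# Hypothetical new sample in the dataset's column order.
x_test = ['sunny', 'cool', 'high', 'strong']
print("Prediction for", x_test, ":")
classify(node1, x_test, features)  # expected to print 'no' given the tree above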
#5. Build an Artificial Neural Network by implementing the Backpropagation algorithm
#and test the same using appropriate data sets.
import numpy as np

X = np.array(([2, 9], [1, 5], [3, 6]), dtype=float)
y = np.array(([92], [86], [89]), dtype=float)
X = X / np.amax(X, axis=0)  # normalize features to [0, 1]
y = y / 100                 # normalize targets to [0, 1]

def sigmoid(x):
    return 1 / (1 + np.exp(-x))

def derivatives_sigmoid(x):
    # derivative of the sigmoid, expressed in terms of its output
    return x * (1 - x)

epoch = 7000
lr = 0.25
inputlayer_neurons = 2
hiddenlayer_neurons = 3
output_neurons = 1

wh = np.random.uniform(size=(inputlayer_neurons, hiddenlayer_neurons))
bh = np.random.uniform(size=(1, hiddenlayer_neurons))
wout = np.random.uniform(size=(hiddenlayer_neurons, output_neurons))
bout = np.random.uniform(size=(1, output_neurons))

for i in range(epoch):
    # forward pass
    hinp1 = np.dot(X, wh)
    hinp = hinp1 + bh
    hlayer_act = sigmoid(hinp)
    outinp1 = np.dot(hlayer_act, wout)
    outinp = outinp1 + bout
    output = sigmoid(outinp)
    # backward pass
    EO = y - output
    outgrad = derivatives_sigmoid(output)
    d_output = EO * outgrad
    EH = d_output.dot(wout.T)
    hiddengrad = derivatives_sigmoid(hlayer_act)
    d_hiddenlayer = EH * hiddengrad
    # weight and bias updates (the bias updates complete the backpropagation step)
    wout += hlayer_act.T.dot(d_output) * lr
    bout += np.sum(d_output, axis=0, keepdims=True) * lr
    wh += X.T.dot(d_hiddenlayer) * lr
    bh += np.sum(d_hiddenlayer, axis=0, keepdims=True) * lr

print("Input=\n" + str(X))
print("Actual output:\n" + str(y))
print("Predicted output:", output)
Output:
Input=
[[0.66666667 1. ]
[0.33333333 0.55555556]
[1. 0.66666667]]
Actual output:
[[0.92]
[0.86]
[0.89]]
Predicted output: [[0.89577227]
[0.88071767]
[0.89316361]]
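A quick way to quantify how close the network gets is the mean squared error between targets and predictions; a minimal sketch using the variables above:

# Mean squared error on the (normalized) training targets.
mse = np.mean(np.square(y - output))
print("Training MSE:", mse)  # on the order of 1e-4 for the run above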
#6. Write a program to implement the naïve Bayesian classifier for a sample training data
#set stored as a .CSV file. Compute the accuracy of the classifier, considering a few test
#data sets.
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.naive_bayes import GaussianNB
data = pd.read_csv('nb.csv')
print("The first 5 values of data is :\n",data.head())
X = data.iloc[:,:-1]
print("\nThe First 5 values of train data is\n",X.head())
y = data.iloc[:,-1]
print("\nThe first 5 values of Train output is\n",y.head())
le_outlook = LabelEncoder()
X.Outlook = le_outlook.fit_transform(X.Outlook)
le_Temperature = LabelEncoder()
X.Temperature = le_Temperature.fit_transform(X.Temperature)
le_Humidity = LabelEncoder()
X.Humidity = le_Humidity.fit_transform(X.Humidity)
le_Windy = LabelEncoder()
X.Windy = le_Windy.fit_transform(X.Windy)
print("\nNow the Train data is :\n",X.head())
le_PlayTennis = LabelEncoder()
y = le_PlayTennis.fit_transform(y)
print("\nNow the Train output is\n",y)
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X,y, test_size=0.20)
classifier = GaussianNB()
classifier.fit(X_train,y_train)
from sklearn.metrics import accuracy_score
print("Accuracy is:",accuracy_score(classifier.predict(X_test),y_test))
Output:
The first 5 values of data is :
    Outlook Temperature Humidity   Windy Playtennis
0     sunny         hot     high    weak         no
1     sunny         hot     high  strong         no
2  overcast         hot     high    weak        yes
3      rain        mild     high    weak        yes
4      rain        cool   normal    weak        yes

The First 5 values of train data is
    Outlook Temperature Humidity   Windy
0     sunny         hot     high    weak
1     sunny         hot     high  strong
2  overcast         hot     high    weak
3      rain        mild     high    weak
4      rain        cool   normal    weak

The first 5 values of Train output is
0     no
1     no
2    yes
3    yes
4    yes
Name: Playtennis, dtype: object

Now the Train data is :
   Outlook  Temperature  Humidity  Windy
0        2            1         0      1
1        2            1         0      0
2        0            1         0      1
3        1            2         0      1
4        1            0         1      1

Now the Train output is
[0 0 1 1 1 0 1 0 1 1 1 1 1 0]
Accuracy is: 0.3333333333333333
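With only 14 rows, a 20% split leaves just 3 test examples, so a single accuracy figure is dominated by chance. A more stable estimate is cross-validation over the whole set; a minimal sketch, assuming the encoded X and y above:

from sklearn.model_selection import cross_val_score

# 5-fold cross-validated accuracy over all 14 encoded examples.
scores = cross_val_score(GaussianNB(), X, y, cv=5)
print("Cross-validated accuracy: %.2f (+/- %.2f)" % (scores.mean(), scores.std()))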
#7. Apply EM algorithm to cluster a set of data stored in a .CSV file.
#Use the same data set for clustering using k-Means algorithm.
#Compare the results of these two algorithms and comment on the quality of clustering.
import pandas as pd
import numpy as np
from sklearn.mixture import GaussianMixture
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import adjusted_rand_score
import matplotlib.pyplot as plt
# Load the data from the CSV file
df = pd.read_csv('iris.csv')
# Assuming your data has features in columns, extract them
features = df[['SepalLengthCm', 'SepalWidthCm','PetalLengthCm','PetalWidthCm']].values
# Standardize the data
scaler = StandardScaler()
features_standardized = scaler.fit_transform(features)
# Apply EM algorithm
em_model = GaussianMixture(n_components=3, random_state=42)
em_labels = em_model.fit_predict(features_standardized)
# Apply k-Means algorithm
kmeans_model = KMeans(n_clusters=3, random_state=42)
kmeans_labels = kmeans_model.fit_predict(features_standardized)
# Plot the results
plt.figure(figsize=(12, 5))
# Plot for EM algorithm
plt.subplot(1, 2, 1)
plt.scatter(features_standardized[:, 0], features_standardized[:, 1], c=em_labels, cmap='viridis', edgecolor='k')
plt.title('EM Clustering')
# Plot for k-Means algorithm
plt.subplot(1, 2, 2)
plt.scatter(features_standardized[:, 0], features_standardized[:, 1], c=kmeans_labels, cmap='viridis', edgecolor='k')
plt.title('k-Means Clustering')
plt.show()
# Calculate Adjusted Rand Index
ari_em = adjusted_rand_score(df['Species'], em_labels)
ari_kmeans = adjusted_rand_score(df['Species'], kmeans_labels)
# Print Adjusted Rand Index
print(f"Adjusted Rand Index for EM: {ari_em}")
print(f"Adjusted Rand Index for k-Means: {ari_kmeans}")
Output:
Adjusted Rand Index for EM: 0.5073487662737015
Adjusted Rand Index for k-Means: 0.6201351808870379
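Beyond the label-based ARI, an internal measure such as the silhouette score compares the two clusterings without using the species labels; a minimal sketch, assuming the variables above:

from sklearn.metrics import silhouette_score

# Higher silhouette = tighter, better-separated clusters (range -1 to 1).
print("Silhouette (EM):     ", silhouette_score(features_standardized, em_labels))
print("Silhouette (k-Means):", silhouette_score(features_standardized, kmeans_labels))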
#8. Write a program to implement k-Nearest Neighbour algorithm to classify the iris data set.
#Print both correct and wrong predictions. Java/Python ML library classes can be used for
#this problem.
from sklearn import datasets
iris=datasets.load_iris()
iris_data=iris.data
iris_labels=iris.target
from sklearn.model_selection import train_test_split
x_train,x_test,y_train,y_test=train_test_split(iris_data,iris_labels,test_size=0.30)
from sklearn.neighbors import KNeighborsClassifier
classifier=KNeighborsClassifier(n_neighbors=5)
classifier.fit(x_train,y_train)
y_pred=classifier.predict(x_test)
from sklearn.metrics import classification_report,confusion_matrix
print('Confusion matrix is as follows')
print(confusion_matrix(y_test,y_pred))
print('Accuracy Metrics')
print(classification_report(y_test,y_pred))
Output:
Confusion matrix is as follows
[[15  0  0]
 [ 0 15  1]
 [ 0  1 13]]
Accuracy Metrics
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        15
           1       0.94      0.94      0.94        16
           2       0.93      0.93      0.93        14

    accuracy                           0.96        45
   macro avg       0.96      0.96      0.96        45
weighted avg       0.96      0.96      0.96        45
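The problem statement also asks to print the correct and wrong predictions explicitly; a minimal sketch using the fitted classifier and test split above:

# Flag each test example as correct or wrong.
for i in range(len(x_test)):
    result = "Correct" if y_pred[i] == y_test[i] else "Wrong"
    print(result, "- sample:", x_test[i],
          "predicted:", iris.target_names[y_pred[i]],
          "actual:", iris.target_names[y_test[i]])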
#9. Implement the non-parametric Locally Weighted Regression algorithm in order to fit data points.
#Select an appropriate data set for your experiment and draw graphs.
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

def kernel(point, xmat, k):
    m, n = np.shape(xmat)  # m = number of training points
    weights = np.mat(np.eye(m))  # np.eye returns a matrix with 1s on the diagonal
    for j in range(m):
        diff = point - xmat[j]
        # Gaussian kernel: weight decays with distance from the query point
        weights[j, j] = np.exp(diff * diff.T / (-2.0 * k ** 2))
    return weights

def localWeight(point, xmat, ymat, k):
    wei = kernel(point, xmat, k)
    # weighted least-squares solution: (X'WX)^-1 X'Wy
    W = (xmat.T * (wei * xmat)).I * (xmat.T * (wei * ymat.T))
    return W

def localWeightRegression(xmat, ymat, k):
    row, col = np.shape(xmat)  # e.g. 244 rows and 2 columns for the tips data
    ypred = np.zeros(row)
    for i in range(row):
        ypred[i] = xmat[i] * localWeight(xmat[i], xmat, ymat, k)
    return ypred

data = pd.read_csv('dataset.csv')
bill = np.array(data.total_bill)
tip = np.array(data.tip)
mbill = np.mat(bill)
mtip = np.mat(tip)
mbillMatCol = np.shape(mbill)[1]  # number of columns, i.e. number of observations
onesArray = np.mat(np.ones(mbillMatCol))
# hstack concatenates horizontally: prepend a column of ones for the intercept term
xmat = np.hstack((onesArray.T, mbill.T))
print(xmat)
ypred = localWeightRegression(xmat, mtip, 2)
# argsort returns the indices that would sort the bill values
SortIndex = xmat[:, 1].argsort(0)
xsort = xmat[SortIndex][:, 0]
fig = plt.figure()
ax = fig.add_subplot(1, 1, 1)
ax.scatter(bill, tip, color='blue')
ax.plot(xsort[:, 1], ypred[SortIndex], color='red', linewidth=1)
plt.xlabel('Total bill')
plt.ylabel('tip')
plt.show()
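The bandwidth k controls how local the fit is: smaller k follows the data more closely, larger k approaches a global linear fit. A hedged sketch to compare several values on the same data, assuming the variables above:

# Overlay fits for a few bandwidths on a fresh figure.
for k in [0.5, 2, 10]:
    yp = localWeightRegression(xmat, mtip, k)
    plt.plot(xsort[:, 1], yp[SortIndex], label='k = {}'.format(k))
plt.scatter(bill, tip, color='blue', alpha=0.3)
plt.xlabel('Total bill')
plt.ylabel('tip')
plt.legend()
plt.show()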