Index
S.no. Title Date Sign
1 Extract the data from the database using
Python
2 Write a program for Result Prediction
3 Write a program for a Heart Disease Predictor
4 Write a program to compute the accuracy of a
classifier.
5 Write a program for a Diabetes Prediction Model
6 Build an Artificial Neural Network (ANN) by
implementing the Back propagation algorithm
and test the same using appropriate data sets.
7 Apply the k-Means algorithm to cluster a set of
data stored in a .CSV file. Use the same data set
for clustering using hierarchical (agglomerative)
clustering. Compare the results of these two
algorithms and comment on the quality of
clustering. You can add Python ML library
classes in the program.
8 Write a program to implement Self-
Organizing Map (SOM).
9 Write a program for empirical comparison of
different supervised learning algorithms.
10 Write a program for empirical comparison of
different unsupervised learning algorithms.
Program – 1
Q1. Extract the data from the database using Python.
Code:
# Python code to extract data from MySQL database.
import mysql.connector as a

# NOTE(review): the credentials are hard-coded for the lab exercise; read them
# from the environment or a config file before using this anywhere else.
mycon = a.connect(host="localhost", user="root", passwd="1234", database="12a")
try:
    cursor = mycon.cursor()
    try:
        # Fetch every row of the student table and print them one per line.
        cursor.execute("select * from student")
        data = cursor.fetchall()
        for i in data:
            print(i)
    finally:
        cursor.close()
finally:
    # Always release the connection, even if the query fails.
    mycon.close()
Output:
Program – 2
Q2. Write a program for Result Prediction.
Code:
# Result Prediction Model using Logistic Regression
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

# Load the data set; every column except RESULT is used as a feature.
data = pd.read_csv("marks.csv")
print(data.head())

# Filling NaN labels with 0 for simplicity. The result is assigned back
# instead of using inplace=True on a column selection, which may operate on
# a temporary copy and leave the DataFrame unchanged.
data['RESULT'] = data['RESULT'].fillna(0)

X = data.drop(columns='RESULT')
Y = data['RESULT']

# Stratify so the train and test sets keep the same class ratio.
X_train, X_test, Y_train, Y_test = train_test_split(
    X, Y, test_size=0.2, stratify=Y, random_state=2)

model = LogisticRegression()
model.fit(X_train, Y_train)

# accuracy_score expects (y_true, y_pred).
X_train_prediction = model.predict(X_train)
training_data_accuracy = accuracy_score(Y_train, X_train_prediction)
print("Accuracy on Training data:", training_data_accuracy)

X_test_prediction = model.predict(X_test)
test_data_accuracy = accuracy_score(Y_test, X_test_prediction)
print("Accuracy on Test data:", test_data_accuracy)

# Single-sample predictions. The samples are wrapped in a DataFrame carrying
# the training column names so the model sees the same feature layout.
# NOTE(review): assumes marks.csv has exactly two feature columns - confirm.
input_data = pd.DataFrame(np.array([90, 50]).reshape(1, -1), columns=X.columns)
prediction = model.predict(input_data)
print("Prediction for example input data:", prediction[0])

input_data = pd.DataFrame(np.array([70, 25]).reshape(1, -1), columns=X.columns)
prediction = model.predict(input_data)
print("Prediction for additional input data:", prediction[0])

if prediction[0] == 0:
    print("FAIL")
else:
    print("PASS")
Output:
Program – 3
Q3. Write a program for a Heart Disease Predictor.
Code:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

# Load the data set; HeartDisease is the label, all other columns are features.
data = pd.read_csv("heart_data.csv")
print(data)

# In a script a bare expression is discarded, so the summaries are printed
# explicitly: missing values per column and the class balance.
print(data.isnull().sum())
print(data['HeartDisease'].value_counts())

X = data.drop(columns='HeartDisease')
Y = data['HeartDisease']
print(X)
print(Y)

# Stratify so the train and test sets keep the same class ratio.
X_train, X_test, Y_train, Y_test = train_test_split(
    X, Y, test_size=0.2, stratify=Y, random_state=2)
print(X.shape, X_train.shape, X_test.shape)

model = LogisticRegression()
model.fit(X_train, Y_train)

# accuracy_score expects (y_true, y_pred).
X_train_prediction = model.predict(X_train)
training_data_accuracy = accuracy_score(Y_train, X_train_prediction)
print("Accuracy on Training data", training_data_accuracy)

X_test_prediction = model.predict(X_test)
test_data_accuracy = accuracy_score(Y_test, X_test_prediction)
print("Accuracy on Test data:", test_data_accuracy)

# Predict for one patient. The values must be in the same order as the
# feature columns of heart_data.csv.
# NOTE(review): assumes the file has exactly 13 feature columns - confirm.
input_data = (74, 0, 2, 20, 264, 0, 2, 121, 1, 0.2, 1, 1, 3)
input_data_as_numpy_array = np.asarray(input_data)
input_data_reshaped = pd.DataFrame(
    input_data_as_numpy_array.reshape(1, -1), columns=X.columns)
prediction = model.predict(input_data_reshaped)
print(prediction)

if prediction[0] == 0:
    print("The person does not have Heart Disease")
else:
    print("The person has Heart Disease")
Output:
Program – 4
Q4. Write a program to compute the accuracy of a classifier.
Code:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
from sklearn.datasets import load_iris

# Fetch the Iris data and lay it out as one table: the feature columns
# followed by a final 'target' column.
iris = load_iris()
table = np.column_stack((iris['data'], iris['target']))
header = iris['feature_names'] + ['target']
iris_df = pd.DataFrame(data=table, columns=header)

# Round-trip through a CSV file so the rest of the script works on data
# loaded from disk.
iris_df.to_csv('iris_dataset.csv', index=False)
loaded_df = pd.read_csv('iris_dataset.csv')

# Separate the features from the label column.
y_loaded = loaded_df['target'].to_numpy()
X_loaded = loaded_df.drop(columns='target').to_numpy()

# Hold out 20% of the rows for testing.
X_train, X_test, y_train, y_test = train_test_split(
    X_loaded,
    y_loaded,
    test_size=0.2,
    random_state=42,
)

# Fit a 100-tree random forest on the training rows and predict the rest.
clf = RandomForestClassifier(n_estimators=100, random_state=42)
clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)

# Report overall accuracy followed by the per-class metrics.
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.2f}")
print("Classification Report:")
print(classification_report(y_test, y_pred))
Output:
Program – 5
Q5. Write a program for a Diabetes Prediction Model.
Code:
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn import svm
from sklearn.metrics import accuracy_score

# Load the data set; Report is the label, all other columns are features.
data = pd.read_csv('diabetes2.csv')
print(data)

features = data.drop(columns='Report')
Y = data['Report']

# Standardize the features before training the SVM.
scaler = StandardScaler()
scaler.fit(features)
standardized_data = scaler.transform(features)
print(standardized_data)

X = standardized_data
X_train, X_test, Y_train, Y_test = train_test_split(
    X, Y, test_size=0.2, stratify=Y, random_state=2)

# The original listing used `classifier` without ever creating or fitting it.
# NOTE(review): a linear-kernel SVC is assumed here - confirm the intended model.
classifier = svm.SVC(kernel='linear')
classifier.fit(X_train, Y_train)

# accuracy_score expects (y_true, y_pred).
X_test_prediction = classifier.predict(X_test)
test_data_accuracy = accuracy_score(Y_test, X_test_prediction)
print('Accuracy score of the test data:', test_data_accuracy)

# Predict for one patient. The values must be in the same order as the
# feature columns and are scaled with the scaler fitted on the training data.
# NOTE(review): assumes diabetes2.csv has exactly 7 feature columns - confirm.
input_data = (85, 60, 23, 8, 30.1, 0.163, 57)
input_data_as_numpy_array = np.asarray(input_data)
input_data_reshaped = pd.DataFrame(
    input_data_as_numpy_array.reshape(1, -1), columns=features.columns)
std_data = scaler.transform(input_data_reshaped)
print(std_data)

prediction = classifier.predict(std_data)
print(prediction)

if prediction[0] == 0:
    print("The person is not diabetic")
else:
    print("The person is diabetic")
Output:
Program – 6
Q6 Build an Artificial Neural Network (ANN) by implementing the Back propagation
algorithm and test the same using appropriate data sets.
Code:
import numpy as np
class NeuralNetwork:
    """Feed-forward network with one hidden layer, trained by backpropagation.

    Both layers use the sigmoid activation. Weights start as uniform random
    values in [0, 1) and biases start at zero.
    """

    def __init__(self, input_size, hidden_size, output_size, learning_rate=0.01):
        """Store the layer sizes and learning rate and initialize the parameters."""
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.learning_rate = learning_rate
        # Initialize weights and biases
        self.weights_input_hidden = np.random.rand(self.input_size, self.hidden_size)
        self.bias_hidden = np.zeros((1, self.hidden_size))
        self.weights_hidden_output = np.random.rand(self.hidden_size, self.output_size)
        self.bias_output = np.zeros((1, self.output_size))

    def sigmoid(self, x):
        """Return the logistic sigmoid of x, element-wise."""
        return 1 / (1 + np.exp(-x))

    def sigmoid_derivative(self, x):
        """Return the sigmoid derivative, where x is already a sigmoid output."""
        return x * (1 - x)

    def forward(self, X):
        """Run a forward pass on X and return the predicted output.

        The intermediate activations are kept on the instance because
        backward() reads self.hidden_output.
        """
        self.hidden_input = np.dot(X, self.weights_input_hidden) + self.bias_hidden
        self.hidden_output = self.sigmoid(self.hidden_input)
        self.output_input = (
            np.dot(self.hidden_output, self.weights_hidden_output) + self.bias_output
        )
        self.predicted_output = self.sigmoid(self.output_input)
        return self.predicted_output

    def backward(self, X, y, output):
        """Update weights and biases in place from the error between y and output.

        Must be called after forward(X), which supplies self.hidden_output.
        """
        error = y - output
        output_delta = error * self.sigmoid_derivative(output)
        # Propagate the error to the hidden layer using the weights from
        # before this step's update.
        hidden_error = output_delta.dot(self.weights_hidden_output.T)
        hidden_delta = hidden_error * self.sigmoid_derivative(self.hidden_output)
        # Update weights and biases
        self.weights_hidden_output += (
            self.hidden_output.T.dot(output_delta) * self.learning_rate
        )
        self.bias_output += (
            np.sum(output_delta, axis=0, keepdims=True) * self.learning_rate
        )
        self.weights_input_hidden += X.T.dot(hidden_delta) * self.learning_rate
        self.bias_hidden += (
            np.sum(hidden_delta, axis=0, keepdims=True) * self.learning_rate
        )

    def train(self, X, y, epochs):
        """Train on the full batch (X, y) for the given number of epochs.

        Prints the mean squared error every 1000 epochs, measured on the
        output computed before that epoch's weight update.
        """
        for epoch in range(epochs):
            # Forward pass
            output = self.forward(X)
            # Backward pass and optimization
            self.backward(X, y, output)
            if epoch % 1000 == 0:
                mse = np.mean((y - output) ** 2)
                print(f"Epoch {epoch}, Mean Squared Error: {mse:.4f}")
# Sample dataset (XOR problem)
X = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])
y = np.array([[0], [1], [1], [0]])

# Initialize and train the neural network
input_size = 2
hidden_size = 4
output_size = 1
nn = NeuralNetwork(input_size, hidden_size, output_size)
nn.train(X, y, epochs=10000)

# Test the trained network
predictions = nn.forward(X)
print("\nPredictions:")
print(predictions)
Output:
Program – 7
Q7. Apply the k-Means algorithm to cluster a set of data stored in a .CSV file.
Use the same data set for clustering using hierarchical (agglomerative) clustering.
Compare the results of these two algorithms and comment on the quality of
clustering. You can add Python ML library classes in the program.
Code:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans, AgglomerativeClustering
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import adjusted_rand_score
from sklearn.datasets import load_iris

# Load the Iris dataset (you can replace this with your CSV file)
iris = load_iris()
X = iris.data
y_true = iris.target

# Standardize the data
scaler = StandardScaler()
X_std = scaler.fit_transform(X)

# Perform k-Means clustering
kmeans = KMeans(n_clusters=3, random_state=42)
kmeans_labels = kmeans.fit_predict(X_std)

# Perform hierarchical clustering (Agglomerative Clustering)
hierarchical = AgglomerativeClustering(n_clusters=3)
hierarchical_labels = hierarchical.fit_predict(X_std)

# Visualize the results on the first two standardized features
plt.figure(figsize=(12, 5))

plt.subplot(1, 2, 1)
plt.scatter(X_std[:, 0], X_std[:, 1], c=kmeans_labels, cmap='viridis')
plt.title('k-Means Clustering')
# Mark the k-Means cluster centres on top of the points.
plt.scatter(kmeans.cluster_centers_[:, 0], kmeans.cluster_centers_[:, 1],
            marker='X', s=200, c='red')

plt.subplot(1, 2, 2)
plt.scatter(X_std[:, 0], X_std[:, 1], c=hierarchical_labels, cmap='viridis')
plt.title('Hierarchical Clustering')

plt.show()

# Evaluate clustering results using adjusted Rand index against the true labels
ari_kmeans = adjusted_rand_score(y_true, kmeans_labels)
ari_hierarchical = adjusted_rand_score(y_true, hierarchical_labels)
print(f"Adjusted Rand Index (k-Means): {ari_kmeans:.4f}")
print(f"Adjusted Rand Index (Hierarchical): {ari_hierarchical:.4f}")
Output
Program – 8
Q8. Write a program to implement Self-Organizing Map (SOM).
Code:
import numpy as np
import matplotlib.pyplot as plt
class SOM:
    """Minimal Self-Organizing Map on a 2-D grid of weight vectors."""

    def __init__(self, input_size, map_size, learning_rate=0.1, sigma=0.5):
        """Create a map of map_size[0] x map_size[1] neurons.

        Each neuron holds a weight vector of length input_size, initialized
        with uniform random values in [0, 1). sigma sets the width of the
        neighbourhood around the winning neuron; the default of 0.5 matches
        the value that was previously hard-coded.
        """
        self.input_size = input_size
        self.map_size = map_size
        self.learning_rate = learning_rate
        self.sigma = sigma
        # Initialize the SOM weights
        self.weights = np.random.rand(map_size[0], map_size[1], input_size)

    def find_winner(self, input_vector):
        """Return the (row, col) of the neuron whose weights are closest to input_vector."""
        distances = np.linalg.norm(self.weights - input_vector, axis=-1)
        winner = np.unravel_index(np.argmin(distances), distances.shape)
        return winner

    def update_weights(self, input_vector, winner):
        """Pull every neuron's weights towards input_vector, in place.

        The pull is scaled by the learning rate and by an influence factor
        that decays with the neuron's grid distance from the winner.
        """
        winner_pos = np.array(winner)
        for i in range(self.map_size[0]):
            for j in range(self.map_size[1]):
                # Update each weight vector
                distance = np.linalg.norm(np.array([i, j]) - winner_pos)
                # Influence decays exponentially with the (unsquared) grid
                # distance; a true Gaussian kernel would use distance ** 2.
                influence = np.exp(-distance / (2 * self.sigma ** 2))
                self.weights[i, j] += (
                    self.learning_rate * influence * (input_vector - self.weights[i, j])
                )

    def train(self, data, epochs):
        """Present every vector in data to the map, once per epoch."""
        for epoch in range(epochs):
            for input_vector in data:
                # Find the winner and update weights for each input vector
                winner = self.find_winner(input_vector)
                self.update_weights(input_vector, winner)

    def predict(self, data):
        """Return an array with the winning neuron's (row, col) for each input vector."""
        winners = [self.find_winner(input_vector) for input_vector in data]
        return np.array(winners)
# Example usage with a 2D dataset
data = np.array([[1, 2],
                 [5, 6],
                 [8, 7],
                 [2, 1]])

# Normalize the data to be in the range [0, 1]
data_normalized = data / np.max(data)

# Initialize SOM
input_size = data.shape[1]
map_size = (5, 5)  # Adjust the map size based on your dataset
som = SOM(input_size, map_size, learning_rate=0.1)

# Train the SOM
epochs = 1000
som.train(data_normalized, epochs)

# Get the predicted winning neurons for each input vector
predicted_winners = som.predict(data_normalized)

# Visualize the SOM and the input vectors. Winner grid coordinates are
# divided by (map size - 1) so they share the [0, 1] range of the inputs.
plt.figure(figsize=(8, 8))
plt.scatter(data_normalized[:, 0], data_normalized[:, 1], label='Input Vectors')
plt.scatter(predicted_winners[:, 0] / (map_size[0] - 1),
            predicted_winners[:, 1] / (map_size[1] - 1),
            marker='X', c='red', label='SOM Winners')
plt.legend()
plt.title('Self-Organizing Map (SOM) Results')
plt.show()
Output
Program – 9
Q9) Write a program for empirical comparison of different supervised learning algorithms.
Code:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
from sklearn.datasets import load_iris

# Load the Iris dataset
iris = load_iris()
X = iris.data
y = iris.target

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42)

# Support Vector Machines (SVM)
svm_model = SVC()
svm_model.fit(X_train, y_train)
svm_predictions = svm_model.predict(X_test)
svm_accuracy = accuracy_score(y_test, svm_predictions)

# Decision Trees (seeded so the comparison is repeatable between runs)
dt_model = DecisionTreeClassifier(random_state=42)
dt_model.fit(X_train, y_train)
dt_predictions = dt_model.predict(X_test)
dt_accuracy = accuracy_score(y_test, dt_predictions)

# Random Forests (seeded so the comparison is repeatable between runs)
rf_model = RandomForestClassifier(random_state=42)
rf_model.fit(X_train, y_train)
rf_predictions = rf_model.predict(X_test)
rf_accuracy = accuracy_score(y_test, rf_predictions)

# Print the results
print("Support Vector Machines Accuracy:", svm_accuracy)
print("Decision Trees Accuracy:", dt_accuracy)
print("Random Forests Accuracy:", rf_accuracy)

# Additional information: Classification Report
print("\nSupport Vector Machines Classification Report:")
print(classification_report(y_test, svm_predictions))
print("\nDecision Trees Classification Report:")
print(classification_report(y_test, dt_predictions))
print("\nRandom Forests Classification Report:")
print(classification_report(y_test, rf_predictions))
Output
Program – 10
Q10. Write a program for empirical comparison of different
unsupervised learning algorithms.
Code:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans, AgglomerativeClustering, DBSCAN
from sklearn.decomposition import PCA
from sklearn.datasets import load_iris
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import silhouette_score

# Load the Iris dataset
iris = load_iris()
X = iris.data
y = iris.target  # true labels; not used by the clustering below

# Standardize the features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Reduce dimensionality for visualization
pca = PCA(n_components=2)
X_pca = pca.fit_transform(X_scaled)

# Initialize clustering algorithms
kmeans = KMeans(n_clusters=3, random_state=42)
hierarchical = AgglomerativeClustering(n_clusters=3)
dbscan = DBSCAN(eps=0.5, min_samples=5)

# Perform clustering
kmeans_labels = kmeans.fit_predict(X_scaled)
hierarchical_labels = hierarchical.fit_predict(X_scaled)
dbscan_labels = dbscan.fit_predict(X_scaled)

# Visualize the results in the 2-D PCA projection, one panel per algorithm
plt.figure(figsize=(15, 5))

plt.subplot(1, 3, 1)
plt.scatter(X_pca[:, 0], X_pca[:, 1], c=kmeans_labels, cmap='viridis',
            marker='o', edgecolors='k')
plt.title('K-Means Clustering')

plt.subplot(1, 3, 2)
plt.scatter(X_pca[:, 0], X_pca[:, 1], c=hierarchical_labels, cmap='viridis',
            marker='o', edgecolors='k')
plt.title("Hierarchical Clustering")

plt.subplot(1, 3, 3)
plt.scatter(X_pca[:, 0], X_pca[:, 1], c=dbscan_labels, cmap='viridis',
            marker='o', edgecolors='k')
plt.title('DBSCAN Clustering')

plt.tight_layout()
plt.show()

# Evaluate the quality of clustering using silhouette score
# NOTE(review): every distinct DBSCAN label is passed to the score as its own
# cluster, including any label DBSCAN uses for noise points - confirm this is
# the intended comparison.
silhouette_kmeans = silhouette_score(X_scaled, kmeans_labels)
silhouette_hierarchical = silhouette_score(X_scaled, hierarchical_labels)
silhouette_dbscan = silhouette_score(X_scaled, dbscan_labels)
print("Silhouette Score (K-Means):", silhouette_kmeans)
print("Silhouette Score (Hierarchical):", silhouette_hierarchical)
print("Silhouette Score (DBSCAN):", silhouette_dbscan)
Output