Before you turn this problem in, make sure everything runs as expected. First, restart the kernel (in the menubar, select Kernel→Restart) and then run all cells (in the menubar, select Cell→Run All).
Make sure you fill in any place that says YOUR CODE HERE or "YOUR ANSWER HERE", as well as your name and collaborators below. You may remove the raise NotImplementedError() line and write your own code.
NAME = ""
WSU_ID = ""
Assignment 1
Q1. Task Description
1. Data Generation:
• Generate two normal distributions, each of size 30,000, with feature dimension = 100. (Hint: shape (2*30000, 100))
• You may choose arbitrary means and standard deviations.
• Label the first set of feature vectors as "+1" and the second set of feature vectors as either "-1" or "0".
import numpy as np
# Step 1: Generate Data
np.random.seed(42) # For reproducibility
# Generate two normal distributions
mean1 = np.random.randn(1, 100)
mean2 = np.random.randn(1, 100)
std_dev = 1.0
data1 = mean1 + std_dev * np.random.randn(30000, 100)
data2 = mean2 + std_dev * np.random.randn(30000, 100)
labels1 = np.ones(30000)   # Label the first set as "+1" (stored as 1)
labels2 = np.zeros(30000)  # Label the second set as "0"
# Combine data and labels
data = np.vstack((data1, data2))
labels = np.concatenate((labels1, labels2), axis=0)
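As a quick sanity check (a sketch assuming the variables defined in the cell above), the distance between the two class means indicates how separable the data is. With two independent standard-normal mean vectors in 100 dimensions, the expected distance is roughly sqrt(200) ≈ 14, far larger than the unit standard deviation, so the two clouds should be nearly linearly separable:

# Sanity check (assumes mean1, mean2, std_dev from above): the gap between
# the class means, relative to the noise scale, predicts separability.
separation = np.linalg.norm(mean1 - mean2)
print(f"Distance between class means: {separation:.2f} (std dev = {std_dev})")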
2. Data Splitting
• Split the generated data into training, validation, and test sets in an 8:1:1 ratio.
• You are expected to write the code from scratch (i.e., do not use the train_test_split function from sklearn).
# Split the data into training (80%), validation (10%), and test (10%) sets manually
# Shuffle first so that every split contains examples of both classes
# (the data above is ordered: all "+1" samples followed by all "0" samples).
shuffle_idx = np.random.permutation(len(data))
data = data[shuffle_idx]
labels = labels[shuffle_idx]
total_samples = len(data)
split_indices = [0.8, 0.9]  # Split ratios
split_sizes = [int(split * total_samples) for split in split_indices]
x_train, x_val, x_test = np.split(data, split_sizes, axis=0)
y_train, y_val, y_test = np.split(labels, split_sizes, axis=0)
x_train.shape
(48000, 100)
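A quick check (a sketch assuming the split variables above) that the validation and test splits have the expected size and that each split contains both classes:

# Verify split sizes and class balance (after shuffling, each label mean
# should be close to 0.5, since the labels are 0/1).
print(x_val.shape, x_test.shape)  # expected: (6000, 100) each
print("Train class balance:", y_train.mean())
print("Val class balance:  ", y_val.mean())
print("Test class balance: ", y_test.mean())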
3. Classifier Implementation
• Implement linear classifiers using three different methods (i.e., write the code from scratch; refer to the lecture slides):
– Random Method
– Perceptron Method
– Gradient Descent
import numpy as np

# Step 3: Classifier Implementation - Random Method with Epochs
def random_classifier(x, y_train, X_val, y_val, num_epochs=10):
    """
    Randomly assigns labels (0 or 1) to the input data for a specified
    number of epochs.

    Args:
        x (numpy.ndarray): Training data with shape (num_samples, num_features).
        y_train (numpy.ndarray): Training labels with shape (num_samples,).
        X_val (numpy.ndarray): Validation data.
        y_val (numpy.ndarray): Validation labels.
        num_epochs (int): Number of training epochs.

    Returns:
        predictions (numpy.ndarray): Randomly assigned labels (0 or 1) for
            the validation set from the final epoch.
        val_accuracy (list): Validation accuracy after each epoch.
    """
    num_samples = x.shape[0]
    val_accuracy = []
    for epoch in range(num_epochs):
        # Assign random labels to the training set for this epoch
        predictions = np.random.choice([0, 1], size=num_samples)
        # Calculate and print training accuracy for this epoch
        correct = np.sum(y_train == predictions)
        accuracy = correct / len(y_train)
        print(f"Epoch {epoch + 1}/{num_epochs} - Training Accuracy: {accuracy:.2f}")
        # Assign random labels to the validation set and record accuracy
        predictions = np.random.choice([0, 1], size=len(X_val))
        correct = np.sum(y_val == predictions)
        val_accuracy.append(correct / len(y_val))
    return predictions, val_accuracy
num_epochs = 10

# Use the random classifier with epochs
random_predictions, val_accuracy = random_classifier(x_train, y_train, x_val, y_val, num_epochs=num_epochs)

# Print validation accuracies after each epoch
for epoch, val_acc in enumerate(val_accuracy):
    print(f"Epoch {epoch + 1}/{num_epochs} - Validation Accuracy: {val_acc:.2f}")
Epoch 1/10 - Training Accuracy: 0.50
Epoch 2/10 - Training Accuracy: 0.50
Epoch 3/10 - Training Accuracy: 0.50
Epoch 4/10 - Training Accuracy: 0.50
Epoch 5/10 - Training Accuracy: 0.50
Epoch 6/10 - Training Accuracy: 0.50
Epoch 7/10 - Training Accuracy: 0.50
Epoch 8/10 - Training Accuracy: 0.50
Epoch 9/10 - Training Accuracy: 0.50
Epoch 10/10 - Training Accuracy: 0.50
Epoch 1/10 - Validation Accuracy: 0.50
Epoch 2/10 - Validation Accuracy: 0.50
Epoch 3/10 - Validation Accuracy: 0.50
Epoch 4/10 - Validation Accuracy: 0.50
Epoch 5/10 - Validation Accuracy: 0.50
Epoch 6/10 - Validation Accuracy: 0.50
Epoch 7/10 - Validation Accuracy: 0.50
Epoch 8/10 - Validation Accuracy: 0.50
Epoch 9/10 - Validation Accuracy: 0.50
Epoch 10/10 - Validation Accuracy: 0.51
import numpy as np

class Perceptron:
    def __init__(self, learning_rate, epochs):
        self.weights = None
        self.bias = None
        self.learning_rate = learning_rate
        self.epochs = epochs

    def activation(self, z):
        # Step function: 1 where z > 0, 0 elsewhere
        return np.heaviside(z, 0)

    def fit(self, X, y, X_val, y_val):
        n_features = X.shape[1]
        # Initializing weights and bias
        self.weights = np.zeros(n_features)
        self.bias = 0
        train_accuracies = []
        val_accuracies = []
        # Iterating until the number of epochs
        for epoch in range(self.epochs):
            # Traversing through the entire training set
            for i in range(len(X)):
                # Score only the current sample (avoids recomputing
                # predictions for the whole set at every update step)
                z_i = np.dot(X[i], self.weights) + self.bias
                y_pred_i = self.activation(z_i)
                # Updating weights and bias
                self.weights = self.weights + self.learning_rate * (y[i] - y_pred_i) * X[i]
                self.bias = self.bias + self.learning_rate * (y[i] - y_pred_i)
            # Calculate training accuracy for this epoch
            train_accuracy = np.mean(self.predict(X) == y)
            train_accuracies.append(train_accuracy)
            # Calculate validation accuracy for this epoch
            val_accuracy = np.mean(self.predict(X_val) == y_val)
            val_accuracies.append(val_accuracy)
            print(f"Epoch {epoch + 1}/{self.epochs} - Training Accuracy: {train_accuracy:.2f} - Validation Accuracy: {val_accuracy:.2f}")
        return self.weights, self.bias, train_accuracies, val_accuracies

    def predict(self, X):
        z = np.dot(X, self.weights) + self.bias
        return self.activation(z)
perceptron = Perceptron(0.01, 3)

# Train the model on the training data and validate on the validation data
trained_weights, trained_bias, train_accuracies, val_accuracies = perceptron.fit(x_train, y_train, x_val, y_val)

# Predict on the held-out test set
test_predictions = perceptron.predict(x_test)
# Print the trained weights and bias
print("Trained Weights:", trained_weights)
print("Trained Bias:", trained_bias)
# Print the predictions on the test data
print("Test Predictions:", test_predictions)
# Print the validation accuracies over epochs
print("Validation Accuracies:", val_accuracies)
Epoch 1/3 - Training Accuracy: 1.00 - Validation Accuracy: 1.00
Epoch 2/3 - Training Accuracy: 1.00 - Validation Accuracy: 1.00
Epoch 3/3 - Training Accuracy: 1.00 - Validation Accuracy: 1.00
Trained Weights: [ 0.02930894  0.01577013  0.02865582  0.0143475   0.00191648 -0.01829453
 -0.00588742  0.01397218  0.00327979  0.05439526  0.01756178 -0.00475071
  0.01187068 -0.04555749 -0.01602566  0.00112243 -0.01589925  0.013728
 -0.01856712 -0.01436198  0.02043848 -0.00858916 -0.01994088 -0.0220679
 -0.02221556 -0.00416195 -0.02028789  0.00435762 -0.00615626  0.01356935
  0.01116244  0.01624046  0.01467706 -0.00285822  0.03518707 -0.01609795
 -0.01171295 -0.00303621 -0.01721151  0.02555033 -0.00350199 -0.02203814
  0.0052322   0.00163387 -0.04067547 -0.02737306  0.00533138  0.01603403
  0.00976284 -0.02873596  0.00402219 -0.0099823   0.02706815  0.00378555
 -0.01541295  0.00844676 -0.00940219 -0.0171186   0.02834484  0.01975563
  0.00611272  0.0021402  -0.04979813 -0.01340293 -0.00384768  0.00251796
  0.00144038 -0.01788196  0.00655632 -0.00263312  0.01251551  0.01948608
  0.01936723  0.01471368 -0.04047635 -0.00654433  0.00207036 -0.01988373
  0.0082425  -0.04606465 -0.00845419  0.02105961  0.03866535 -0.01093569
  0.02134901 -0.03409519  0.01080157  0.01874172  0.00422245 -0.00257807
 -0.00859017 -0.00083997 -0.02720405  0.03081453 -0.00510074 -0.033766
  0.02433598  0.00608974  0.01748785  0.00074318]
Trained Bias: 0.0
Test Predictions: [0. 0. 0. ... 0. 0. 0.]
Validation Accuracies: [1.0, 1.0, 1.0]
alpha = 0.01      # Step size
iterations = 10   # No. of iterations
m = y_train.size  # No. of training data points

np.random.seed(123)  # Set the seed
# Pick random starting values for theta: one weight per feature.
# Note: x_train has no column of ones, so this model has no separate
# bias term; theta[0] is simply the weight of the first feature.
theta = np.random.rand(x_train.shape[1])

# GRADIENT DESCENT
def gradient_descent(X, y, theta, iterations, alpha):
    m = len(y)  # number of training samples
    past_costs = []
    past_thetas = [theta]
    for i in range(iterations):
        prediction = np.dot(X, theta)
        error = prediction - y
        cost = 1 / (2 * m) * np.dot(error.T, error)  # mean squared error
        past_costs.append(cost)
        theta = theta - (alpha * (1 / m) * np.dot(X.T, error))
        past_thetas.append(theta)
    return past_thetas, past_costs

# Pass the relevant variables to the function and get the new values back...
past_thetas, past_costs = gradient_descent(x_train, y_train, theta, iterations, alpha)
theta = past_thetas[-1]

# Print the results (theta[0] is reported as the "bias" by convention here,
# even though it is really the first feature's weight; see the note above)...
print("Gradient Descent Weights:", theta[1:])
print("Gradient Descent Bias:", theta[0])
# Training Accuracy Calculation
def calculate_accuracy(X, y, theta):
    prediction = np.dot(X, theta)
    # Threshold at 0 (note: with 0/1 regression targets, a 0.5 threshold
    # would be the more natural choice)
    predicted_labels = np.where(prediction >= 0, 1, 0)
    accuracy = np.mean(predicted_labels == y)
    return accuracy

training_accuracy = calculate_accuracy(x_train, y_train, theta)
validation_accuracy = calculate_accuracy(x_val, y_val, theta)
print("Training Accuracy:", training_accuracy)
print("Validation Accuracy:", validation_accuracy)
Gradient Descent Weights: [ 0.25627177  0.25038837  0.59917103  0.63970942  0.36930991  0.97167784
  0.66674311  0.40117186  0.39193994  0.29455745  0.63114886  0.41072971
 -0.08245583  0.25614801  0.6345314   0.10644166  0.18772993  0.41568106
  0.39119027  0.65677765  0.75926664  0.64870266  0.47832048  0.61519105
  0.28286365  0.26157788  0.23032212  0.23021395  0.55619403  0.05641231
  0.50334782  0.39370568  0.37749935  0.44011723  0.19412849  0.40671639
  0.6847459   0.76826794  0.47578244  0.60560003  0.10481406  0.28965287
  0.3563486   0.68623067  0.17912887  0.42248432  0.96412374  0.48772846
  0.4430716   0.12748256  0.72307007  0.50990833  0.531048    0.37290001
  0.33818183  0.31291364  0.59443465  0.81903494  0.52219904  0.58004777
  0.51087395  0.49544815  0.54350474  0.80537081  0.1565376   0.67935071
  0.26833345  0.1985203   0.48198217  0.11375451  0.90546796  0.5662372
  0.75083446 -0.15106624  0.58440185  0.50743967  0.11817427  0.14637337
  0.48298345  0.26844331  0.65397454  0.60194814  0.31766682  0.78773821
  0.72206429  0.37451293  0.05501134  0.25415076  0.40143725  0.64862934
  0.9518089   0.27900408  0.68120126  0.51204179  0.53185254  0.16313734
  0.37971075  0.21964968  0.30610183]
Gradient Descent Bias: 0.6719870624353718
Training Accuracy: 0.5470833333333334
Validation Accuracy: 0.5073333333333333
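To see whether ten iterations were enough, a minimal sketch (assuming past_costs from the cell above and that matplotlib is available) plots the cost per iteration:

import matplotlib.pyplot as plt

# Plot the MSE cost recorded at each gradient descent iteration
# (past_costs comes from the gradient_descent call above).
plt.plot(range(1, len(past_costs) + 1), past_costs, marker="o")
plt.xlabel("Iteration")
plt.ylabel("Cost (MSE)")
plt.title("Gradient Descent Convergence")
plt.show()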
4. Performance Evaluation
• Evaluate the performance of each method on the test set using the evaluation metrics discussed in class, such as accuracy, precision, recall, and F1-score. (You are expected to write the code from scratch for each of the metrics.)
import numpy as np

# Define evaluation functions
def accuracy(y_true, y_pred):
    """
    Calculate accuracy.

    Args:
        y_true (numpy.ndarray): True labels with shape (num_samples,).
        y_pred (numpy.ndarray): Predicted labels with shape (num_samples,).

    Returns:
        float: Accuracy.
    """
    correct = np.sum(y_true == y_pred)
    total = len(y_true)
    return correct / total

def precision(y_true, y_pred):
    """
    Calculate precision, treating label 1 (the "+1" class) as positive.

    Args:
        y_true (numpy.ndarray): True labels with shape (num_samples,).
        y_pred (numpy.ndarray): Predicted labels with shape (num_samples,).

    Returns:
        float: Precision = TP / (TP + FP).
    """
    true_positive = 0
    false_positive = 0
    for i in range(len(y_true)):
        if y_true[i] == 1 and y_pred[i] == 1:
            true_positive += 1
        elif y_true[i] == 0 and y_pred[i] == 1:
            false_positive += 1  # predicted positive, actually negative
    # Check for zero denominator
    if true_positive + false_positive == 0:
        return 0.0  # Return 0 precision if denominator is zero
    return true_positive / (true_positive + false_positive)

def recall(y_true, y_pred):
    """
    Calculate recall, treating label 1 (the "+1" class) as positive.

    Args:
        y_true (numpy.ndarray): True labels with shape (num_samples,).
        y_pred (numpy.ndarray): Predicted labels with shape (num_samples,).

    Returns:
        float: Recall = TP / (TP + FN).
    """
    true_positive = 0
    false_negative = 0
    for i in range(len(y_true)):
        if y_true[i] == 1 and y_pred[i] == 1:
            true_positive += 1
        elif y_true[i] == 1 and y_pred[i] == 0:
            false_negative += 1  # actually positive, predicted negative
    # Check for zero denominator
    if true_positive + false_negative == 0:
        return 0.0  # Return 0 recall if denominator is zero
    return true_positive / (true_positive + false_negative)

def f1_score(y_true, y_pred):
    """
    Calculate F1-score, the harmonic mean of precision and recall.

    Args:
        y_true (numpy.ndarray): True labels with shape (num_samples,).
        y_pred (numpy.ndarray): Predicted labels with shape (num_samples,).

    Returns:
        float: F1-score.
    """
    prec = precision(y_true, y_pred)
    rec = recall(y_true, y_pred)
    # Check for zero denominator in precision or recall
    if prec == 0.0 or rec == 0.0:
        return 0.0  # Return 0 F1-score if precision or recall is zero
    return 2 * (prec * rec) / (prec + rec)
# Example: random baseline
# Assign random labels to the test set (no trained model is used here)
random_pred = np.random.choice([0, 1], size=len(x_test))
# Calculate evaluation metrics
acc = accuracy(y_test, random_pred)
prec = precision(y_test, random_pred)
rec = recall(y_test, random_pred)
f1 = f1_score(y_test, random_pred)
# Print the results
print(f"Random Accuracy: {acc:.2f}")
print(f"Random Precision: {prec:.2f}")
print(f"Random Recall: {rec:.2f}")
print(f"Random F1-Score: {f1:.2f}")
# Gradient descent predictions on the test set (theta from training above)
prediction = np.dot(x_test, theta)
grad_predicted = np.where(prediction >= 0, 1, 0)
# Calculate evaluation metrics
acc = accuracy(y_test, grad_predicted)
prec = precision(y_test, grad_predicted)
rec = recall(y_test, grad_predicted)
f1 = f1_score(y_test, grad_predicted)
# Print the results
print(f"Grad Accuracy: {acc:.2f}")
print(f"Grad Precision: {prec:.2f}")
print(f"Grad Recall: {rec:.2f}")
print(f"Grad F1-Score: {f1:.2f}")
perceptron_pred = perceptron.predict(x_test)

acc = accuracy(y_test, perceptron_pred)
prec = precision(y_test, perceptron_pred)
rec = recall(y_test, perceptron_pred)
f1 = f1_score(y_test, perceptron_pred)

# Print the results
print(f"Perceptron Accuracy: {acc:.2f}")
print(f"Perceptron Precision: {prec:.2f}")
print(f"Perceptron Recall: {rec:.2f}")
print(f"Perceptron F1-Score: {f1:.2f}")
Random Accuracy: 0.50
Random Precision: 0.50
Random Recall: 0.50
Random F1-Score: 0.50
Grad Accuracy: 0.51
Grad Precision: 0.51
Grad Recall: 0.51
Grad F1-Score: 0.51
Perceptron Accuracy: 1.00
Perceptron Precision: 1.00
Perceptron Recall: 1.00
Perceptron F1-Score: 1.00
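As an optional sanity check (not part of the from-scratch requirement, and assuming scikit-learn is installed), the hand-written metrics can be compared against sklearn.metrics on the same predictions:

from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score as sk_f1

# Compare the from-scratch metrics against sklearn on the perceptron predictions;
# the numbers should match.
print("sklearn accuracy: ", accuracy_score(y_test, perceptron_pred))
print("sklearn precision:", precision_score(y_test, perceptron_pred, zero_division=0))
print("sklearn recall:   ", recall_score(y_test, perceptron_pred, zero_division=0))
print("sklearn F1:       ", sk_f1(y_test, perceptron_pred, zero_division=0))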
5. Determine the best method
• Analyze the evaluation results to determine which method is the best fit for solving this
problem based on the chosen evaluation metrics. (Write your reasoning)
The best method is the Perceptron. It scores 1.00 on every test metric (accuracy, precision, recall, and F1-score), while the random baseline stays at chance level (≈0.50) and gradient descent barely improves on it (≈0.51). This is expected: the two Gaussians have well-separated means, so the data is (almost) linearly separable, which is exactly the setting in which the perceptron's mistake-driven updates converge to a separating hyperplane.
By following these steps, you can systematically generate and split the data, implement different linear classification methods, and evaluate their performance to identify the most suitable method for your specific problem.
Extra Credits
• Implement cross-validation on the best method and show the results. (Refer to the lecture slides for more information)
import numpy as np

class Perceptron:
    def __init__(self, learning_rate, epochs):
        self.weights = None
        self.bias = None
        self.learning_rate = learning_rate
        self.epochs = epochs

    def activation(self, z):
        return np.heaviside(z, 0)

    def fit(self, X, y):
        n_features = X.shape[1]
        # Initializing weights and bias
        self.weights = np.zeros(n_features)
        self.bias = 0
        # Iterating until the number of epochs
        for epoch in range(self.epochs):
            # Traversing through the entire training set
            for i in range(len(X)):
                # Score only the current sample
                z_i = np.dot(X[i], self.weights) + self.bias
                y_pred_i = self.activation(z_i)
                # Updating weights and bias
                self.weights = self.weights + self.learning_rate * (y[i] - y_pred_i) * X[i]
                self.bias = self.bias + self.learning_rate * (y[i] - y_pred_i)

    def predict(self, X):
        z = np.dot(X, self.weights) + self.bias
        return self.activation(z)
def k_fold_cross_validation(X, y, k, learning_rate, epochs):
    fold_size = len(X) // k
    val_accuracies = []
    for i in range(k):
        # Split the data into training and validation sets for this fold
        val_start = i * fold_size
        val_end = (i + 1) * fold_size
        x_val_fold = X[val_start:val_end]
        y_val_fold = y[val_start:val_end]
        x_train_fold = np.concatenate((X[:val_start], X[val_end:]), axis=0)
        y_train_fold = np.concatenate((y[:val_start], y[val_end:]), axis=0)
        # Create and train the Perceptron model
        perceptron = Perceptron(learning_rate, epochs)
        perceptron.fit(x_train_fold, y_train_fold)
        # Calculate validation accuracy for this fold
        val_accuracy = np.mean(perceptron.predict(x_val_fold) == y_val_fold)
        val_accuracies.append(val_accuracy)
    return val_accuracies
learning_rate = 0.01
epochs = 3
k = 5  # Number of folds for cross-validation

# Perform k-fold cross-validation
val_accuracies = k_fold_cross_validation(x_train, y_train, k, learning_rate, epochs)

# Print the validation accuracies for each fold
for i, val_accuracy in enumerate(val_accuracies):
    print(f"Fold {i + 1} Validation Accuracy: {val_accuracy:.2f}")

# Calculate the average validation accuracy over all folds
average_val_accuracy = np.mean(val_accuracies)
print(f"Average Validation Accuracy: {average_val_accuracy:.2f}")