HW5
Results of original assignment
Conclusion/discussion
I experimented with the number of epochs and the learning rate, and found 1000 epochs at a learning rate of 0.5 to work well; even 100 epochs produced acceptable results. Test error is very low, even with 8 corrupted pixels (average MSE below 0.1). However, there seems to be some overfitting, given how much higher the MSE is at 8 corrupted pixels than at 4.
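To pin down how fast the error grows with noise, the test sweep could be made finer. Below is a sketch (not part of the submitted code) that could be appended at the end of hw4 after training; it reuses hw4's P, T, trained weights, testingEpochs, fwdComp, and addNoise, while noiseLevels and avgMSE are names introduced here:

% average test MSE as a function of the number of corrupted pixels
noiseLevels = 0:2:10;
avgMSE = zeros(size(noiseLevels));
for n = 1:numel(noiseLevels)
    trials = zeros(1, 3*testingEpochs);
    for t = 1:3
        for y = 1:testingEpochs
            [~, out2] = fwdComp(addNoise(P(:,t), noiseLevels(n)), W1, b1, W2, b2);
            trials((t-1)*testingEpochs + y) = sum((T(:,t) - out2).^2);
        end
    end
    avgMSE(n) = mean(trials);
end
figure
plot(noiseLevels, avgMSE)
xlabel('Number of Corrupted Pixels')
ylabel('Average Mean Squared Error')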
Results of extra credit
Conclusion/discussion
In the ASCII case, the network seemed to overfit more than in the original assignment. This shows up in how quickly the training MSE converges while the test MSE at 8 corrupted pixels stays high (compared to 4).
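Raw MSE can overstate the damage when the decoded bit pattern is still correct, so a complementary check is to threshold each of the 7 outputs at 0.5 and count exact matches against the target code. A sketch (again not part of the submitted code) that could be appended at the end of hw4ext, reusing its P, T, trained weights, testingEpochs, fwdComp, and addNoise:

% fraction of noisy trials whose thresholded 7-bit output exactly
% matches the target ASCII code, at 8 corrupted pixels
correct = 0;
for t = 1:7
    for y = 1:testingEpochs
        [~, out2] = fwdComp(addNoise(P(:,t), 8), W1, b1, W2, b2);
        if isequal(out2 > 0.5, T(:,t) > 0.5)
            correct = correct + 1;
        end
    end
end
fprintf('Exact matches at 8 corrupted pixels: %.1f%%\n', 100*correct/(7*testingEpochs));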
Backprop code
function hw4
% define constants
trainingEpochs = 1000;
testingEpochs = 1000;
learningRate = 0.5;
noOfNeurons = 4;
trainError = zeros(1, trainingEpochs);
error1 = zeros(1, 3*testingEpochs);
error2 = zeros(1, 3*testingEpochs);
error3 = zeros(1, 3*testingEpochs);
% initial weights and biases (uniform random between -0.5 and 0.5)
% hidden layer with noOfNeurons neurons, output layer with 3 neurons
W1 = (rand(30,noOfNeurons) - 0.5)';
b1 = (rand(1,noOfNeurons) - 0.5)';
W2 = rand(3,noOfNeurons) - 0.5;
b2 = rand(3,1) - 0.5; % one bias per output neuron
% inputs p0 = 0, p1 = 1, p2 = 2 (30-pixel bitmaps)
p0 = [-1 1 1 1 1 -1 1 -1 -1 -1 -1 1 1 -1 -1 -1 -1 1 ...
      1 -1 -1 -1 -1 1 -1 1 1 1 1 -1]';
p1 = [-1 -1 -1 -1 -1 -1 1 -1 -1 -1 -1 -1 1 1 1 1 1 1 ...
      -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1]';
p2 = [1 -1 -1 -1 -1 -1 1 -1 -1 1 1 1 1 -1 -1 1 -1 1 ...
      -1 1 1 -1 -1 1 -1 -1 -1 -1 -1 1]';
P = [p0 p1 p2];
% targets
t0 = [1 0 0]';
t1 = [0 1 0]';
t2 = [0 0 1]';
T = [t0 t1 t2];
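% note: the targets are one-hot columns, so the index of the largest
% output gives the predicted digit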
% backprop training for <trainingEpochs> iterations
for x = 1:trainingEpochs
    % randomly pick one of the 3 training patterns
    r = randi(3);
    % fwdComp, backProp, update
    [out1, out2] = fwdComp(P(:,r), W1, b1, W2, b2);
    [sens1, sens2] = backProp(out1, out2, T(:,r), W2);
    [W1, b1, W2, b2] = update(W1, b1, W2, b2, learningRate, ...
        sens1, sens2, out1, P(:,r));
    % record the squared error for this iteration
    trainError(x) = sum((T(:,r) - out2).^2);
end
% plot backprop results
plot(1:trainingEpochs, trainError)
title('Backpropagation ANN Training Results')
xlabel('Number of Iterations')
ylabel('Mean Squared Error')
% testing: <testingEpochs> noisy trials per digit
for t = 1:3
    for y = 1:testingEpochs
        k = (t-1)*testingEpochs + y; % unique index per (digit, trial)
        [~, out2] = fwdComp(addNoise(P(:,t),0), W1, b1, W2, b2);
        error1(k) = sum((T(:,t) - out2).^2);
        [~, out2] = fwdComp(addNoise(P(:,t),4), W1, b1, W2, b2);
        error2(k) = sum((T(:,t) - out2).^2);
        [~, out2] = fwdComp(addNoise(P(:,t),8), W1, b1, W2, b2);
        error3(k) = sum((T(:,t) - out2).^2);
    end
end
% plot pixel corruption results with 0, 4, and 8 pixels changed
figure % new figure so the training plot is not overwritten
graphx = [0 4 8];
graphy = [mean(error1) mean(error2) mean(error3)];
bar(graphx, graphy);
title('Backpropagation ANN Testing Results')
xlabel('Number of Corrupted Pixels')
ylabel('Average Mean Squared Error')
end
function out = addNoise(originalDigit, pixelsChanged)
% flip the sign of pixelsChanged distinct, randomly chosen pixels
% (randperm avoids hitting the same pixel twice, which randi could)
r = randperm(30, pixelsChanged);
originalDigit(r) = -originalDigit(r);
out = originalDigit;
end
function [out1,out2] = fwdComp(input, W1, b1, W2, b2)
% forward pass: a1 = logsig(W1*p + b1), a2 = logsig(W2*a1 + b2)
out1 = logsig(W1 * input + b1);
out2 = logsig(W2 * out1 + b2);
end
function [sens1,sens2] = backProp(out1, out2, target, W2)
% derivative of logsig: for a = logsig(n), da/dn = (1 - a).*a
fdot2 = diag((1 - out2).*out2);
fdot1 = diag((1 - out1).*out1);
% sensitivity recursion, starting with the second layer:
% s2 = -2*F'(n2)*(t - a2), s1 = F'(n1)*W2'*s2
sens2 = -2 * fdot2 * (target - out2);
sens1 = fdot1 * W2' * sens2;
end
function [W1n, b1n, W2n, b2n] = update(W1, b1, W2, b2, learningRate, ...
    sens1, sens2, out1, input)
% steepest descent: W <- W - alpha*s*a', b <- b - alpha*s
W2n = W2 - (learningRate * sens2 * out1');
b2n = b2 - (learningRate * sens2);
W1n = W1 - (learningRate * sens1 * input');
b1n = b1 - (learningRate * sens1);
end
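Because the initial weights, the training order, and the corrupted pixels are all drawn at random, the plots vary from run to run. Optionally (an addition, not in the submitted code), the random seed can be fixed at the top of hw4:

rng(0); % make initialization, training order, and noise reproducible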
Extra credit code (same structure as above; only the inputs, targets, and test parameters differ)
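Each target below is the 7-bit ASCII code of the corresponding character ('0' is 48 = 0110000, '1' is 49 = 0110001, and so on). As a sketch (not part of the submitted code), the same target matrix could be generated in one line:

% columns of T are the 7-bit ASCII codes of '0'..'6'
T = (dec2bin(double('0':'6'), 7) - '0')';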
function hw4ext
% define constants
trainingEpochs = 1000;
testingEpochs = 1000;
learningRate = 0.5;
noOfNeurons = 4;
trainError = zeros(1, trainingEpochs);
error1 = zeros(1, 7*testingEpochs);
error2 = zeros(1, 7*testingEpochs);
error3 = zeros(1, 7*testingEpochs);
% initial weights and biases (uniform random between -0.5 and 0.5)
% hidden layer with noOfNeurons neurons, output layer with 7 neurons
W1 = (rand(30,noOfNeurons) - 0.5)';
b1 = (rand(1,noOfNeurons) - 0.5)';
W2 = rand(7,noOfNeurons) - 0.5;
b2 = rand(7,1) - 0.5; % one bias per output neuron
% inputs p0 = 0, p1 = 1, ..., p6 = 6 (30-pixel bitmaps)
p0 = [-1 1 1 1 1 -1 1 -1 -1 -1 -1 1 1 -1 -1 -1 -1 1 ...
      1 -1 -1 -1 -1 1 -1 1 1 1 1 -1]';
p1 = [-1 -1 -1 -1 -1 -1 1 -1 -1 -1 -1 -1 1 1 1 1 1 1 ...
      -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1]';
p2 = [1 -1 -1 -1 -1 -1 1 -1 -1 1 1 1 1 -1 -1 1 -1 1 ...
      -1 1 1 -1 -1 1 -1 -1 -1 -1 -1 1]';
p3 = [-1 -1 -1 -1 -1 -1 -1 1 -1 -1 1 -1 1 -1 -1 -1 -1 1 ...
      1 -1 1 -1 -1 1 -1 1 -1 1 1 -1]';
p4 = [-1 -1 -1 -1 -1 -1 1 1 1 -1 -1 -1 -1 -1 1 -1 -1 -1 ...
      1 1 1 1 1 1 -1 -1 -1 -1 -1 -1]';
p5 = [-1 -1 -1 -1 -1 -1 1 1 1 -1 1 -1 1 -1 1 -1 -1 1 ...
      1 -1 1 -1 -1 1 1 -1 -1 1 1 -1]';
p6 = [-1 -1 -1 -1 -1 -1 -1 1 1 1 1 -1 1 -1 1 -1 -1 1 ...
      1 -1 1 -1 -1 1 1 -1 -1 1 1 -1]';
P = [p0 p1 p2 p3 p4 p5 p6];
% targets: the 7-bit ASCII codes of '0'..'6', as column vectors
t0 = [0 1 1 0 0 0 0]';
t1 = [0 1 1 0 0 0 1]';
t2 = [0 1 1 0 0 1 0]';
t3 = [0 1 1 0 0 1 1]';
t4 = [0 1 1 0 1 0 0]';
t5 = [0 1 1 0 1 0 1]';
t6 = [0 1 1 0 1 1 0]';
T = [t0 t1 t2 t3 t4 t5 t6];
% backprop training for <trainingEpochs> iterations
for x = 1:trainingEpochs
    % randomly pick one of the 7 training patterns
    r = randi(7);
    % fwdComp, backProp, update
    [out1, out2] = fwdComp(P(:,r), W1, b1, W2, b2);
    [sens1, sens2] = backProp(out1, out2, T(:,r), W2);
    [W1, b1, W2, b2] = update(W1, b1, W2, b2, learningRate, ...
        sens1, sens2, out1, P(:,r));
    % record the squared error for this iteration
    trainError(x) = sum((T(:,r) - out2).^2);
end
% plot backprop results
plot(1:trainingEpochs, trainError)
title('Backprop ASCII ANN Training Results')
xlabel('Number of Iterations')
ylabel('Mean Squared Error')
% testing: <testingEpochs> noisy trials per character
for t = 1:7
    for y = 1:testingEpochs
        k = (t-1)*testingEpochs + y; % unique index per (character, trial)
        [~, out2] = fwdComp(addNoise(P(:,t),0), W1, b1, W2, b2);
        error1(k) = sum((T(:,t) - out2).^2);
        [~, out2] = fwdComp(addNoise(P(:,t),4), W1, b1, W2, b2);
        error2(k) = sum((T(:,t) - out2).^2);
        [~, out2] = fwdComp(addNoise(P(:,t),8), W1, b1, W2, b2);
        error3(k) = sum((T(:,t) - out2).^2);
    end
end
% plot pixel corruption results with 0, 4, and 8 pixels changed
figure % new figure so the training plot is not overwritten
graphx = [0 4 8];
graphy = [mean(error1) mean(error2) mean(error3)];
bar(graphx, graphy);
title('Backprop ASCII ANN Testing Results')
xlabel('Number of Corrupted Pixels')
ylabel('Average Mean Squared Error')
end
function out = addNoise(originalDigit, pixelsChanged)
% flip the sign of pixelsChanged distinct, randomly chosen pixels
% (randperm avoids hitting the same pixel twice, which randi could)
r = randperm(30, pixelsChanged);
originalDigit(r) = -originalDigit(r);
out = originalDigit;
end
function [out1,out2] = fwdComp(input, W1, b1, W2, b2)
% forward pass: a1 = logsig(W1*p + b1), a2 = logsig(W2*a1 + b2)
out1 = logsig(W1 * input + b1);
out2 = logsig(W2 * out1 + b2);
end
function [sens1,sens2] = backProp(out1, out2, target, W2)
% derivative of logsig: for a = logsig(n), da/dn = (1 - a).*a
fdot2 = diag((1 - out2).*out2);
fdot1 = diag((1 - out1).*out1);
% sensitivity recursion, starting with the second layer:
% s2 = -2*F'(n2)*(t - a2), s1 = F'(n1)*W2'*s2
sens2 = -2 * fdot2 * (target - out2);
sens1 = fdot1 * W2' * sens2;
end
function [W1n, b1n, W2n, b2n] = update(W1, b1, W2, b2, learningRate, ...
    sens1, sens2, out1, input)
% steepest descent: W <- W - alpha*s*a', b <- b - alpha*s
W2n = W2 - (learningRate * sens2 * out1');
b2n = b2 - (learningRate * sens2);
W1n = W1 - (learningRate * sens1 * input');
b1n = b1 - (learningRate * sens1);
end