diff --git a/learning.py b/learning.py
index 06a719745..d01968601 100644
--- a/learning.py
+++ b/learning.py
@@ -3,7 +3,8 @@
 from utils import (
     removeall, unique, product, mode, argmax, argmax_random_tie, isclose, gaussian,
     dotproduct, vector_add, scalar_vector_product, weighted_sample_with_replacement,
-    weighted_sampler, num_or_str, normalize, clip, sigmoid, print_table, DataFile
+    weighted_sampler, num_or_str, normalize, clip, sigmoid, print_table,
+    DataFile, sigmoid_derivative
 )

 import copy
@@ -568,13 +569,17 @@ def predict(example):
     return predict


+def random_weights(min_value, max_value, num_weights):
+    return [random.uniform(min_value, max_value) for _ in range(num_weights)]
+
+
 def BackPropagationLearner(dataset, net, learning_rate, epochs):
     """[Figure 18.23] The back-propagation algorithm for multilayer networks"""
     # Initialise weights
     for layer in net:
         for node in layer:
-            node.weights = [random.uniform(-0.5, 0.5)
-                            for i in range(len(node.weights))]
+            node.weights = random_weights(min_value=-0.5, max_value=0.5,
+                                          num_weights=len(node.weights))

     examples = dataset.examples
     '''
@@ -612,10 +617,11 @@ def BackPropagationLearner(dataset, net, learning_rate, epochs):
             delta = [[] for i in range(n_layers)]

             # Compute outer layer delta
-            err = [t_val[i] - o_nodes[i].value
-                   for i in range(o_units)]
-            delta[-1] = [(o_nodes[i].value) * (1 - o_nodes[i].value) *
-                         (err[i]) for i in range(o_units)]
+
+            # Error for the MSE cost function
+            err = [t_val[i] - o_nodes[i].value for i in range(o_units)]
+            # The activation function used is the sigmoid function
+            delta[-1] = [sigmoid_derivative(o_nodes[i].value) * err[i] for i in range(o_units)]

             # Backward pass
             h_layers = n_layers - 2
@@ -624,11 +630,9 @@ def BackPropagationLearner(dataset, net, learning_rate, epochs):
                 h_units = len(layer)
                 nx_layer = net[i+1]
                 # weights from each ith layer node to each (i + 1)th layer node
-                w = [[node.weights[k] for node in nx_layer]
-                     for k in range(h_units)]
+                w = [[node.weights[k] for node in nx_layer] for k in range(h_units)]

-                delta[i] = [(layer[j].value) * (1 - layer[j].value) *
-                            dotproduct(w[j], delta[i+1])
+                delta[i] = [sigmoid_derivative(layer[j].value) * dotproduct(w[j], delta[i+1])
                             for j in range(h_units)]

             # Update weights
@@ -754,7 +758,8 @@ def LinearLearner(dataset, learning_rate=0.01, epochs=100):
     X_col = [ones] + X_col

     # Initialize random weights
-    w = [random.uniform(-0.5, 0.5) for _ in range(len(idx_i) + 1)]
+    num_weights = len(idx_i) + 1
+    w = random_weights(min_value=-0.5, max_value=0.5, num_weights=num_weights)

     for epoch in range(epochs):
         err = []
@@ -769,7 +774,6 @@ def LinearLearner(dataset, learning_rate=0.01, epochs=100):
         for i in range(len(w)):
             w[i] = w[i] + learning_rate * (dotproduct(err, X_col[i]) / num_examples)

-
     def predict(example):
         x = [1] + example
         return dotproduct(w, x)
diff --git a/tests/test_learning.py b/tests/test_learning.py
index ec2cf18bd..8b7a923cc 100644
--- a/tests/test_learning.py
+++ b/tests/test_learning.py
@@ -1,7 +1,7 @@
 from learning import parse_csv, weighted_mode, weighted_replicate, DataSet, \
     PluralityLearner, NaiveBayesLearner, NearestNeighborLearner, \
     NeuralNetLearner, PerceptronLearner, DecisionTreeLearner, \
-    euclidean_distance, grade_learner, err_ratio
+    euclidean_distance, grade_learner, err_ratio, random_weights

 from utils import DataFile

@@ -124,3 +124,18 @@ def test_perceptron():

     assert grade_learner(perceptron, tests) > 1/2
     assert err_ratio(perceptron, iris) < 0.4
+
+
+def test_random_weights():
+    min_value = -0.5
+    max_value = 0.5
+    num_weights = 10
+
+    test_weights = random_weights(min_value, max_value, num_weights)
+
+    assert len(test_weights) == num_weights
+
+    for weight in test_weights:
+        assert min_value <= weight <= max_value
+
+
diff --git a/tests/test_utils.py b/tests/test_utils.py
index d158833d0..952a2d966 100644
--- a/tests/test_utils.py
+++ b/tests/test_utils.py
@@ -154,6 +154,14 @@ def test_gaussian():
     assert gaussian(3,1,3) == 0.3989422804014327


+def test_sigmoid_derivative():
+    value = 1
+    assert sigmoid_derivative(value) == 0
+
+    value = 3
+    assert sigmoid_derivative(value) == -6
+
+
 def test_step():
     assert step(1) == step(0.5) == 1
     assert step(0) == 1
diff --git a/utils.py b/utils.py
index 5afa43760..b8b295790 100644
--- a/utils.py
+++ b/utils.py
@@ -249,6 +249,11 @@ def clip(x, lowest, highest):
     return max(lowest, min(x, highest))


+def sigmoid_derivative(value):
+    """Derivative of the sigmoid function, where value = sigmoid(x)."""
+    return value * (1 - value)
+
+
 def sigmoid(x):
     """Return activation value of x with sigmoid function"""
     return 1/(1 + math.exp(-x))
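
For quick reference, below is a minimal, self-contained sketch of how the two helpers introduced by this patch behave (plain Python; sigmoid is copied from utils.py, the rest is standard library only). In BackPropagationLearner the node values are already sigmoid activations, which is why sigmoid_derivative takes the activation s and returns s * (1 - s) instead of recomputing sigmoid(x). The check at the end, with its arbitrary x = 0.3, is illustration only and not part of the patch:

import math
import random


def sigmoid(x):
    return 1 / (1 + math.exp(-x))


def sigmoid_derivative(value):
    # 'value' is assumed to be a sigmoid output s; s * (1 - s) equals sigmoid'(x)
    return value * (1 - value)


def random_weights(min_value, max_value, num_weights):
    return [random.uniform(min_value, max_value) for _ in range(num_weights)]


# The derivative taken at the activation matches a numerical derivative at x:
x = 0.3
s = sigmoid(x)
h = 1e-6
numeric = (sigmoid(x + h) - sigmoid(x - h)) / (2 * h)
assert abs(sigmoid_derivative(s) - numeric) < 1e-6

# random_weights draws each weight uniformly from [min_value, max_value]:
w = random_weights(min_value=-0.5, max_value=0.5, num_weights=10)
assert len(w) == 10 and all(-0.5 <= wi <= 0.5 for wi in w)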