@@ -4,7 +4,7 @@
     removeall, unique, product, mode, argmax, argmax_random_tie, isclose, gaussian,
     dotproduct, vector_add, scalar_vector_product, weighted_sample_with_replacement,
     weighted_sampler, num_or_str, normalize, clip, sigmoid, print_table,
-    open_data, sigmoid_derivative, probability, norm, matrix_multiplication
+    open_data, sigmoid_derivative, probability, norm, matrix_multiplication, relu, relu_derivative
 )
 
 import copy
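
The diff imports relu and relu_derivative from utils but does not show their definitions. A minimal sketch consistent with how they are called below, where both operate on a unit's cached scalar output (an assumption; not taken from this diff):

def relu(x):
    """Rectified linear unit: max(0, x)."""
    return max(0, x)

def relu_derivative(value):
    """Derivative of relu, expressed in terms of the unit's output value.
    Since value = max(0, x), value > 0 exactly when x > 0."""
    return 1 if value > 0 else 0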
@@ -652,7 +652,7 @@ def predict(example):
 
 
 def NeuralNetLearner(dataset, hidden_layer_sizes=None,
-                     learning_rate=0.01, epochs=100):
+                     learning_rate=0.01, epochs=100, activation=sigmoid):
     """Layered feed-forward network.
     hidden_layer_sizes: List of number of hidden units per hidden layer
     learning_rate: Learning rate of gradient descent
@@ -664,9 +664,9 @@ def NeuralNetLearner(dataset, hidden_layer_sizes=None,
     o_units = len(dataset.values[dataset.target])
 
     # construct a network
-    raw_net = network(i_units, hidden_layer_sizes, o_units)
+    raw_net = network(i_units, hidden_layer_sizes, o_units, activation)
     learned_net = BackPropagationLearner(dataset, raw_net,
-                                         learning_rate, epochs)
+                                         learning_rate, epochs, activation)
 
     def predict(example):
         # Input nodes
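
With the new keyword threaded through, callers choose the activation when constructing the learner. A hedged usage sketch (assumes the iris DataSet shipped with the repository; not part of this diff):

iris = DataSet(name='iris')
nn = NeuralNetLearner(iris, hidden_layer_sizes=[5],
                      learning_rate=0.01, epochs=100, activation=relu)
print(nn([5.1, 3.5, 1.4, 0.2]))  # predicted class for one example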
@@ -695,7 +695,7 @@ def random_weights(min_value, max_value, num_weights):
     return [random.uniform(min_value, max_value) for _ in range(num_weights)]
 
 
-def BackPropagationLearner(dataset, net, learning_rate, epochs):
+def BackPropagationLearner(dataset, net, learning_rate, epochs, activation=sigmoid):
     """[Figure 18.23] The back-propagation algorithm for multilayer networks"""
     # Initialise weights
     for layer in net:
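
Because activation defaults to sigmoid, the extended signature stays backward compatible; existing call sites need no change (illustrative only):

learned_net = BackPropagationLearner(dataset, raw_net, 0.01, 100)        # sigmoid, as before
learned_net = BackPropagationLearner(dataset, raw_net, 0.01, 100, relu)  # opt in to relu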
@@ -743,8 +743,11 @@ def BackPropagationLearner(dataset, net, learning_rate, epochs):
             # Error for the MSE cost function
             err = [t_val[i] - o_nodes[i].value for i in range(o_units)]
 
-            # The activation function used is the sigmoid function
-            delta[-1] = [sigmoid_derivative(o_nodes[i].value) * err[i] for i in range(o_units)]
+            # The activation function used is either relu or sigmoid
+            if activation == sigmoid:
+                delta[-1] = [sigmoid_derivative(o_nodes[i].value) * err[i] for i in range(o_units)]
+            else:
+                delta[-1] = [relu_derivative(o_nodes[i].value) * err[i] for i in range(o_units)]
 
             # Backward pass
             h_layers = n_layers - 2
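
An alternative worth noting (a sketch, not what this diff does): select the derivative function once and reuse it, which avoids repeating the if/else at every layer and extends more easily to further activations:

derivative = sigmoid_derivative if activation == sigmoid else relu_derivative
delta[-1] = [derivative(o_nodes[i].value) * err[i] for i in range(o_units)]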
@@ -756,7 +759,11 @@ def BackPropagationLearner(dataset, net, learning_rate, epochs):
                 # weights from each ith layer node to each i + 1th layer node
                 w = [[node.weights[k] for node in nx_layer] for k in range(h_units)]
 
-                delta[i] = [sigmoid_derivative(layer[j].value) * dotproduct(w[j], delta[i + 1])
+                if activation == sigmoid:
+                    delta[i] = [sigmoid_derivative(layer[j].value) * dotproduct(w[j], delta[i + 1])
+                                for j in range(h_units)]
+                else:
+                    delta[i] = [relu_derivative(layer[j].value) * dotproduct(w[j], delta[i + 1])
                             for j in range(h_units)]
 
             # Update weights
@@ -800,14 +807,14 @@ class NNUnit:
     weights: Weights to incoming connections
     """
 
-    def __init__(self, weights=None, inputs=None):
+    def __init__(self, activation=sigmoid, weights=None, inputs=None):
         self.weights = weights or []
         self.inputs = inputs or []
         self.value = None
-        self.activation = sigmoid
+        self.activation = activation
 
 
-def network(input_units, hidden_layer_sizes, output_units):
+def network(input_units, hidden_layer_sizes, output_units, activation=sigmoid):
     """Create Directed Acyclic Network of given number layers.
     hidden_layer_sizes: List of number of neuron units in each hidden layer,
     excluding input and output layers
@@ -818,7 +825,7 @@ def network(input_units, hidden_layer_sizes, output_units):
     else:
         layers_sizes = [input_units] + [output_units]
 
-    net = [[NNUnit() for n in range(size)]
+    net = [[NNUnit(activation) for n in range(size)]
            for size in layers_sizes]
     n_layers = len(net)
 
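
A quick sanity check that the parameter reaches every unit (hypothetical REPL usage, assuming the relu helper sketched earlier):

raw = network(input_units=4, hidden_layer_sizes=[3], output_units=2, activation=relu)
assert all(unit.activation is relu for layer in raw for unit in layer)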