In [1]:
'''
http://iamtrask.github.io/2015/07/27/python-network-part2/
'''


Out[1]:
'\nhttp://iamtrask.github.io/2015/07/27/python-network-part2/\n'

In [1]:
import numpy as np

In [9]:
X = np.array([ [0,0,1],[0,1,1],[1,0,1],[1,1,1] ])
y = np.array([[0,1,1,0]]).T
alpha, hidden_dim = (0.5, 4)
synapse_0 = 2*np.random.random((3,hidden_dim)) - 1
synapse_1 = 2*np.random.random((hidden_dim,1)) - 1
for j in range(60000):
    layer_1 = 1 / (1 + np.exp(-(np.dot(X,synapse_0))))
    layer_2 = 1 / (1 + np.exp(-(np.dot(layer_1,synapse_1))))
    layer_2_delta = (layer_2 - y) * (layer_2*(1-layer_2))
    layer_1_delta = layer_2_delta.dot(synapse_1.T) * (layer_1 * (1-layer_1))
    synapse_1 -= (alpha * layer_1.T.dot(layer_2_delta))
    synapse_0 -= (alpha * X.T.dot(layer_1_delta))

In [4]:
def sigmoid(x):
    output = 1 / (1 + np.exp(-x))
    return output

In [5]:
def sigmoid_output_to_derivative(output):
    return output * (1 - output)

In [6]:
X1 = np.array( [ [0, 1], [0, 1], [1, 0], [1, 0] ])
y1 = np.array( [[0, 0, 1, 1]]).T

In [7]:
np.random.seed(1)

In [8]:
# initialize weights randomly with mean 0
syn_0 = 2 * np.random.random( (2, 1) ) - 1

In [13]:
# one layer that contains 2 neural
for iter in range(10000):
    # forward propagation:
    layer_0 = X1
    layer_1 = sigmoid(np.dot(layer_0, syn_0))
    
    loss =  np.sum(np.square(layer_1 - y1)) / 2
    if iter % 2000 == 0:
        print("Loss: ", loss)
    layer_1_error = layer_1 - y1
    
    layer_1_delta = layer_1_error * sigmoid_output_to_derivative(layer_1)
    syn_0_derivative = np.dot(layer_0.T, layer_1_delta)
    
    # update weights
    syn_0 -= syn_0_derivative
print("output after traininig:")
print(layer_1)


Loss:  1.68616414077e-05
Loss:  1.58019191401e-05
Loss:  1.48673683188e-05
Loss:  1.40370603972e-05
Loss:  1.32944846773e-05
output after traininig:
[[ 0.00251266]
 [ 0.00251266]
 [ 0.99748737]
 [ 0.99748737]]

In [8]:
alphas = [0.001,0.01,0.1,1,10,100,1000]
X = np.array([ [0,0,1],[0,1,1],[1,0,1],[1,1,1] ])
y = np.array([[0,1,1,0]]).T
for alpha in alphas:
    print("\nTraining with alpha: " + str(alpha))
    np.random.seed(1)
    
    syn_0 = 2 * np.random.random( (3, 4) ) - 1
    syn_1 = 2 * np.random.random( (4, 1) ) - 1
    
    prev_sync_0_weight_update = np.zeros_like(syn_0)
    prev_sync_1_weight_update = np.zeros_like(syn_1)
    
    syn_0_direction_count = np.zeros_like(syn_0)
    syn_1_direction_count = np.zeros_like(syn_1)
    
    for j in range(60000):
        layer_0 = X
        layer_1 = sigmoid(np.dot(layer_0, syn_0))
        layer_2 = sigmoid(np.dot(layer_1, syn_1))
        
        loss = np.sum(np.square(layer_2 - y)) / 2
        if j % 20000 == 0:
            print("Loss: ", loss)
        
        layer_2_error = layer_2 - y
        
        layer_2_delta = layer_2_error * sigmoid_output_to_derivative(layer_2)
        
        layer_1_error = layer_2_delta.dot(syn_1.T)
        
        layer_1_delta = layer_1_error * sigmoid_output_to_derivative(layer_1)
        
        syn_1_weight_update = (layer_1.T.dot(layer_2_delta))
        syn_0_weight_update = (layer_0.T.dot(layer_1_delta))
        
        if (j > 0):
            syn_0_direction_count += np.abs(((syn_0_weight_update > 0) + 0) - ((prev_sync_0_weight_update > 0) + 0))
            syn_1_direction_count += np.abs(((syn_1_weight_update > 0) + 0) - ((prev_sync_1_weight_update > 0) + 0))
        
        syn_1 -= syn_1_weight_update
        syn_0 -= syn_0_weight_update
        
        prev_sync_0_weight_update = syn_0_weight_update
        prev_sync_1_weight_update = syn_1_weight_update
        
    print("Sync 0: ", syn_0)
    print("sync 0 update direction change: ", syn_0_direction_count)
    print("Sync 1: ", syn_1)
    print("Sync 1 update direction change: ", syn_1_direction_count)


Training with alpha: 0.001
Loss:  0.495186535402
Loss:  6.85224212314e-05
Loss:  3.20160508308e-05
Sync 0:  [[ 4.6013571   4.17197193 -6.30956245 -4.19745118]
 [-2.58413484 -5.81447929 -6.60793435 -3.68396123]
 [ 0.97538679 -2.02685775  2.52949751  5.84371739]]
sync 0 update direction change:  [[ 1.  1.  0.  0.]
 [ 2.  0.  0.  2.]
 [ 4.  2.  1.  1.]]
Sync 1:  [[ -6.96765763]
 [  7.14101949]
 [-10.31917382]
 [  7.86128405]]
Sync 1 update direction change:  [[ 2.]
 [ 1.]
 [ 0.]
 [ 1.]]

Training with alpha: 0.01
Loss:  0.495186535402
Loss:  6.85224212314e-05
Loss:  3.20160508308e-05
Sync 0:  [[ 4.6013571   4.17197193 -6.30956245 -4.19745118]
 [-2.58413484 -5.81447929 -6.60793435 -3.68396123]
 [ 0.97538679 -2.02685775  2.52949751  5.84371739]]
sync 0 update direction change:  [[ 1.  1.  0.  0.]
 [ 2.  0.  0.  2.]
 [ 4.  2.  1.  1.]]
Sync 1:  [[ -6.96765763]
 [  7.14101949]
 [-10.31917382]
 [  7.86128405]]
Sync 1 update direction change:  [[ 2.]
 [ 1.]
 [ 0.]
 [ 1.]]

Training with alpha: 0.1
Loss:  0.495186535402
Loss:  6.85224212314e-05
Loss:  3.20160508308e-05
Sync 0:  [[ 4.6013571   4.17197193 -6.30956245 -4.19745118]
 [-2.58413484 -5.81447929 -6.60793435 -3.68396123]
 [ 0.97538679 -2.02685775  2.52949751  5.84371739]]
sync 0 update direction change:  [[ 1.  1.  0.  0.]
 [ 2.  0.  0.  2.]
 [ 4.  2.  1.  1.]]
Sync 1:  [[ -6.96765763]
 [  7.14101949]
 [-10.31917382]
 [  7.86128405]]
Sync 1 update direction change:  [[ 2.]
 [ 1.]
 [ 0.]
 [ 1.]]

Training with alpha: 1
Loss:  0.495186535402
Loss:  6.85224212314e-05
Loss:  3.20160508308e-05
Sync 0:  [[ 4.6013571   4.17197193 -6.30956245 -4.19745118]
 [-2.58413484 -5.81447929 -6.60793435 -3.68396123]
 [ 0.97538679 -2.02685775  2.52949751  5.84371739]]
sync 0 update direction change:  [[ 1.  1.  0.  0.]
 [ 2.  0.  0.  2.]
 [ 4.  2.  1.  1.]]
Sync 1:  [[ -6.96765763]
 [  7.14101949]
 [-10.31917382]
 [  7.86128405]]
Sync 1 update direction change:  [[ 2.]
 [ 1.]
 [ 0.]
 [ 1.]]

Training with alpha: 10
Loss:  0.495186535402
Loss:  6.85224212314e-05
Loss:  3.20160508308e-05
Sync 0:  [[ 4.6013571   4.17197193 -6.30956245 -4.19745118]
 [-2.58413484 -5.81447929 -6.60793435 -3.68396123]
 [ 0.97538679 -2.02685775  2.52949751  5.84371739]]
sync 0 update direction change:  [[ 1.  1.  0.  0.]
 [ 2.  0.  0.  2.]
 [ 4.  2.  1.  1.]]
Sync 1:  [[ -6.96765763]
 [  7.14101949]
 [-10.31917382]
 [  7.86128405]]
Sync 1 update direction change:  [[ 2.]
 [ 1.]
 [ 0.]
 [ 1.]]

Training with alpha: 100
Loss:  0.495186535402
Loss:  6.85224212314e-05
Loss:  3.20160508308e-05
Sync 0:  [[ 4.6013571   4.17197193 -6.30956245 -4.19745118]
 [-2.58413484 -5.81447929 -6.60793435 -3.68396123]
 [ 0.97538679 -2.02685775  2.52949751  5.84371739]]
sync 0 update direction change:  [[ 1.  1.  0.  0.]
 [ 2.  0.  0.  2.]
 [ 4.  2.  1.  1.]]
Sync 1:  [[ -6.96765763]
 [  7.14101949]
 [-10.31917382]
 [  7.86128405]]
Sync 1 update direction change:  [[ 2.]
 [ 1.]
 [ 0.]
 [ 1.]]

Training with alpha: 1000
Loss:  0.495186535402
Loss:  6.85224212314e-05
Loss:  3.20160508308e-05
Sync 0:  [[ 4.6013571   4.17197193 -6.30956245 -4.19745118]
 [-2.58413484 -5.81447929 -6.60793435 -3.68396123]
 [ 0.97538679 -2.02685775  2.52949751  5.84371739]]
sync 0 update direction change:  [[ 1.  1.  0.  0.]
 [ 2.  0.  0.  2.]
 [ 4.  2.  1.  1.]]
Sync 1:  [[ -6.96765763]
 [  7.14101949]
 [-10.31917382]
 [  7.86128405]]
Sync 1 update direction change:  [[ 2.]
 [ 1.]
 [ 0.]
 [ 1.]]

In [ ]: