In [6]:
'''
http://iamtrask.github.io/2015/07/12/basic-python-network/
'''


Out[6]:
'\nhttp://iamtrask.github.io/2015/07/12/basic-python-network/\n'

In [1]:
import numpy as np

In [2]:
# input dataset: four 3-bit samples
X = np.array([ [0, 0, 1], [0, 1, 1], [1, 0, 1], [1, 1, 1] ])

In [3]:
# output dataset: XOR of the first two input columns
Y = np.array([ [0, 1, 1, 0] ]).T

In [4]:
# randomly initialize the 3x4 hidden-layer weights with mean 0
syn0 = 2 * np.random.random( (3, 4) ) - 1

In [5]:
# randomly initialize the 4x1 output-layer weights with mean 0
syn1 = 2 * np.random.random( (4, 1) ) - 1

In [31]:
for j in range(60000):
    # forward pass: sigmoid activations for the hidden and output layers
    l1 = 1 / (1 + np.exp(- (np.dot(X, syn0))))
    l2 = 1 / (1 + np.exp(- (np.dot(l1, syn1))))
    # backpropagation: error scaled by the sigmoid slope at each layer
    l2_delta = (Y - l2) * (l2 * (1 - l2))
    l1_delta = l2_delta.dot(syn1.T) * (l1 * (1 - l1))
    # gradient-descent weight updates
    syn1 += l1.T.dot(l2_delta)
    syn0 += X.T.dot(l1_delta)
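
As a quick sanity check (not part of the original tutorial code), re-running the forward pass with the trained weights should give predictions close to Y = [0, 1, 1, 0]:

In [ ]:
# hypothetical check cell: forward pass with the trained syn0 / syn1
l1 = 1 / (1 + np.exp(- (np.dot(X, syn0))))
l2 = 1 / (1 + np.exp(- (np.dot(l1, syn1))))
print(l2)  # each row should be near the matching entry of Y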

In [7]:
def nonlin(x, deriv = False):
    # with deriv=True, x is assumed to already be a sigmoid output,
    # so the derivative simplifies to x * (1 - x)
    if deriv:
        return x * (1 - x)
    # logistic sigmoid
    return 1 / (1 + np.exp(-x))
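
A small usage sketch (an illustration, not part of the tutorial): because deriv=True expects a sigmoid output, the slope at x = 0 is found by first passing 0 through the sigmoid:

In [ ]:
a = nonlin(0.0)                  # sigmoid(0) = 0.5
print(nonlin(a, deriv = True))   # 0.5 * (1 - 0.5) = 0.25, the sigmoid's steepest slope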

In [8]:
# input dataset
X1 = np.array([ [0, 0, 1],
              [0, 1, 1],
              [1, 0, 1],
              [1, 1, 1] ])

In [9]:
# output dataset
y1 = np.array([ [ 0, 0, 1, 1]]).T

In [10]:
# seed random numbers to make the calculation deterministic (just good practice)
np.random.seed(1)

In [11]:
# initialize weights randomly with mean 0
s0 = 2 * np.random.random( (3, 1) ) - 1

In [22]:
for i in range(10000):
    # forward propagation
    l0 = X1
    l1 = nonlin(np.dot(l0, s0))

    # how much did we miss?
    l1_error = y1 - l1

    # multiply how much we missed by the slope of the sigmoid at the values in l1;
    # for the squared loss L = (y1 - l1)**2 / 2,
    # dL/ds0 = -l0.T.dot(l1_error * l1 * (1 - l1))
    l1_delta = l1_error * nonlin(l1, True)

    # update the weights: s0 += -dL/ds0 (gradient descent, learning rate 1)
    s0 += np.dot(l0.T, l1_delta)

print("output after training:")
print(l1)
print(s0)


output after training:
[[ 0.0040021 ]
 [ 0.00297283]
 [ 0.99757913]
 [ 0.99674033]]
[[ 11.5381446 ]
 [ -0.29833737]
 [ -5.5169334 ]]
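
With s0 trained, the network can score an input outside the training pattern; the cell below is a hypothetical example, not part of the original post. The large positive first weight reflects that column one of X1 perfectly predicts y1:

In [ ]:
# hypothetical prediction cell: forward-pass a new input through the trained weights
x_new = np.array([1, 0, 0])
print(nonlin(np.dot(x_new, s0)))  # near 1, driven by the large first weight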

In [23]:
X2 = np.array([ [0, 0, 1],
              [0, 1, 1],
              [1, 0, 1],
              [1, 1, 1]])

In [24]:
y2 = np.array( [[0, 1, 1, 0]]).T

In [25]:
# randomly initialize both layers' weights with mean 0
sy0 = 2 * np.random.random( (3, 4) ) - 1
sy1 = 2 * np.random.random( (4, 1) ) - 1

In [29]:
for j in range(60000):
    # feed forward through layers 0, 1, and 2
    l0 = X2
    l1 = nonlin(np.dot(l0, sy0))
    l2 = nonlin(np.dot(l1, sy1))

    # how much did we miss the target value?
    l2_error = y2 - l2
    if (j % 10000) == 0:
        print("Error:" + str(np.mean(np.abs(l2_error))))

    # in what direction is the target value?
    # were we really sure? if so, don't change too much.
    l2_delta = l2_error * nonlin(l2, deriv = True)

    # how much did each l1 value contribute to the l2 error
    # (according to the weights)?
    l1_error = l2_delta.dot(sy1.T)

    # the l1 error scaled by the sigmoid slope at l1
    l1_delta = l1_error * nonlin(l1, deriv = True)

    # update both weight matrices
    sy1 += l1.T.dot(l2_delta)
    sy0 += l0.T.dot(l1_delta)


Error:0.499441740099
Error:0.0109962761993
Error:0.00742138802019
Error:0.00593720036234
Error:0.00507862622414
Error:0.0045034259747
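
As a final check (hypothetical, not in the tutorial), a forward pass with the trained sy0 and sy1 should reproduce the XOR-like targets in y2:

In [ ]:
# hypothetical check cell: forward pass through both trained layers
pred = nonlin(np.dot(nonlin(np.dot(X2, sy0)), sy1))
print(np.round(pred, 3))  # should be close to [[0], [1], [1], [0]]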