In [6]:
'''
http://iamtrask.github.io/2015/07/12/basic-python-network/
'''
Out[6]:
In [1]:
import numpy as np
In [2]:
# Input dataset: 4 samples x 3 features; the third column is a
# constant 1 (presumably serving as a bias input).
X = np.array([[0, 0, 1],
              [0, 1, 1],
              [1, 0, 1],
              [1, 1, 1]])
In [3]:
# Targets, shaped (4, 1): the XOR of the first two input columns.
Y = np.array([0, 1, 1, 0]).reshape(-1, 1)
In [4]:
# First-layer weights (3 inputs -> 4 hidden units), uniform in [-1, 1).
# NOTE(review): no RNG seed is set before this cell, so each run differs.
syn0 = np.random.random((3, 4)) * 2 - 1
In [5]:
# Second-layer weights (4 hidden units -> 1 output), uniform in [-1, 1).
syn1 = np.random.random((4, 1)) * 2 - 1
In [31]:
# Train the 3-4-1 network: forward pass through two sigmoid layers,
# then backpropagate the error and update both weight matrices.
# (Loop-body indentation restored; the notebook export flattened it.)
for step in range(60000):
    # Forward pass: sigmoid at each layer.
    hidden = 1 / (1 + np.exp(-X.dot(syn0)))
    output = 1 / (1 + np.exp(-hidden.dot(syn1)))
    # Error scaled by the sigmoid slope at each layer (backpropagation).
    output_delta = (Y - output) * (output * (1 - output))
    hidden_delta = output_delta.dot(syn1.T) * (hidden * (1 - hidden))
    # Full-batch weight updates with an implicit learning rate of 1.
    syn1 += hidden.T.dot(output_delta)
    syn0 += X.T.dot(hidden_delta)
In [7]:
def nonlin(x, deriv=False):
    """Sigmoid activation 1 / (1 + e^-x), or its derivative.

    Parameters
    ----------
    x : ndarray or float
        Input value(s). When ``deriv=True``, ``x`` is assumed to
        already be a sigmoid *output*, so the derivative reduces to
        ``x * (1 - x)``.
    deriv : bool, optional
        If True, return the sigmoid derivative evaluated at sigmoid
        output ``x`` instead of the sigmoid itself.

    Returns
    -------
    ndarray or float
        Sigmoid of ``x``, or its derivative when ``deriv`` is True.
    """
    if deriv:  # idiomatic truth test instead of `deriv == True`
        return x * (1 - x)
    return 1 / (1 + np.exp(-x))
In [8]:
# Input dataset: 4 samples x 3 features; the third column is a
# constant 1 (presumably a bias input).
X1 = np.array([[0, 0, 1], [0, 1, 1], [1, 0, 1], [1, 1, 1]])
In [9]:
# Output dataset, shaped (4, 1): the target equals the first input column.
y1 = np.array([0, 0, 1, 1]).reshape(-1, 1)
In [10]:
# Seed the global NumPy RNG so the calculation is deterministic
# across runs (just a good practice).
np.random.seed(1)
In [11]:
# Single-layer weights (3 inputs -> 1 output), drawn uniformly from
# [-1, 1) so they are centered on zero.
s0 = np.random.random((3, 1)) * 2 - 1
In [22]:
# Train the single-layer network with full-batch gradient descent.
# (Loop-body indentation restored; the notebook export flattened it.
# The summary prints are placed after the loop, matching their
# "output after training" wording.)
for epoch in range(10000):  # `epoch` avoids shadowing the builtin `iter`
    # Forward propagation: layer 0 is the input, layer 1 the prediction.
    l0 = X1
    l1 = nonlin(np.dot(l0, s0))

    # How much did we miss?
    l1_error = y1 - l1

    # Scale the error by the sigmoid slope at l1: for the squared-error
    # loss L = 0.5*(y1 - l1)^2, the gradient w.r.t. the weights is
    # -l0.T.dot(l1_error * sigmoid'(l1)), so adding l0.T.dot(l1_delta)
    # below is a gradient-descent step with learning rate 1.
    l1_delta = l1_error * nonlin(l1, True)

    # Update the weights.
    s0 += np.dot(l0.T, l1_delta)

print("output after training:")  # fixed typo: "traininig"
print(l1)
print(s0)
In [23]:
# Input dataset for the two-layer network: 4 samples x 3 features,
# third column constant 1 (presumably a bias input).
X2 = np.array([[0, 0, 1], [0, 1, 1], [1, 0, 1], [1, 1, 1]])
In [24]:
# Targets, shaped (4, 1): the XOR of the first two input columns.
y2 = np.array([0, 1, 1, 0]).reshape(-1, 1)
In [25]:
# Randomly initialize both weight layers, uniform in [-1, 1)
# (mean 0). Draw order is unchanged, so the RNG stream matches.
sy0 = np.random.random((3, 4)) * 2 - 1  # input (3) -> hidden (4)
sy1 = np.random.random((4, 1)) * 2 - 1  # hidden (4) -> output (1)
In [29]:
# Train the 3-4-1 network with backpropagation, reporting the mean
# absolute output error every 10000 iterations.
# (Loop-body indentation restored; the notebook export flattened it.)
for j in range(60000):
    # Feed forward through layers 0, 1, and 2.
    l0 = X2
    l1 = nonlin(np.dot(l0, sy0))
    l2 = nonlin(np.dot(l1, sy1))

    # How much did we miss the target value?
    l2_error = y2 - l2
    if j % 10000 == 0:
        print("Error:" + str(np.mean(np.abs(l2_error))))

    # In what direction is the target value? Scale by the sigmoid
    # slope so confident predictions change less.
    l2_delta = l2_error * nonlin(l2, deriv=True)

    # How much did each l1 value contribute to the l2 error,
    # according to the weights?
    l1_error = l2_delta.dot(sy1.T)
    l1_delta = l1_error * nonlin(l1, deriv=True)

    # Full-batch weight updates (learning rate 1).
    sy1 += l1.T.dot(l2_delta)
    sy0 += l0.T.dot(l1_delta)