In [1]:
import numpy as np
In [2]:
"""
Gradient Descent
Example
f(x) = x**4 -3*x**3 + 2
f'(x) = 4*x**3 - 9*x**2
We want f'(x) = 0, i.e. the point where the slope is zero (a minimum)
Initial guess: x=4
f'(4) = 112
Second guess: x=-1
f'(-1) = -13
=> f'(4) > 0 => decrease x | f'(-1) < 0 => increase x
x_new = x_old - f'(x)? => not ideal, as the steps would be far too steep
x_new = x_old - alpha * f'(x) => but what value should we pick for alpha?
"""
Out[2]:
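A single worked step shows why alpha must be small: with alpha = 0.01, the first update from x = 4 moves by 0.01 * f'(4) = 1.12, landing at 2.88 instead of overshooting. A minimal sketch of that one step:

In [ ]:
# one explicit gradient step: x_new = x_old - alpha * f'(x_old)
x0 = 4
print(x0 - 0.01 * (4*x0**3 - 9*x0**2))  # 4 - 0.01 * 112 = 2.88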
In [3]:
alpha = 0.01
x_old = 0
x_new = 4  # initial guess
precision = 0.00001

def f_derivative(x):
    return 4*x**3 - 9*x**2

while abs(x_new - x_old) > precision:
    x_old = x_new
    x_new = x_old - alpha * f_derivative(x_old)
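Since f'(x) = x**2 * (4*x - 9), the nonzero stationary point is x = 9/4 = 2.25, and starting from x = 4 the loop should converge there. A quick check of the result:

In [ ]:
print("local minimum occurs at:", x_new)  # expected: roughly 2.25 (= 9/4)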
In [4]:
# sigmoid function; when deriv=True, x is assumed to already be a sigmoid
# output, so x*(1-x) is the derivative evaluated at that point
def sigmoid(x, deriv=False):
    if deriv:
        return x*(1-x)
    return 1/(1+np.exp(-x))
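The deriv=True branch works because sigmoid'(z) = sigmoid(z) * (1 - sigmoid(z)); it therefore expects the already-activated value, not the raw input. A small numeric check against a finite difference (a sketch):

In [ ]:
z = 0.5
a = sigmoid(z)
eps = 1e-6
fd = (sigmoid(z + eps) - sigmoid(z - eps)) / (2 * eps)  # finite-difference derivative
print(sigmoid(a, deriv=True), fd)  # the two values should agree closely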
In [5]:
# input dataset
X = np.array([
    [0, 0, 1],
    [0, 1, 1],
    [1, 0, 1],
    [1, 1, 1]
])
In [6]:
# output dataset
y = np.array([[0, 0, 1, 1]]).T
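Note that y is exactly the first column of X, so a single nonzero weight is enough to solve this task. A quick check of that correlation (a sketch):

In [ ]:
print((X[:, :1] == y).all())  # True: the output equals the first input column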
In [7]:
# fix the seed
np.random.seed(1)
In [8]:
# initialize weights randomly with mean 0 (shape: 3 inputs -> 1 output)
weights0 = 2*np.random.random((3,1)) - 1
weights0
Out[8]:
array([[-0.16595599],
       [ 0.44064899],
       [-0.99977125]])
In [9]:
for i in range(10000):
    # forward propagation
    l0 = X
    l1 = sigmoid(np.dot(l0, weights0))
    # how much did we miss?
    l1_error = y - l1
    # multiply how much we missed by the slope of the sigmoid at the values in l1
    l1_delta = l1_error * sigmoid(l1, True)
    # update weights (a full delta step, with no separate learning rate)
    weights0 += np.dot(l0.T, l1_delta)
In [10]:
l1
Out[10]:
array([[0.00966449],
       [0.00786506],
       [0.99358898],
       [0.99211957]])
In [11]:
def sigmoid(x):
    return 1.0/(1.0 + np.exp(-x))

# derivative written in terms of the activation a = sigmoid(z), because the
# network below applies it to already-activated outputs
def sigmoid_prime(a):
    return a*(1.0 - a)

def tanh(x):
    return np.tanh(x)

# likewise in terms of a = tanh(z): tanh'(z) = 1 - tanh(z)**2
def tanh_prime(a):
    return 1.0 - a**2
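Both *_prime helpers follow the same convention: they take the activation value, not the pre-activation. For tanh, tanh'(z) = 1 - tanh(z)**2, so feeding the activation back in reproduces the derivative; a finite-difference check (a sketch):

In [ ]:
z = 0.3
a = np.tanh(z)
eps = 1e-6
fd = (np.tanh(z + eps) - np.tanh(z - eps)) / (2 * eps)  # finite-difference derivative
print(tanh_prime(a), fd)  # should agree closely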
In [12]:
class NeuralNetwork(object):
    def __init__(self, layers, learning_rate, epochs):
        # fix the seed so the weight initialization is reproducible
        np.random.seed(1)
        self.activation = sigmoid
        self.activation_prime = sigmoid_prime
        self.weights = []
        # input layer -> hidden layers
        for i in range(1, len(layers) - 1):
            # + 1 due to bias
            r = 2 * np.random.random((layers[i-1] + 1, layers[i] + 1)) - 1
            self.weights.append(r)
        # last hidden layer -> output layer (no bias unit on the output)
        r = 2 * np.random.random((layers[i] + 1, layers[i+1])) - 1
        self.weights.append(r)
        self.learning_rate = learning_rate
        self.epochs = epochs

    def train(self, X, y):
        # add a bias column to X
        ones = np.atleast_2d(np.ones(X.shape[0]))
        X = np.concatenate((ones.T, X), axis=1)
        for epoch in range(self.epochs):
            # forward propagation on one randomly chosen sample (stochastic update)
            i = np.random.randint(X.shape[0])
            a = [X[i]]
            for l in range(len(self.weights)):
                z = np.dot(a[l], self.weights[l])
                activation = self.activation(z)
                a.append(activation)  # (input row, hidden activations, output activation)
            # how much did we miss?
            error = y[i] - a[-1]
            deltas = [error * self.activation_prime(a[-1])]
            # propagate deltas backwards through the hidden layers
            for l in range(len(a) - 2, 0, -1):
                deltas.append(deltas[-1].dot(self.weights[l].T) * self.activation_prime(a[l]))
            # [level3(output)->level2(hidden)] => [level2(hidden)->level3(output)]
            deltas.reverse()
            # backpropagation => update weights
            for j in range(len(self.weights)):
                layer = np.atleast_2d(a[j])
                delta = np.atleast_2d(deltas[j])
                self.weights[j] += self.learning_rate * layer.T.dot(delta)
            if epoch % 10000 == 0:
                print("epochs: {}".format(epoch))

    def predict(self, x):
        # prepend the bias term, then feed forward
        a = np.concatenate((np.ones(1).T, np.array(x)), axis=0)
        for l in range(len(self.weights)):
            a = self.activation(np.dot(a, self.weights[l]))
        return a
In [13]:
"""
Convergence depends on the random initialization of weights
"""
learning_rate = 0.2
epochs = 100000
# layers: 2 inputs, 10 hidden units, 1 output
nn = NeuralNetwork([2, 10, 1], learning_rate, epochs)
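With the bias units folded into the weight matrices, a [2, 10, 1] network stores two matrices: (2+1) x (10+1) for input -> hidden and (10+1) x 1 for hidden -> output. A quick shape check (a sketch):

In [ ]:
print([w.shape for w in nn.weights])  # expected: [(3, 11), (11, 1)]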
In [14]:
# XOR problem: not linearly separable, so a hidden layer is required
X = np.array([[0, 0],
              [1, 0],
              [0, 1],
              [1, 1]])
In [15]:
y = np.array([0, 1, 1, 0])
In [16]:
nn.train(X, y)
In [17]:
for i in X:
    print(i, nn.predict(i))
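If training converged, the four raw outputs should sit close to 0, 1, 1, 0; rounding them recovers the XOR truth table (a sketch, since convergence depends on the random initialization noted above):

In [ ]:
for i in X:
    print(i, int(round(nn.predict(i)[0])))  # hard 0/1 labels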