Backpropagation


In [1]:
import numpy as np

Gradient Descent


In [2]:
"""
Gradient Descent
Example
f(x) = x**4 -3*x**3 + 2
f'(x) = 4*x**3 - 9*x**2

We want f'(x) = 0 - slope is zero / minima

Initial guess: x=4
f'(4) = 112

Second guess: x=-1
f'(-1) = -13

=> f'(4) > 0 => decrease x | f'(-1) < 0 => increase x

x_new = x_old - f'(x) ??? => not idea as this would be too stepp

x_new = x_old - alpha * f'(x) => what kind of value for alpha??
"""



In [3]:
alpha = 0.01       # learning rate / step size
x_old = 0
x_new = 4          # initial guess
precision = 0.00001

def f_derivative(x):
    # f'(x) for f(x) = x**4 - 3*x**3 + 2
    return 4*x**3 - 9*x**2

# repeat x_new = x_old - alpha * f'(x_old) until the update is smaller than precision
while abs(x_new - x_old) > precision:
    x_old = x_new
    x_new = x_old - alpha * f_derivative(x_old)
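
As a quick check (assuming the cell above has run): f'(x) = x**2 * (4*x - 9), so the slope vanishes at x = 9/4 = 2.25 and the loop should land very close to that value.

# compare the gradient-descent result with the analytic minimum at x = 9/4
print("found x = {:.5f}, analytic minimum at x = {:.5f}".format(x_new, 9/4))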

In [4]:
# sigmoid activation; when deriv=True, x is assumed to already be a sigmoid
# output s = sigmoid(z), so x*(1-x) is the derivative ds/dz at that point
def sigmoid(x, deriv=False):
    if deriv:
        return x*(1-x)
    return 1/(1+np.exp(-x))
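
A small numerical check of that convention (a sketch; z is just an illustrative test point): feeding the sigmoid output back in with deriv=True matches a central-difference estimate of the derivative.

# compare the analytic derivative s*(1-s) with a finite-difference estimate
z = 0.5
s = sigmoid(z)
numeric = (sigmoid(z + 1e-6) - sigmoid(z - 1e-6)) / 2e-6
analytic = sigmoid(s, deriv=True)
print(numeric, analytic)  # both approximately 0.2350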

In [5]:
# input dataset
X = np.array([
        [0, 0, 1],
        [0, 1, 1],
        [1, 0, 1],
        [1, 1, 1]
    ])

In [6]:
# output dataset
y = np.array([[0, 0, 1, 1]]).T

In [7]:
# fix the seed
np.random.seed(1)

In [8]:
weights0 = 2*np.random.random((3,1)) - 1
weights0


Out[8]:
array([[-0.16595599],
       [ 0.44064899],
       [-0.99977125]])

In [9]:
for i in range(10000):
    
    # forward propagation
    l0 = X
    l1 = sigmoid(np.dot(l0, weights0))
    
    # how much did we miss?
    l1_error = y - l1
    
    # multiply how much we missed by the slope of the sigmoid at the values in l1
    l1_delta = l1_error * sigmoid(l1, True)
    
    # update weights
    weights0 += np.dot(l0.T, l1_delta)

In [10]:
l1


Out[10]:
array([[ 0.00966449],
       [ 0.00786506],
       [ 0.99358898],
       [ 0.99211957]])
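
Since the target y is simply the first input column, the trained weights should also score an unseen input by its first feature; a small sketch (assuming the training loop above has run):

# forward pass on an input that is not in X; its first feature is 1,
# so the output should come out close to 1
new_input = np.array([1, 1, 0])
print(sigmoid(np.dot(new_input, weights0)))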

Backprop Example for a Simple Neural Network

$$ \sigma(x) = \frac{1}{1 + \exp(-x)} $$

$$ \frac{d \sigma(x)}{dx} = \sigma(x) \cdot (1 - \sigma(x)) $$
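
The derivative identity follows from differentiating the quotient directly:

$$ \frac{d \sigma(x)}{dx} = \frac{\exp(-x)}{(1 + \exp(-x))^2} = \frac{1}{1 + \exp(-x)} \cdot \frac{\exp(-x)}{1 + \exp(-x)} = \sigma(x) \cdot (1 - \sigma(x)) $$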

In [11]:
def sigmoid(x):
    return 1.0/(1.0 + np.exp(-x))

def sigmoid_prime(x):
    # derivative written in terms of the pre-activation x
    return sigmoid(x)*(1.0-sigmoid(x))

def tanh(x):
    return np.tanh(x)

def tanh_prime(x):
    # derivative written in terms of the activation output: with a = tanh(z), tanh'(z) = 1 - a**2
    return 1.0 - x**2
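
Note the two different conventions above: sigmoid_prime takes the pre-activation x, while tanh_prime expects the activation output a = tanh(x). A quick illustration (z is just a test value):

# tanh'(z) = 1 - tanh(z)**2, so tanh_prime must be fed the tanh output
z = 0.3
a = tanh(z)
print(tanh_prime(a), 1.0 - np.tanh(z)**2)  # identical
print(sigmoid_prime(z))                    # sigmoid_prime takes z directly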

In [12]:
class NeuralNetwork(object):
    
    # fix the seed (note: this line runs once, at class-definition time)
    np.random.seed(1)
    
    def __init__(self, layers, learning_rate, epochs):
        self.activation = sigmoid
        self.activation_prime = sigmoid_prime
        self.weights = []
        # input layers -> hidden layers
        for i in range(1, len(layers) - 1):
            # + 1 due to bias
            r = 2 * np.random.random((layers[i-1] + 1, layers[i] + 1)) -1
            self.weights.append(r)
        # hidden layers -> output layer
        r = 2 * np.random.random((layers[i] + 1, layers[i+1])) - 1
        self.weights.append(r)
        self.learning_rate = learning_rate
        self.epochs = epochs
        
    def train(self, X, y):
        # Add bias to X
        ones = np.atleast_2d(np.ones(X.shape[0]))
        X = np.concatenate((ones.T, X), axis=1)
        
        for epoch in range(self.epochs):
            # forward propagation
            i = np.random.randint(X.shape[0])
            a = [X[i]]  # pick one random training sample (stochastic / online updates)

            for l in range(len(self.weights)):
                z = np.dot(a[l], self.weights[l])
                activation = self.activation(z)
                a.append(activation) # (row, a_hidden_layer, a_output_layer)
                
            # how much did we miss??
            error = y[i] - a[-1]
            deltas = [error * self.activation_prime(a[-1])]
            
            for l in range(len(a) - 2, 0, -1):
                deltas.append(deltas[-1].dot(self.weights[l].T) * self.activation_prime(a[l]))
            
            # [level3(output)->level2(hidden)]  => [level2(hidden)->level3(output)]
            deltas.reverse()            
            
            # backpropagation => update weights
            for j in range(len(self.weights)):  # j avoids shadowing the sample index i
                layer = np.atleast_2d(a[j])
                delta = np.atleast_2d(deltas[j])
                self.weights[j] += self.learning_rate * layer.T.dot(delta)
                
            if epoch % 10000 == 0: print("epochs: {}".format(epoch))
                
    def predict(self, x):
        # prepend the bias term, then run a plain forward pass
        a = np.concatenate((np.ones(1), np.array(x)), axis=0)
        for l in range(len(self.weights)):
            a = self.activation(np.dot(a, self.weights[l]))
        return a

In [13]:
"""
Convergence depends on the random initialization of weights
"""
learning_rate = 0.2
epochs = 100000
nn = NeuralNetwork([2, 10, 1], learning_rate, epochs)
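
The bias handling in the constructor shows up directly in the weight shapes: for layers = [2, 10, 1] there is a (2+1, 10+1) input-to-hidden matrix and a (10+1, 1) hidden-to-output matrix (a quick check, assuming the cell above has run):

# inspect the weight shapes created for the [2, 10, 1] architecture
print([w.shape for w in nn.weights])  # [(3, 11), (11, 1)]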

In [14]:
# XOR Problem
X = np.array([[0, 0],
              [1, 0],
              [0, 1],
              [1, 1]])

In [15]:
y = np.array([0, 1, 1, 0])

In [16]:
nn.train(X, y)


epochs: 0
epochs: 10000
epochs: 20000
epochs: 30000
epochs: 40000
epochs: 50000
epochs: 60000
epochs: 70000
epochs: 80000
epochs: 90000

In [17]:
for i in X:
    print(i, nn.predict(i))


[0 0] [ 0.00716588]
[1 0] [ 0.98944644]
[0 1] [ 0.99007747]
[1 1] [ 0.00804965]
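
Thresholding the raw sigmoid outputs at 0.5 recovers the XOR truth table (a small follow-up, assuming the cells above have run):

# round the network outputs to 0/1 class labels
for i in X:
    print(i, int(nn.predict(i)[0] > 0.5))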
