In [1]:
%pylab inline
%config InlineBackend.figure_format = 'retina'
In [2]:
import numpy as np
from random import random
from IPython.display import FileLink, FileLinks
In [3]:
def σ(z):
return 1/(1 + np.e**(-z))
def σ_prime(z):
return np.e**(z) / (np.e**z + 1)**2
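Since $\sigma(z) = 1/(1+e^{-z})$, its derivative can also be written as $\sigma'(z) = \sigma(z)\,(1 - \sigma(z))$; `σ_prime` above is just that identity in expanded form. As a quick sanity check (a hypothetical cell, not part of the original run):
In [ ]:
# sanity check (hypothetical cell): σ'(z) should equal σ(z) * (1 - σ(z))
zs = np.linspace(-5, 5, 11)
assert np.allclose(σ_prime(zs), σ(zs) * (1 - σ(zs)))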
In [4]:
def Plot(fn, *args, **kwargs):
argLength = len(args);
if argLength == 1:
start = args[0][0]
end = args[0][1]
points = None
try:
points = args[0][2]
except IndexError:  # no explicit number of points given
pass
if not points: points = 30
xs = linspace(start, end, points);
plot(xs, list(map(fn, xs)), **kwargs);
In [5]:
Plot(σ, [-2, 2])
In [6]:
y = lambda neuron, input: neuron[0] * input + neuron[1]
α = lambda neuron, input: σ(y(neuron, input))
partial_w = lambda neuron, input: \
σ_prime(y(neuron, input)) * input
partial_y = lambda neuron, input: \
σ_prime(y(neuron, input))
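(A note on where these come from: for a single neuron the pre-activation is $y = w\,x + b$ and the output is $\alpha = \sigma(y)$, so the chain rule gives $\partial\alpha/\partial w = \sigma'(y)\,x$ and $\partial\alpha/\partial b = \sigma'(y)$, which is exactly what `partial_w` and `partial_y` compute; `neuron[0]` plays the role of the weight $w$ and `neuron[1]` the bias $b$.)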
In [7]:
class Neuron():
def __init__(self, neuron):
self.neuron = neuron
def output(self, input):
return α(self.neuron, input)
def set_η(self, η):
self.η = η
def train(self, input, target, η=None):
result = self.output(input);
δ = result - target
p_w = partial_w(self.neuron, input)
p_y = partial_y(self.neuron, input)
gradient = np.array([p_w, p_y])#/np.sqrt(p_w**2 + p_y**2)
if η is None:
η = self.η
self.neuron = - η * δ * gradient + self.neuron;
return result
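The `Neuron` class is not exercised directly below (the `Network` class supersedes it), but a minimal smoke test would look like the following sketch; the learning rate and loop count are illustrative guesses, not values from the original notebook:
In [ ]:
# hypothetical smoke test: fit a single Neuron to a step-like target
n = Neuron(np.array([0.0, 0.0]))   # neuron[0] is the weight, neuron[1] the bias
n.set_η(5)
for _ in range(2000):
    x = np.random.random()
    n.train(x, 1 if x < 0.5 else 0)
Plot(lambda x: n.output(x), [0, 1], label="single neuron")
legend(frameon=False)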
In [71]:
class Network():
def __init__(self, shape, parameters=None):
self.shape = shape;
self.zs = {};
self.αs = {};
self.weights = {};
self.biases = {};
self.δs = {};
self.partial_ws = {};
if parameters is not None:
weights, biases = parameters;
self.weights = weights;
self.biases = biases;
else:
for i in range(1, len(shape)):
self.create_network(i, shape[i])
def create_network(self, ind, size):
if ind == 0: return
self.weights[ind] = np.random.random(self.shape[ind-1:ind+1][::-1]) - 0.5
self.biases[ind] = np.random.random(self.shape[ind]) - 0.5
def get_partials_placeholder(self):
partial_ws = {};
δs = {};
for ind in range(1, len(self.shape)):
partial_ws[ind] = np.zeros(self.shape[ind-1:ind+1][::-1])
δs[ind] = np.zeros(self.shape[ind])
return partial_ws, δs;
def output(self, input=None):
if input is not None:
self.forward_pass(input);
return self.αs[len(self.shape) - 1]
def set_η(self, η=None):
if η is None: return
self.η = η
def train(self, input, target, η=None):
if η is None:
η = self.η
self.forward_pass(input)
self.back_propagation(target)
self.gradient_descent(η)
# done: generate a mini-batch of training data,
# take an average of the gradient over the mini-batch
def train_batch(self, inputs, targets, η=None):
inputs_len = np.shape(inputs)[0]
targets_len = np.shape(targets)[0]
assert inputs_len == targets_len, \
"input and target need to have the same first dimension"
N = inputs_len
partial_ws, δs = self.get_partials_placeholder()
# print(partial_ws, δs)
for input, target in zip(inputs, targets):
# print(input, target)
self.forward_pass(input)
self.back_propagation(target)
for ind in range(1, len(self.shape)):
partial_ws[ind] += self.partial_ws[ind] / float(N)
δs[ind] += self.δs[ind] / float(N)
self.partial_ws = partial_ws
self.δs = δs
self.gradient_descent(η)
def forward_pass(self, input):
# forward passing
self.αs[0] = input;
for i in range(1, len(self.shape)):
self.forward_pass_layer(i);
def back_propagation(self, target):
# back-propagation
ind_last = len(self.shape) - 1
self.δs[ind_last] = σ_prime(self.zs[ind_last]) * \
(self.αs[ind_last] - target);
for i in list(range(1, len(self.shape)))[::-1]:
self.back_propagation_layer(i)
def gradient_descent(self, η):
# gradient descent
for i in range(1, len(self.shape)):
self.gradient_descent_layer(i, η)
def forward_pass_layer(self, ind):
"""ind is the index of the current network"""
self.zs[ind] = self.biases[ind] + \
np.tensordot(self.weights[ind], self.αs[ind - 1], axes=1)
self.αs[ind] = σ(self.zs[ind])
def back_propagation_layer(self, ind):
"""ind \in [len(self.shape) - 1, 1]"""
if ind > 1:
self.δs[ind - 1] = σ_prime(self.zs[ind-1]) * \
np.tensordot(self.δs[ind], self.weights[ind], axes=1)
self.partial_ws[ind] = np.tensordot(self.δs[ind], self.αs[ind - 1], axes=0)
def gradient_descent_layer(self, ind, η):
"""ind \in [1, ...len(shape) - 1]"""
self.weights[ind] = self.weights[ind] - η * self.partial_ws[ind]
self.biases[ind] = self.biases[ind] - η * self.δs[ind]
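The three layer-wise methods implement the standard back-propagation identities for the quadratic cost $C = \frac{1}{2}\lVert \alpha_L - t \rVert^2$: at the output layer $\delta_L = \sigma'(z_L) \odot (\alpha_L - t)$, going backwards $\delta_{l-1} = \sigma'(z_{l-1}) \odot (W_l^{T}\delta_l)$, and the gradients are $\partial C/\partial W_l = \delta_l\,\alpha_{l-1}^{T}$ and $\partial C/\partial b_l = \delta_l$ (with $\odot$ the element-wise product). A quick finite-difference check of one weight against the analytic gradient, as a hypothetical sanity cell with arbitrary made-up inputs:
In [ ]:
# hypothetical gradient check: compare back-propagation to a finite difference
nw_check = Network([1, 3, 1])
x, t = np.array([0.3]), np.array([0.7])
nw_check.forward_pass(x)
nw_check.back_propagation(t)            # fills nw_check.partial_ws
cost = lambda: 0.5 * np.sum((nw_check.output(x) - t)**2)
ε = 1e-6
nw_check.weights[1][0, 0] += ε
c_plus = cost()
nw_check.weights[1][0, 0] -= 2 * ε
c_minus = cost()
nw_check.weights[1][0, 0] += ε          # restore the original weight
print((c_plus - c_minus) / (2 * ε), nw_check.partial_ws[1][0, 0])  # should agree closely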
Now let's train on the same data set as before, to validate our new class.
In [9]:
# train a [1, 4, 1] network on a step-function target, one random sample at a time
target_func = lambda x: 1 if x < 0.5 else 0
nw = Network([1, 4, 1])
figure(figsize=(16, 4))
subplot(131)
# note: this cell still trains on one random sample at a time; mini-batch
# training (averaging the gradient over the batch) is shown in the next cell
inputs = [[x] for x in np.linspace(0, 1, 100)]
targets = [[target_func(x)] for x in np.linspace(0, 1, 100)]
for ind in range(10000):
x = np.random.random()
nw.train([x], [target_func(x)], 10)
scatter(x, target_func(x))
Plot(lambda x: nw.output([x])[0], [0, 1], label="neural net")
Plot(lambda x: target_func(x), [0, 1], color='r', linewidth=4, alpha=0.3, label="target function")
xlim(-0.25, 1.25)
ylim(-0.25, 1.25)
legend(loc=3, frameon=False)
subplot(132)
imshow(nw.weights[1], interpolation='none', aspect=1);colorbar();
subplot(133)
imshow(nw.weights[2], interpolation='none', aspect=1);colorbar()
# subplot(144)
# imshow(nw.weights[3], interpolation='none', aspect=1);colorbar()
Out[9]:
In [145]:
# train the same step-function target, now using mini-batches
target_func = lambda x: 1 if x < 0.5 else 0
nw = Network([1, 4, 1])
figure(figsize=(4, 4))
#subplot(141)
batch_size = 10
inputs = [[x] for x in np.linspace(0, 1, batch_size)]
targets = [[target_func(x)] for x in np.linspace(0, 1, batch_size)]
n = 0
for i in range(3):
for ind in range(40):
n += 1;
nw.train_batch(inputs, targets, 10)
Plot(lambda x: nw.output([x])[0], [0, 1], label="NN {} batches".format(n))
plot([i[0] for i in inputs], [t[0] for t in targets], 'r.', label="training data")
xlim(-0.25, 1.25)
ylim(-0.25, 1.25)
_title = "Training Progress Through\nMini-batches (4 hidden neurons)"
title(_title, fontsize=15)
legend(loc=(1.2, 0.25), frameon=False)
fn = "004 batch training " + _title.replace('\n', ' ') + ".png"
savefig(fn, dpi=300,
bbox_inches='tight',
transparent=True,
pad_inches=0)
FileLink(fn)
Out[145]:
In [143]:
# train on a smooth target, cos(x)**2, using mini-batches
target_func = lambda x: np.cos(x)**2
nw = Network([1, 10, 1])
figure(figsize=(16, 4))
#subplot(141)
batch_size = 100
grid = np.linspace(0, 10, batch_size)
inputs = [[x] for x in grid]
targets = [[target_func(x)] for x in grid]
n = 0
for i in range(4):
for ind in range(500):
n += 1;
nw.train_batch(inputs, targets, 40)
Plot(lambda x: nw.output([x])[0], [0, 10], label="NN {} batches".format(n))
plot([i[0] for i in inputs], [t[0] for t in targets], 'r.', label="training data")
_title = "Training Progress Through Mini-batches (10 hidden neurons)"
title(_title)
xlim(-0.25, 10.25)
ylim(-0.25, 1.25)
legend(loc=4, frameon=False)
fn = "004 batch training " + _title + ".png"
savefig(fn, dpi=300,
bbox_inches='tight',
transparent=True,
pad_inches=0)
FileLink(fn)
Out[143]:
As it turned out, encoding an absolute value function is hard. You can play with the code below and try to learn it, but with fewer than 10 hidden neurons the result is usually pretty terrible.
It is possible, however, to learn half of the absolute value function, which amounts to encoding just a straight line.
In [212]:
# train on half of an absolute value function
target_func = lambda x: np.abs(x - 0.5)
nw = Network([1, 20, 1])
figure(figsize=(6, 6))
batch_size = 40
grid = np.linspace(0, 0.5, batch_size)
inputs = [[x] for x in grid]
targets = [[target_func(x)] for x in grid]
n = 0
for i in range(4):
for ind in range(1000):
n += 1;
nw.train_batch(inputs, targets, 23)
Plot(lambda x: nw.output([x])[0], [0, 1.0], label="NN {} batches".format(n))
plot([i[0] for i in inputs], [t[0] for t in targets], 'r.', label="training data")
_title = "Emulate Half of An Absolute Value Function"
title(_title)
xlim(-0.25, 1.25)
ylim(-0.25, 1.25)
legend(loc=1, frameon=False)
fn = "004 batch training " + _title.replace('\n', ' ') + ".png"
savefig(fn,
dpi=300,
bbox_inches='tight',
transparent=True,
pad_inches=0)
FileLink(fn)
Out[212]:
Now equipped with this set of hyper-parameters, I thought:
"If I can train each of the two halves of the
*absolute value function* separately, I should be able to build
the entire function by adding the two halves
together, right?"
So I tried 2 $\times$ the number of hidden neurons.
And amazingly, it just worked.
In [211]:
# train on the full absolute value function, with twice as many hidden neurons
target_func = lambda x: np.abs(x - 0.5)
nw = Network([1, 40, 1])
figure(figsize=(6, 6))
batch_size = 80
grid = np.linspace(0, 1, batch_size)
inputs = [[x] for x in grid]
targets = [[target_func(x)] for x in grid]
n = 0
for i in range(4):
for ind in range(4000):
n += 1;
nw.train_batch(inputs, targets, 10)
Plot(lambda x: nw.output([x])[0], [0, 1.0], label="NN {} batches".format(n))
plot([i[0] for i in inputs], [t[0] for t in targets], 'r.', label="training data")
_title = "Emulate An Absolute\nFunction (2 times of hidden neurons)"
title(_title)
xlim(-0.25, 1.25)
ylim(-0.25, 1.25)
legend(loc=1, frameon=False)
fn = "004 batch training " + _title.replace('\n', ' ') + ".png"
savefig(fn,
dpi=300,
bbox_inches='tight',
transparent=True,
pad_inches=0)
FileLink(fn)
Out[211]: