In [1]:
"""
Single layer lstm cell example with tf v0.11
A program that learns to do math. e.g. 24 + 36 = 60
Performance is shown to improve on training.
Idea originated from Siraj Raval youtube channel
Idea:
Take addition of two numbers as a time series and apply rnn
"""
import numpy as np
batch_size = 1
sequence_length = 8
num_of_input_neurons = 2
num_of_hidden_neurons = 16
num_of_output_neurons = 1
forget_bias = 1.0
learning_rate = 0.01
# a lookup table mapping 1-byte integers to little-endian (LE) bit arrays
int2binLE = {}
bin_dim = 8
max_num = pow(2, bin_dim)
bits = np.array([range(max_num)], dtype=np.uint8)
binary = np.unpackbits(bits.T, axis=1)
for i in range(max_num):
    int2binLE[i] = binary[i][::-1]
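# Quick sanity check of the table (my addition, not in the original run):
# 5 = 0b101, so its little-endian bit vector starts with the least
# significant bit.
assert list(int2binLE[5]) == [1, 0, 1, 0, 0, 0, 0, 0]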
# convert an 8-bit binary number (LE by default) back to an integer
def binLE2int(input_bin, little_endian=True):
    out = 0
    if little_endian:
        for index, x in enumerate(input_bin):
            out += x * pow(2, index)
    else:
        for index, x in enumerate(reversed(input_bin)):
            out += x * pow(2, index)
    return int(out)
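# Round-trip sanity check (my addition): every value should survive
# int -> LE bits -> int, and the big-endian branch should agree when fed
# the reversed bit order.
for n in (0, 1, 37, 255):
    assert binLE2int(int2binLE[n]) == n
    assert binLE2int(int2binLE[n][::-1], little_endian=False) == n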
# create one batch of data
def create_data(batch_size):
    data = np.empty(0)
    labels = np.empty(0)
    for _ in range(batch_size):
        # create a random addition problem to train the net;
        # capping both operands at max_num // 2 keeps the sum within 8 bits
        num1_int = np.random.randint(max_num // 2)
        num2_int = np.random.randint(max_num // 2)
        result_int = num1_int + num2_int
        # express the problem in binary
        num1_bin = int2binLE[num1_int]
        num2_bin = int2binLE[num2_int]
        result_bin = int2binLE[result_int]
        # one sample has shape (sequence_length, num_of_input_neurons):
        # one time step per bit, two input bits per step
        one_data = np.array([num1_bin, num2_bin]).transpose(1, 0)
        data = np.append(data, one_data)
        labels = np.append(labels, result_bin)
    data = data.reshape(batch_size, sequence_length, -1)
    labels = labels.reshape(batch_size, sequence_length, -1)
    return data, labels
In [2]:
data, labels = create_data(1)
print("Two binary numbers in LE")
print(data.transpose(0,2,1).reshape(-1,2,sequence_length))
print("Result of addition in LE")
print(labels.reshape(-1,sequence_length))
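# Decode the same sample back to integers with the helper above -- a small
# extra check of my own, not in the original notebook output.
num1 = binLE2int(data[0].transpose(1, 0)[0])
num2 = binLE2int(data[0].transpose(1, 0)[1])
print(str(num1) + " + " + str(num2) + " = " + str(binLE2int(labels[0].reshape(-1))))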
In [3]:
import tensorflow as tf
# the input data is two binary numbers in LE
# the label is one binary number in LE
# layer 1 with lstm
# num of data x binary digit LE x 2
x = tf.placeholder(tf.float32, [None, sequence_length, num_of_input_neurons])
# num of label x binary digit LE x 1
y_ = tf.placeholder(tf.float32, [None, sequence_length, num_of_output_neurons])
w1 = tf.Variable(tf.random_uniform([num_of_input_neurons, num_of_hidden_neurons], minval=-0.5,maxval=0.5))
b1 = tf.Variable(tf.random_uniform([num_of_hidden_neurons], minval=-0.5,maxval=0.5))
# (binary digit LE -- num of data) x 2
x_reshaped = tf.reshape(tf.transpose(x, [1, 0, 2]), [-1, num_of_input_neurons])
# binary digit LE x num of data x 1
labels_reshaped = tf.transpose(y_, [1, 0, 2])
# (binary digit LE -- num of data) x num of hidden neuron
perceptron1 = tf.matmul(x_reshaped, w1) + b1
# a list of sequence_length tensors, each (num of data) x (num of hidden neurons)
perceptron1_split = tf.split(0, sequence_length, perceptron1)
lstm_cell = tf.nn.rnn_cell.BasicLSTMCell(num_of_hidden_neurons, forget_bias=forget_bias, state_is_tuple=True)
rnn_outputs, states = tf.nn.rnn(lstm_cell, perceptron1_split, dtype=tf.float32)
# layer 2 is a simple sigmoid(x w + b)
w2 = tf.Variable(tf.random_uniform([num_of_hidden_neurons, num_of_output_neurons], minval=-0.5,maxval=0.5))
b2 = tf.Variable(tf.random_uniform([num_of_output_neurons], minval=-0.5,maxval=0.5))
perceptron2 = [tf.matmul(rnn, w2) + b2 for rnn in rnn_outputs]
y = tf.nn.sigmoid(perceptron2)  # the list packs to binary digit LE x num of data x 1
cross_entropy = tf.reduce_mean(
    tf.reduce_sum(
        - labels_reshaped * tf.log(y) - (1.0 - labels_reshaped) * tf.log(1.0 - y),
        reduction_indices=[0]
    )
)
train_step = tf.train.AdamOptimizer(learning_rate).minimize(cross_entropy)
prediction = tf.round(y)
correct_prediction = tf.equal(prediction, tf.round(labels_reshaped))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
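# Shape walk-through of the transpose/reshape/split pipeline above, sketched
# in NumPy (my illustration, not part of the original graph). tf.nn.rnn in
# v0.11 expects a Python list of sequence_length tensors, each of shape
# (num of data, features); time-major transpose + flatten + split produces
# exactly that layout.
demo = np.arange(3 * sequence_length * num_of_input_neurons).reshape(
    3, sequence_length, num_of_input_neurons)
time_major = demo.transpose(1, 0, 2).reshape(-1, num_of_input_neurons)
steps = np.split(time_major, sequence_length, axis=0)
assert len(steps) == sequence_length
assert steps[0].shape == (3, num_of_input_neurons)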
In [4]:
sess = tf.Session()
sess.run(tf.initialize_all_variables())
iterations = 300
for j in range(iterations):
    train_data, train_labels = create_data(batch_size)
    feed_dict = {
        x: train_data,
        y_: train_labels
    }
    sess.run(train_step, feed_dict=feed_dict)
    # report progress ten times over the run
    if ((j + 1) % (iterations // 10)) == 0:
        ce = sess.run(cross_entropy, feed_dict=feed_dict)
        predict = sess.run(prediction, feed_dict=feed_dict)
        #inside = sess.run(y, feed_dict=feed_dict)
        print('iteration: ' + str(j))
        print('cross entropy: ' + str(ce))
        print('labels:  ' + str(train_labels.reshape(-1, sequence_length)))
        print('predict: ' + str(predict.reshape(-1, sequence_length)))
        #print('real_y:  ' + str(inside.reshape(-1, sequence_length)))
        num1 = binLE2int(train_data[0].transpose(1, 0)[0])
        num2 = binLE2int(train_data[0].transpose(1, 0)[1])
        out = binLE2int(predict.reshape(-1, sequence_length)[0])
        print(str(num1) + " + " + str(num2) + " = " + str(out))
        print()
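In [5]:
# Hypothetical follow-up cell, not in the original notebook: evaluate the
# trained network on a batch of fresh problems it has never seen, using the
# accuracy op defined above (fraction of correctly predicted bits).
test_data, test_labels = create_data(100)
test_accuracy = sess.run(accuracy, feed_dict={x: test_data, y_: test_labels})
print('per-bit accuracy on 100 unseen problems: ' + str(test_accuracy))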