In this example, we'll build a simple recurrent neural network (RNN) in TensorFlow and train it to add two binary numbers.
In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf
%matplotlib inline
import matplotlib.pyplot as plt
plt.style.use('ggplot')
First, we make some training data. To keep things simple, we'll only pick numbers between 0 and $2^6 - 1$, so that the sum of any two numbers is less than $2^7$ and fits in a single unsigned byte (8 bits).
In [2]:
max_digits = 6
n_samples = 10000
ints = np.random.randint(low=0, high=np.power(2, max_digits), size=[n_samples, 2])
data_df = pd.DataFrame(ints, columns=['x1', 'x2'])
In [3]:
data_df.head()
Out[3]:
In [4]:
data_df = data_df.assign(y=data_df.x1 + data_df.x2)
In [5]:
data_df.head()
Out[5]:
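Since the bit-unpacking below assumes every value fits in a single unsigned byte, a quick assertion makes that explicit (a minimal sanity check, not strictly required for the rest of the notebook):
assert data_df.y.max() < 2 ** 8  # every sum fits in 8 bits (uint8)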
The next step is to convert all the decimal numbers to their binary array representations. For this we'll use the numpy.unpackbits function.
In [6]:
np.unpackbits(np.array([10], dtype=np.uint8))
Out[6]:
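Since 10 is 00001010 in binary, this returns array([0, 0, 0, 0, 1, 0, 1, 0], dtype=uint8). Note that unpackbits emits the most significant bit first; we'll account for this ordering when we feed the bits to the RNN.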
First, y. The 10,000 y values become an $8 \times 10000 \times 1$ 3D array: one slice per bit position, with samples along the second axis.
In [7]:
y_data = np.unpackbits(data_df.y.astype(np.uint8).values)
y_data = y_data.astype(np.float64).reshape(n_samples, 8, 1)
y_data = np.transpose(y_data, axes=[1, 0, 2])
# sanity check: packing the first sample's bits should recover data_df.y[0]
np.packbits(y_data[:, 0, :].astype(np.int64))
Out[7]:
In [8]:
y_data.shape
Out[8]:
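As expected, the shape is (8, 10000, 1): bit position first, then sample, then a single feature dimension.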
Similarly, the 10,000 x1 and x2 values become an $8 \times 10000 \times 2$ 3D array.
In [9]:
x_data = np.zeros([2, n_samples, 8], dtype=np.uint8)
x_data[0, :, :] = np.unpackbits(data_df.x1.astype(np.uint8).values).reshape(n_samples, 8)
x_data[1, :, :] = np.unpackbits(data_df.x2.astype(np.uint8).values).reshape(n_samples, 8)
x_data = x_data.astype(np.float64)
x_data = np.transpose(x_data, axes=[2, 1, 0])
# sanity check: packing the first sample's bits should recover x1[0] and x2[0]
np.packbits(x_data[:, 0, :].T.astype(np.int64))
Out[9]:
In [10]:
x_data.shape
Out[10]:
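Now we can build the graph. The network below is a single-layer RNN unrolled over the 8 bit positions, starting from the least significant bit. With $x_t$ the pair of input bits at step $t$ and $y_{t-1}$ the previous step's output, the code computes

$$h_t = \sigma\left(x_t W_h + y_{t-1} U_h + b_h\right), \qquad y_t = \sigma\left(h_t W_f + b_f\right)$$

where $\sigma$ is the logistic sigmoid, $W_h$ is $2 \times 3$, $U_h$ is $1 \times 3$, and $W_f$ is $3 \times 1$ (with hidden_dim = 3). Note that, unlike a textbook Elman RNN, it is the previous output $y_{t-1}$, rather than the hidden state, that is fed back; this single recurrent signal is enough to propagate the carry from one bit position to the next.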
In [26]:
# Build the RNN graph
hidden_dim = 3

tf.reset_default_graph()

with tf.variable_scope('input'):
    x_in = tf.placeholder(shape=(8, n_samples, 2), dtype=np.float64, name='x')
    y_in = tf.placeholder(shape=(8, n_samples, 1), dtype=np.float64, name='y')

with tf.variable_scope('hidden'):
    # weights and biases; see the dimension checks in the loop below
    w_f = tf.get_variable(shape=[hidden_dim, 1], dtype=np.float64,
                          initializer=tf.truncated_normal_initializer(),
                          name='w_f')
    w_h = tf.get_variable(shape=[2, hidden_dim], dtype=np.float64,
                          initializer=tf.truncated_normal_initializer(),
                          name='w_h')
    u_h = tf.get_variable(shape=[1, hidden_dim], dtype=np.float64,
                          initializer=tf.truncated_normal_initializer(),
                          name='u_h')
    b_f = tf.get_variable(shape=[1, 1], dtype=np.float64,
                          initializer=tf.zeros_initializer(),
                          name='b_f')
    b_h = tf.get_variable(shape=[1, hidden_dim], dtype=np.float64,
                          initializer=tf.zeros_initializer(),
                          name='b_h')

with tf.variable_scope('output'):
    # initial "previous output", all zeros; reassigned inside the loop below
    y_t = tf.get_variable(shape=(n_samples, 1), dtype=np.float64,
                          initializer=tf.zeros_initializer(), name='y_t')

y_out = []
x_pos = tf.unstack(x_in, axis=0)
# x_pos is a list of 8 tensors, each 10000 x 2
y_pos = tf.unstack(y_in, axis=0)
# y_pos is a list of 8 tensors, each 10000 x 1
# reverse both x_pos and y_pos because
# we want to start at the LSB and work our way to the MSB
for x, y in zip(reversed(x_pos), reversed(y_pos)):
    # dim check
    # x: [10000, 2], w_h: [2, 3] -> tf.matmul(x, w_h): [10000, 3]
    # y_t: [10000, 1], u_h: [1, 3] -> tf.matmul(y_t, u_h): [10000, 3]
    # b_h: [1, 3] is broadcast into the sum
    # finally, h_t: [10000, 3]
    h_t = tf.nn.sigmoid(tf.matmul(x, w_h) + tf.matmul(y_t, u_h) + b_h, name='h_t')
    # dim check
    # w_f: [3, 1] -> tf.matmul(h_t, w_f): [10000, 1]
    # b_f is again broadcast
    y_t = tf.nn.sigmoid(tf.matmul(h_t, w_f) + b_f, name='y_t')
    y_out.append(y_t)

with tf.variable_scope('loss'):
    losses = []
    for y_calc, y_actual in zip(y_out, reversed(y_pos)):
        loss = tf.squared_difference(y_calc, y_actual)
        losses.append(loss)

optimizer = tf.train.AdamOptimizer(learning_rate=0.04)
mean_loss = tf.reduce_mean(losses, name='ms_loss')
train_op = optimizer.minimize(mean_loss, name='minimization')
init = tf.global_variables_initializer()

n_training_iters = 2000
with tf.Session() as sess:
    sess.run(init)
    for i in range(1, n_training_iters + 1):
        _, loss_val = sess.run([train_op, mean_loss],
                               feed_dict={x_in: x_data, y_in: y_data})
        if i == 1 or i % 100 == 0:
            print(i, loss_val)
    y_out_vals = sess.run(y_out, feed_dict={x_in: x_data, y_in: y_data})
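y_out_vals is a list of 8 arrays in LSB-first order (the order in which the loop emitted them), each holding one sigmoid output per sample. To recover the predicted sums, we stack the list into an array, transpose and flip it back to MSB-first order, threshold the sigmoid outputs at 0.5, and pack the resulting bits into bytes with numpy.packbits.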
In [28]:
len(y_out_vals)
Out[28]:
In [29]:
y_out_t = np.array(y_out_vals)[:, :, 0]
In [30]:
y_out_f = np.fliplr(y_out_t.T)
In [31]:
y_out_int = np.where(y_out_f > 0.5, 1, 0).astype(np.uint8)
In [32]:
nums = np.packbits(y_out_int)
In [33]:
nums
Out[33]:
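To make the packing convention concrete, here is a minimal round trip on a known bit pattern (MSB first, matching unpackbits):
np.packbits(np.array([0, 0, 0, 0, 1, 0, 1, 0], dtype=np.uint8))  # -> array([10], dtype=uint8)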
In [34]:
results = pd.DataFrame({'sum_actual': data_df.y, 'sum_predicted': nums})
In [35]:
results.sample(20).plot(kind='bar')
Out[35]:
In [36]:
results.tail(20)
Out[36]:
In [37]:
results.corr()
Out[37]:
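If training has converged, the correlation between sum_actual and sum_predicted should be close to 1, indicating the network has effectively learned 8-bit binary addition.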