In [1]:
    
%matplotlib notebook
import os
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
    
    
In [2]:
    
num_digits = 2
chars = '0123456789abcdef+ '
char2idx = {c: i for i, c in enumerate(chars)}
idx2char = {i: c for i, c in enumerate(chars)}
maxlen = num_digits * 2 + 1   # longest input string, e.g. 'ff+ff'
def make_dataset(num_examples):
    X = np.zeros((num_examples, maxlen, len(chars)))           # one-hot inputs
    y = np.zeros((num_examples, num_digits + 1, len(chars)))   # one-hot targets
    
    n1 = np.random.randint(0, 16**num_digits, num_examples)   # high is exclusive:
    n2 = np.random.randint(0, 16**num_digits, num_examples)   # covers 0 .. 16**num_digits - 1
    res = n1 + n2
    for i in range(num_examples):
        inp = '%x+%x' % (n1[i], n2[i])
        inp += (maxlen - len(inp)) * ' '   # padding
        out = '%x' % res[i]
        out += (num_digits + 1 - len(out)) * ' '   # padding
        # 1-hot encoding
        for j, c in enumerate(inp[::-1]):  # invert input
            X[i, j, char2idx[c]] = 1
            
        for j, c in enumerate(out):
            y[i, j, char2idx[c]] = 1
            
    return X, y
    
In [3]:
    
make_dataset(1)[0]
    
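    
As a quick sanity check, a sample can be decoded back to text. A minimal
sketch (the X_demo/inp_str/out_str names are illustrative; remember the
input characters are stored reversed):

In [ ]:
    
X_demo, y_demo = make_dataset(1)
inp_str = ''.join(idx2char[j] for j in np.argmax(X_demo[0], axis=-1))
out_str = ''.join(idx2char[j] for j in np.argmax(y_demo[0], axis=-1))
print(repr(inp_str[::-1]), '->', repr(out_str))   # undo the input reversal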
In [4]:
    
tf.reset_default_graph()
batch_size = 128
lstm_size = 64
input_numbers = tf.placeholder(tf.float32, shape=[batch_size, maxlen, len(chars)])
input_labels = tf.placeholder(tf.int32, shape=[batch_size, num_digits + 1, len(chars)])
inp = tf.unstack(input_numbers, maxlen, 1)   # maxlen tensors of [batch_size, len(chars)]
# Encoder
lstm = tf.contrib.rnn.BasicLSTMCell(lstm_size)
#lstm = tf.contrib.rnn.MultiRNNCell([lstm] * 2)
net, states = tf.contrib.rnn.static_rnn(lstm, inp, dtype=tf.float32)
# Decoder: feed the encoder's final output, repeated once per output character.
with tf.variable_scope('decoder'):
    lstm2 = tf.contrib.rnn.BasicLSTMCell(lstm_size)
    net2, states2 = tf.contrib.rnn.static_rnn(lstm2, [net[-1]] * (num_digits + 1), dtype=tf.float32)
logits = tf.contrib.layers.fully_connected(
    tf.stack(net2, axis=1),   # net2 already holds exactly num_digits + 1 steps
    len(chars), activation_fn=None)
loss = tf.losses.softmax_cross_entropy(input_labels, logits)
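    
The wiring above is the classic encode-then-repeat seq2seq trick: the
encoder's last output is handed to the decoder num_digits + 1 times, once
per output character. A purely illustrative static-shape check (no session
needed):

In [ ]:
    
print(len(net), net[-1].shape)    # maxlen steps of (batch_size, lstm_size)
print(len(net2), net2[0].shape)   # num_digits + 1 steps of (batch_size, lstm_size)
print(logits.shape)               # (batch_size, num_digits + 1, len(chars))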
    
In [9]:
    
## NB: the session is created in cell [11] below; run that first.
## This cell just inspects the decoder outputs: a list of num_digits + 1
## arrays, each of shape (batch_size, lstm_size).
x, y = make_dataset(batch_size)
n_ = sess.run(net2, feed_dict={
        input_numbers: x,
        input_labels: y})
type(n_), len(n_), n_[1].shape
    
In [10]:
    
opt = tf.train.AdamOptimizer(learning_rate=0.005)
#opt = tf.train.GradientDescentOptimizer(learning_rate=0.01)
train_op = opt.minimize(loss)
    
In [11]:
    
init_op = tf.group(tf.global_variables_initializer(),
                   tf.local_variables_initializer())
config = tf.ConfigProto(allow_soft_placement=True)
config.gpu_options.allow_growth = True
sess = tf.InteractiveSession(config=config)
sess.run(init_op)
#coord = tf.train.Coordinator()
#threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    
In [12]:
    
lvals = []
fig, ax = plt.subplots(1, 1)   # live loss plot (needs %matplotlib notebook)
for i in range(10000):
    x, y = make_dataset(batch_size)
    
    l, _ = sess.run([loss, train_op], feed_dict={
        input_numbers: x,
        input_labels: y
    })
    if i % 100 == 0:
        lvals.append(l)
        ax.clear()
        ax.plot(lvals)
        fig.canvas.draw()
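    
After training, the loss on a fresh batch gives a quick sanity check; a
minimal sketch (the x2/y2 names are ours):

In [ ]:
    
x2, y2 = make_dataset(batch_size)
print(sess.run(loss, feed_dict={input_numbers: x2, input_labels: y2}))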
    
    
    
In [13]:
    
x, y = make_dataset(batch_size)
    
probs = tf.nn.softmax(logits)   # build the op once; reused in later cells
l, = sess.run([probs], feed_dict={
  input_numbers: x,
  input_labels: y
})
    
In [14]:
    
# Decode the first input back to a string (note: it was stored reversed)
ans = ''
for i in x[0, ...]:
    ans += idx2char[int(np.argmax(i))]
print(ans)
    
    
In [15]:
    
# Decode the corresponding target
ans = ''
for i in y[0, ...]:
    ans += idx2char[int(np.argmax(i))]
print(ans)
    
    
In [16]:
    
# Decode the model's prediction
ans = ''
for i in np.argmax(l[0, ...], axis=1):
    ans += idx2char[i]
print(ans)
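    
The three decoding loops above can be folded into one helper; a minimal
sketch (the decode_batch name is ours, not a library function):

In [ ]:
    
def decode_batch(batch):
    """Map one-hot rows (or softmax probabilities) back to strings."""
    return [''.join(idx2char[j] for j in row)
            for row in np.argmax(batch, axis=-1)]

print(decode_batch(x[:1])[0][::-1])   # input, reversal undone
print(decode_batch(y[:1])[0])         # target
print(decode_batch(l[:1])[0])         # prediction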
    
    
In [17]:
    
y[12], l[12]   # one-hot target vs. predicted distribution for example 12
    
In [18]:
    
np.argmax(l[0, :], axis=1)   # predicted character indices for example 0
    
In [19]:
    
np.argmax(y[0, :], axis=1)   # target character indices for example 0
    
In [20]:
    
for _ in range(10):
    x, y = make_dataset(batch_size)

    l, = sess.run([probs], feed_dict={
      input_numbers: x,
      input_labels: y
    })
    # fraction of examples with every output character predicted correctly
    r = np.sum(np.all(np.argmax(y, axis=-1) == np.argmax(l, axis=-1), axis=-1)) / batch_size
    print(r)
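    
Beyond the aggregate accuracy, it helps to eyeball the failures; a sketch
reusing the decode_batch helper from above (x, y, l are left over from the
last loop iteration):

In [ ]:
    
preds = decode_batch(l)
trues = decode_batch(y)
inps = [s[::-1] for s in decode_batch(x)]   # undo the input reversal
for a, b, c in zip(inps, trues, preds):
    if b != c:
        print('%s = %s   predicted: %s' % (a, b, c))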
    
    