In [1]:
# import tflearn
import tensorflow as tf
import numpy as np

from board_utils import generate_1d_data
from board_utils import build_1d_datasets

# interactive notebook
# jupyter nbextension enable --py --sys-prefix widgetsnbextension
# import ipywidgets as widgets
# from ipywidgets import interact

# widgets.interact(f, x=10);


# def build_graph(
#         n_length=8,
#         n_hidden=32,
#         learning_rate=1e-3):

state_size=4  #?
n_length=8
n_hidden=32
learning_rate=1e-3
batch_size=32
n_classes = 8
    
data, labels = generate_1d_data(n_length=n_length, one_hot=True)
datasets = build_1d_datasets(data, labels)

# x = tf.placeholder(tf.float32, [None, n_length, 1])
# y = tf.placeholder(tf.float32, [None, n_length])  # one-hot label
x = tf.placeholder(tf.int32, [None, n_length])
y = tf.placeholder(tf.int32, [None, n_length])
init_state = tf.zeros([None, state_size])

# RNN inputs
x_one_hot = tf.one_hot(x, n_classes)
rnn_inputs = tf.unstack(x_one_hot, axis=1)

#
with tf.variable_scope('rnn_cell'):
    W = tf.get_variable('W', [num_classes + state_size, state_size])
    b = tf.get_variable('b', [state_size], initializer=tf.constant_initializer(0.0))

# RNN
# cell = tf.nn.rnn_cell.LSTMCell(n_hidden)
# init_state = cell.zero_state(batch_size, tf.float32)
# rnn_outputs, final_state = tf.nn.dynamic_rnn(cell, x, dtype=tf.float32)
outputs, states = tf.nn.seq2seq

# FC1
# fc1 = tf.reshape(rnn_outputs, [-1, n_length])
# y_reshaped = tf.reshape(y, [-1])

with tf.variable_scope('softmax'):
    W = tf.get_variable('W', [n_length, n_hidden])
    b = tf.get_variable('b', [n_hidden], initializer=tf.constant_initializer(0.0))

# ?? logits
logits = tf.matmul(fc1, W) + b

# loss
# cross_entropy = tf.nn.softmax_cross_entropy_with_logits(rnn_outputs, y)
# loss = tf.reduce_mean(cross_entropy)

# regression
optimize = tf.train.AdamOptimizer(learning_rate).minimize(loss)

#     return dict(
#             x = x,
#             y = y,
#             init_state = init_state,
#             final_state = final_state
#             train_step = train_step,
#             total_loss = total_loss)

def train_network(
        num_epochs,
        num_steps,
        verbose=True):

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())

        for step in range(num_steps):
            xf, yf = datasets.train.next_batch(batch_size)

            acc = sess.run(optimize, feed_dict={x: xf, y: yf})
            
            if step % 50 == 0 and step > 0:
                if verbose:
                    print('step:{}, acc:{}'.format(step, acc))

# Cute code below, but didn't work too well
# [samples, timesteps, input dim]
# net = learn.
# net = tflearn.input_data(shape=[None, n_length, 1])
# net = tflearn.lstm(net, n_hidden, return_seq=True)
# net = tflearn.lstm(net, n_hidden)
# net = tflearn.fully_connected(net, n_length, activation='softmax')
# net = tflearn.regression(net)

# model = tflearn.DNN(net, tensorboard_verbose=2)
# model = tflearn.
# model.fit(x, y, n_epoch=1, validation_set=0.1, show_metric=True)

training_losses = train_network(1, 1000)


---------------------------------------------------------------------------
InvalidArgumentError                      Traceback (most recent call last)
/usr/local/lib/python3.4/dist-packages/tensorflow/python/client/session.py in _do_call(self, fn, *args)
   1020     try:
-> 1021       return fn(*args)
   1022     except errors.OpError as e:

/usr/local/lib/python3.4/dist-packages/tensorflow/python/client/session.py in _run_fn(session, feed_dict, fetch_list, target_list, options, run_metadata)
   1002                                  feed_dict, fetch_list, target_list,
-> 1003                                  status, run_metadata)
   1004 

/usr/lib/python3.4/contextlib.py in __exit__(self, type, value, traceback)
     65             try:
---> 66                 next(self.gen)
     67             except StopIteration:

/usr/local/lib/python3.4/dist-packages/tensorflow/python/framework/errors_impl.py in raise_exception_on_not_ok_status()
    468           compat.as_text(pywrap_tensorflow.TF_Message(status)),
--> 469           pywrap_tensorflow.TF_GetCode(status))
    470   finally:

InvalidArgumentError: logits and labels must be same size: logits_size=[256,32] labels_size=[32,8]
	 [[Node: SoftmaxCrossEntropyWithLogits = SoftmaxCrossEntropyWithLogits[T=DT_FLOAT, _device="/job:localhost/replica:0/task:0/cpu:0"](Reshape_1, Reshape_2)]]

During handling of the above exception, another exception occurred:

InvalidArgumentError                      Traceback (most recent call last)
<ipython-input-1-ef4a603980d8> in <module>()
     91 # model.fit(x, y, n_epoch=1, validation_set=0.1, show_metric=True)
     92 
---> 93 training_losses = train_network(1, 1000)

<ipython-input-1-ef4a603980d8> in train_network(num_epochs, num_steps, verbose)
     72             xf, yf = datasets.train.next_batch(batch_size)
     73 
---> 74             acc = sess.run(optimize, feed_dict={x: xf, y: yf})
     75 
     76             if step % 50 == 0 and step > 0:

/usr/local/lib/python3.4/dist-packages/tensorflow/python/client/session.py in run(self, fetches, feed_dict, options, run_metadata)
    764     try:
    765       result = self._run(None, fetches, feed_dict, options_ptr,
--> 766                          run_metadata_ptr)
    767       if run_metadata:
    768         proto_data = tf_session.TF_GetBuffer(run_metadata_ptr)

/usr/local/lib/python3.4/dist-packages/tensorflow/python/client/session.py in _run(self, handle, fetches, feed_dict, options, run_metadata)
    962     if final_fetches or final_targets:
    963       results = self._do_run(handle, final_targets, final_fetches,
--> 964                              feed_dict_string, options, run_metadata)
    965     else:
    966       results = []

/usr/local/lib/python3.4/dist-packages/tensorflow/python/client/session.py in _do_run(self, handle, target_list, fetch_list, feed_dict, options, run_metadata)
   1012     if handle is None:
   1013       return self._do_call(_run_fn, self._session, feed_dict, fetch_list,
-> 1014                            target_list, options, run_metadata)
   1015     else:
   1016       return self._do_call(_prun_fn, self._session, handle, feed_dict,

/usr/local/lib/python3.4/dist-packages/tensorflow/python/client/session.py in _do_call(self, fn, *args)
   1032         except KeyError:
   1033           pass
-> 1034       raise type(e)(node_def, op, message)
   1035 
   1036   def _extend_graph(self):

InvalidArgumentError: logits and labels must be same size: logits_size=[256,32] labels_size=[32,8]
	 [[Node: SoftmaxCrossEntropyWithLogits = SoftmaxCrossEntropyWithLogits[T=DT_FLOAT, _device="/job:localhost/replica:0/task:0/cpu:0"](Reshape_1, Reshape_2)]]

Caused by op 'SoftmaxCrossEntropyWithLogits', defined at:
  File "/usr/lib/python3.4/runpy.py", line 170, in _run_module_as_main
    "__main__", mod_spec)
  File "/usr/lib/python3.4/runpy.py", line 85, in _run_code
    exec(code, run_globals)
  File "/usr/local/lib/python3.4/dist-packages/ipykernel/__main__.py", line 3, in <module>
    app.launch_new_instance()
  File "/usr/local/lib/python3.4/dist-packages/traitlets/config/application.py", line 658, in launch_instance
    app.start()
  File "/usr/local/lib/python3.4/dist-packages/ipykernel/kernelapp.py", line 474, in start
    ioloop.IOLoop.instance().start()
  File "/usr/local/lib/python3.4/dist-packages/zmq/eventloop/ioloop.py", line 177, in start
    super(ZMQIOLoop, self).start()
  File "/usr/local/lib/python3.4/dist-packages/tornado/ioloop.py", line 887, in start
    handler_func(fd_obj, events)
  File "/usr/local/lib/python3.4/dist-packages/tornado/stack_context.py", line 275, in null_wrapper
    return fn(*args, **kwargs)
  File "/usr/local/lib/python3.4/dist-packages/zmq/eventloop/zmqstream.py", line 440, in _handle_events
    self._handle_recv()
  File "/usr/local/lib/python3.4/dist-packages/zmq/eventloop/zmqstream.py", line 472, in _handle_recv
    self._run_callback(callback, msg)
  File "/usr/local/lib/python3.4/dist-packages/zmq/eventloop/zmqstream.py", line 414, in _run_callback
    callback(*args, **kwargs)
  File "/usr/local/lib/python3.4/dist-packages/tornado/stack_context.py", line 275, in null_wrapper
    return fn(*args, **kwargs)
  File "/usr/local/lib/python3.4/dist-packages/ipykernel/kernelbase.py", line 276, in dispatcher
    return self.dispatch_shell(stream, msg)
  File "/usr/local/lib/python3.4/dist-packages/ipykernel/kernelbase.py", line 228, in dispatch_shell
    handler(stream, idents, msg)
  File "/usr/local/lib/python3.4/dist-packages/ipykernel/kernelbase.py", line 390, in execute_request
    user_expressions, allow_stdin)
  File "/usr/local/lib/python3.4/dist-packages/ipykernel/ipkernel.py", line 196, in do_execute
    res = shell.run_cell(code, store_history=store_history, silent=silent)
  File "/usr/local/lib/python3.4/dist-packages/ipykernel/zmqshell.py", line 501, in run_cell
    return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
  File "/usr/local/lib/python3.4/dist-packages/IPython/core/interactiveshell.py", line 2717, in run_cell
    interactivity=interactivity, compiler=compiler, result=result)
  File "/usr/local/lib/python3.4/dist-packages/IPython/core/interactiveshell.py", line 2821, in run_ast_nodes
    if self.run_code(code, result):
  File "/usr/local/lib/python3.4/dist-packages/IPython/core/interactiveshell.py", line 2881, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-1-ef4a603980d8>", line 49, in <module>
    cross_entropy = tf.nn.softmax_cross_entropy_with_logits(rnn_outputs, y)
  File "/usr/local/lib/python3.4/dist-packages/tensorflow/python/ops/nn_ops.py", line 1449, in softmax_cross_entropy_with_logits
    precise_logits, labels, name=name)
  File "/usr/local/lib/python3.4/dist-packages/tensorflow/python/ops/gen_nn_ops.py", line 2265, in _softmax_cross_entropy_with_logits
    features=features, labels=labels, name=name)
  File "/usr/local/lib/python3.4/dist-packages/tensorflow/python/framework/op_def_library.py", line 759, in apply_op
    op_def=op_def)
  File "/usr/local/lib/python3.4/dist-packages/tensorflow/python/framework/ops.py", line 2240, in create_op
    original_op=self._default_original_op, op_def=op_def)
  File "/usr/local/lib/python3.4/dist-packages/tensorflow/python/framework/ops.py", line 1128, in __init__
    self._traceback = _extract_stack()

InvalidArgumentError (see above for traceback): logits and labels must be same size: logits_size=[256,32] labels_size=[32,8]
	 [[Node: SoftmaxCrossEntropyWithLogits = SoftmaxCrossEntropyWithLogits[T=DT_FLOAT, _device="/job:localhost/replica:0/task:0/cpu:0"](Reshape_1, Reshape_2)]]

In [5]:
import numpy as np

data = np.random.randint(0, 2, size=[4, 8, 1])

# Generate labels from data
# The arbitrary problem for the machine is to find the
# connection length from left to right.
labels = np.zeros([4, 8, 1], dtype=int)
for i, board in enumerate(data):

    #         if np.sum(board, axis=0) == n:  # quickly get fully connected boards
    #             labels[i] = 1
    #         else:
    #             labels[i] = 0

    # Stepwise look for 1's per grid
    connection_length = 0
    for j, grid in enumerate(board):
        if grid == 1:
            connection_length += 1
        else:
            break  # stop looking to save some computation
    if connection_length:
        labels[i][connection_length - 1] = 1

labels


Out[5]:
array([[[0],
        [0],
        [0],
        [0],
        [0],
        [0],
        [0],
        [0]],

       [[0],
        [0],
        [0],
        [0],
        [0],
        [0],
        [0],
        [0]],

       [[0],
        [1],
        [0],
        [0],
        [0],
        [0],
        [0],
        [0]],

       [[1],
        [0],
        [0],
        [0],
        [0],
        [0],
        [0],
        [0]]])

In [9]:
class rnn_model(object):
    def __init__(self, 
                 x,
                 y=None,
                 n_hidden=32,
                 batch_size=32):
        self.x = x
        self.y = y
        self.n_hidden = n_hidden
        self.batch_size = batch_size
        
#     @property
    def inference(self):
        '''forward pass up to logits'''
        cell = tf.nn.rnn_cell.LSTMCell(self.n_hidden)
        init_state = cell.zero_state(self.batch_size, tf.float32)
        rnn_outputs, final_state = tf.nn.dynamic_rnn(cell, self.x, dtype=tf.float32)

        # FC1
        fc1 = tf.reshape(rnn_outputs, [-1, self.n_length])

        with tf.variable_scope('softmax'):
            W = tf.get_variable('W', [self.n_hidden, self.n_length])
            b = tf.get_variable('b', [self.n_length], initializer=tf.constant_initializer(0.0))

        # ?? logits
        logits = tf.matmul(fc1, W) + b
        return logits
    
#     @property
    def loss(self):
        y_reshaped = tf.reshape(self.y, [-1])
        cross_entropy = tf.nn.softmax_cross_entropy_with_logits(self.logits, y_reshaped)
        self.loss = tf.reduce_mean(cross_entropy)
        return total_loss
    
#     @property
    def optimize(self):
        return tf.train.AdamOptimizer(learning_rate).minimize(self.loss, name='optimizer')

In [5]:
%time np.array(np.random.choice(2, size=(4,)))
%time np.random.randint(0, 2, size=[1, 4])


CPU times: user 0 ns, sys: 0 ns, total: 0 ns
Wall time: 126 µs
CPU times: user 0 ns, sys: 0 ns, total: 0 ns
Wall time: 45.8 µs
Out[5]:
array([[0, 1, 0, 0]])

In [20]:
import tensorflow as tf
import numpy as np

from board_utils import generate_1d_data
from board_utils import build_1d_datasets

def reset_graph():
    '''
    reset graph so we can run it multiple times without 
    duplicating variables (causing nasty errors)
    '''
    if 'sess' in globals() and sess:
        sess.close()  # Close any open session
    tf.reset_default_graph()  # clear graph stack

def build_graph(state_size=4,  #?
                n_length=8,
                num_hidden=32,
                learning_rate=1e-3,
                batch_size=32,
                num_classes=8,
                num_steps=8):

    reset_graph()
    
    # placeholders
    #
    
    x = tf.placeholder(tf.int32, [None, n_length])
    y = tf.placeholder(tf.int32, [None, n_length])

    # Inputs
    #
    
    embeddings = tf.get_variable('embedding_matrix', [n_length, state_size])
    rnn_inputs = tf.nn.embedding_lookup(embeddings, x)  # get correct shape of input
    print(rnn_inputs)

    # RNN
    #
    
    cell = tf.nn.rnn_cell.LSTMCell(state_size)
    init_state = cell.zero_state(batch_size, tf.float32)

    # Add rnn_cells to graph
    # dynamic_rnn wants input shape [batch, time, state_size]
    # alternatively use nested tensors of [[batch, time]]
    rnn_outputs, final_state = tf.nn.dynamic_rnn(cell, rnn_inputs, initial_state=init_state)
    
    # Prediction loss and optimize
    #
    
    with tf.variable_scope('softmax'):
        W = tf.get_variable('W', [state_size, num_classes])
        b = tf.get_variable('b', [num_classes], initializer=tf.constant_initializer(0.0))
    
    # reshape rnn_outputs so a single matmul is possible
    rnn_outputs = tf.reshape(rnn_outputs, [-1, state_size])
    y_reshaped = tf.reshape(y, [-1])
    logits = tf.matmul(rnn_outputs, W) + b
    
#     logits = [tf.matmul(rnn_output, W) + b for rnn_output in rnn_outputs]
#     predictions = [tf.nn.softmax(logit) for logit in logits]
#     y_as_list = [tf.squeeze(i, squeeze_dims=[1]) for i in tf.split(1, num_steps, y)]

    loss_weights = [tf.ones([batch_size]) for i in range(num_steps)]
#     losses = tf.nn.seq2seq.sequence_loss_by_example(logits, y_as_list, loss_weights)
    losses = tf.nn.seq2seq.sequence_loss_by_example(logits, y_reshaped, loss_weights)
    total_loss = tf.reduce_mean(losses)
    optimize = tf.train.AdamOptimizer(learning_rate).minimize(total_loss)

    return dict(
        x = x,
        y = y,
        init_state = init_state,
        final_state = final_state,
        total_loss = total_loss,
        optimize = optimize)
    
def train_network(g,
                  num_epochs,
                  num_steps,
                  batch_size=32,
                  verbose=True):

    data, labels = generate_1d_data(8, one_hot=True)
    datasets = build_1d_datasets(data, labels)
    
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())

        total_loss = 0
        for step in range(num_steps):
            xf, yf = datasets.train.next_batch(batch_size)

            _, training_loss = sess.run([g['optimize'], g['total_loss']], 
                         feed_dict={g['x']: xf, g['y']: yf})
            
            total_loss += training_loss
            if step % 50 == 0 and step > 0:
                if verbose:
                    print('step:{}, training loss per step:{:.3}'.format(step, training_loss / step))


g = build_graph()
train_network(g, 1, 1000)


[<tf.Tensor 'unstack:0' shape=(?, 8) dtype=float32>, <tf.Tensor 'unstack:1' shape=(?, 8) dtype=float32>, <tf.Tensor 'unstack:2' shape=(?, 8) dtype=float32>, <tf.Tensor 'unstack:3' shape=(?, 8) dtype=float32>, <tf.Tensor 'unstack:4' shape=(?, 8) dtype=float32>, <tf.Tensor 'unstack:5' shape=(?, 8) dtype=float32>, <tf.Tensor 'unstack:6' shape=(?, 8) dtype=float32>, <tf.Tensor 'unstack:7' shape=(?, 8) dtype=float32>]
step:50, training loss per step:0.0378
step:100, training loss per step:0.0151
step:150, training loss per step:0.00761
step:200, training loss per step:0.00435
step:250, training loss per step:0.00276
step:300, training loss per step:0.0017
step:350, training loss per step:0.0013
step:400, training loss per step:0.000902
step:450, training loss per step:0.000989
step:500, training loss per step:0.000797
step:550, training loss per step:0.000568
step:600, training loss per step:0.000545
step:650, training loss per step:0.000462
step:700, training loss per step:0.000422
step:750, training loss per step:0.0004
step:800, training loss per step:0.000315
step:850, training loss per step:0.000255
step:900, training loss per step:0.000282
step:950, training loss per step:0.000193

In [33]:
'''
Debugging input shapes
'''

reset_graph()

x = tf.placeholder(tf.int32, [None, 8])
y = tf.placeholder(tf.int32, [None, 8])
embeddings = tf.get_variable('emb_matrix', [8, 4])
rnn_inputs = tf.nn.embedding_lookup(embeddings, x)
rnn_inputs

cell = tf.nn.rnn_cell.LSTMCell(4)
init_state = cell.zero_state(32, tf.float32)
rnn_outputs, final_state = tf.nn.dynamic_rnn(cell, rnn_inputs, initial_state=init_state)
print(rnn_outputs)
rnn_outputs = tf.reshape(rnn_outputs, [-1, 4])
y_reshaped = tf.reshape(y, [-1])

rnn_outputs
y_reshaped


Tensor("RNN/transpose:0", shape=(32, 5, 4), dtype=float32)
Out[33]:
<tf.Tensor 'Reshape_1:0' shape=(?,) dtype=int32>

In [ ]: