In [1]:
import numpy as np #matrix math 
import tensorflow as tf #machine learning
import helpers #for formatting data into batches and generating random sequence data

tf.reset_default_graph() #Clears the default graph stack and resets the global default graph.
sess = tf.InteractiveSession() #initializes a tensorflow session


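helpers is not part of TensorFlow; it is a small companion module from the seq2seq tutorial this notebook follows and it has to sit next to the notebook file, otherwise the import above fails with ModuleNotFoundError. Below is a minimal sketch of the two functions the notebook actually calls, batch and random_sequences, reconstructed from how they are used later in the notebook; the original file may differ in detail.

#helpers.py (sketch)
import numpy as np

def batch(inputs, max_sequence_length=None):
    #pad a list of integer sequences with PAD (0) and return them time-major: [max_time, batch_size]
    sequence_lengths = [len(seq) for seq in inputs]
    batch_size = len(inputs)
    if max_sequence_length is None:
        max_sequence_length = max(sequence_lengths)
    inputs_batch_major = np.zeros(shape=[batch_size, max_sequence_length], dtype=np.int32)
    for i, seq in enumerate(inputs):
        for j, element in enumerate(seq):
            inputs_batch_major[i, j] = element
    #transpose to [max_time, batch_size], which is what the time_major=True calls below expect
    inputs_time_major = inputs_batch_major.swapaxes(0, 1)
    return inputs_time_major, sequence_lengths

def random_sequences(length_from, length_to, vocab_lower, vocab_upper, batch_size):
    #endless generator of batches of random integer sequences with random lengths
    while True:
        yield [
            np.random.randint(low=vocab_lower, high=vocab_upper,
                              size=np.random.randint(length_from, length_to + 1)).tolist()
            for _ in range(batch_size)
        ]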

In [2]:
tf.__version__


Out[2]:
'1.2.1'

In [3]:
PAD = 0
EOS = 1

vocab_size = 10
input_embedding_size = 20 #dimensionality of each embedding vector

encoder_hidden_units = 20 #number of LSTM units in the encoder
decoder_hidden_units = encoder_hidden_units * 2 #the encoder is bidirectional, so its concatenated state is twice this wide

In [5]:
#input placeholders
encoder_inputs = tf.placeholder(shape=(None, None), dtype=tf.int32, name='encoder_inputs')
#contains the length of each sequence in the batch; we will pad so they are all the same length
#if you don't want to pad, check out dynamic memory networks for handling variable length sequences
encoder_inputs_length = tf.placeholder(shape=(None,), dtype=tf.int32, name='encoder_inputs_length')
decoder_targets = tf.placeholder(shape=(None, None), dtype=tf.int32, name='decoder_targets')

In [6]:
#randomly initialized embedding matrix that can fit the input sequence
#used to convert sequences to vectors (embeddings) of the right size, for both encoder and decoder
#reshaping matters: in TF you have to make sure your tensors have the right shape (number of dimensions)
embeddings = tf.Variable(tf.random_uniform([vocab_size, input_embedding_size], -1.0, 1.0), dtype=tf.float32)

#this thing could get huge in a real world application
encoder_inputs_embedded = tf.nn.embedding_lookup(embeddings, encoder_inputs)

In [7]:
from tensorflow.python.ops.rnn_cell import LSTMCell, LSTMStateTuple

In [9]:
encoder_cell = LSTMCell(encoder_hidden_units)
((encoder_fw_outputs,
  encoder_bw_outputs),
 (encoder_fw_final_state,
  encoder_bw_final_state)) = (
    tf.nn.bidirectional_dynamic_rnn(cell_fw=encoder_cell,
                                    cell_bw=encoder_cell,
                                    inputs=encoder_inputs_embedded,
                                    sequence_length=encoder_inputs_length,
                                    dtype=tf.float32, time_major=True) #the embeddings are float32, so the RNN must run in float32
    )



In [10]:
encoder_fw_outputs



In [11]:
#Concatenates tensors along one dimension.
encoder_outputs = tf.concat((encoder_fw_outputs, encoder_bw_outputs), 2)

#the letters h and c are commonly used to denote "output value" and "cell state"
#http://colah.github.io/posts/2015-08-Understanding-LSTMs/
#those tensors represent the combined internal state of the cell and should be passed around together

encoder_final_state_c = tf.concat(
    (encoder_fw_final_state.c, encoder_bw_final_state.c), 1)

encoder_final_state_h = tf.concat(
    (encoder_fw_final_state.h, encoder_bw_final_state.h), 1)

#TF Tuple used by LSTM Cells for state_size, zero_state, and output state.
encoder_final_state = LSTMStateTuple(
    c=encoder_final_state_c,
    h=encoder_final_state_h
)



In [12]:
decoder_cell = LSTMCell(decoder_hidden_units)

In [13]:
#get the dynamic shape of the encoder inputs: (max_time, batch_size), since everything here is time-major
encoder_max_time, batch_size = tf.unstack(tf.shape(encoder_inputs))

In [14]:
decoder_lengths = encoder_inputs_length + 3
# +2 additional steps, +1 leading <EOS> token for decoder inputs

In [15]:
#create padded inputs for the decoder from the word embeddings

#we're telling the program to test a condition and trigger an error if the condition is false
assert EOS == 1 and PAD == 0

eos_time_slice = tf.ones([batch_size], dtype=tf.int32, name='EOS')
pad_time_slice = tf.zeros([batch_size], dtype=tf.int32, name='PAD')

#retrieves rows of the params tensor. The behavior is similar to using indexing with arrays in numpy
eos_step_embedded = tf.nn.embedding_lookup(embeddings, eos_time_slice)
pad_step_embedded = tf.nn.embedding_lookup(embeddings, pad_time_slice)

In [16]:
#manually specifying loop function through time - to get initial cell state and input to RNN
#normally we'd just use dynamic_rnn, but let's get into the details here with raw_rnn

#we define and return these values, no operations occur here
def loop_fn_initial():
    initial_elements_finished = (0 >= decoder_lengths)  # all False at the initial step
    #the decoder is first fed the <EOS> token embedding
    initial_input = eos_step_embedded
    #the encoder's final state becomes the decoder's initial cell state
    initial_cell_state = encoder_final_state
    #no output has been produced yet
    initial_cell_output = None
    initial_loop_state = None  # we don't need to pass any additional information
    return (initial_elements_finished,
            initial_input,
            initial_cell_state,
            initial_cell_output,
            initial_loop_state)
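The next cell projects the decoder's previous output onto the vocabulary using weights W and b, but none of the cells shown here defines them (they were presumably created in one of the cells not shown, e.g. In [4] or In [8]). A plausible definition with the shapes the projection needs, decoder_hidden_units in and vocab_size out; treat it as an assumption rather than the author's exact cell:

#output projection (assumed): maps a decoder hidden state of size decoder_hidden_units to vocab_size logits
W = tf.Variable(tf.random_uniform([decoder_hidden_units, vocab_size], -1, 1), dtype=tf.float32)
b = tf.Variable(tf.zeros([vocab_size]), dtype=tf.float32)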

In [17]:
#loop transition function: at each time step, feed the decoder's own previous prediction back in as the next input (greedy decoding; no attention mechanism here)
def loop_fn_transition(time, previous_output, previous_state, previous_loop_state):

    
    def get_next_input():
        #project the previous output onto the vocabulary: multiply by weights W, then add biases b
        output_logits = tf.add(tf.matmul(previous_output, W), b)
        #"logits" are raw, unscaled scores straight out of the projection:
        #they are not probabilities (they need not sum to 1 and can take any real value)
        #prediction value at current time step
        
        #Returns the index with the largest value across axes of a tensor.
        prediction = tf.argmax(output_logits, axis=1)
        #embed prediction for the next input
        next_input = tf.nn.embedding_lookup(embeddings, prediction)
        return next_input
    
    
    elements_finished = (time >= decoder_lengths) # this operation produces boolean tensor of [batch_size]
                                                  # defining if corresponding sequence has ended

    
    
    #Computes the "logical and" of elements across dimensions of a tensor.
    finished = tf.reduce_all(elements_finished) # -> boolean scalar
    #Return either fn1() or fn2() based on the boolean predicate pred.
    input = tf.cond(finished, lambda: pad_step_embedded, get_next_input)
    
    #set previous to current
    state = previous_state
    output = previous_output
    loop_state = None

    return (elements_finished, 
            input,
            state,
            output,
            loop_state)

In [18]:
def loop_fn(time, previous_output, previous_state, previous_loop_state):
    if previous_state is None:    # time == 0
        assert previous_output is None and previous_state is None
        return loop_fn_initial()
    else:
        return loop_fn_transition(time, previous_output, previous_state, previous_loop_state)

#Creates an RNN specified by RNNCell cell and loop function loop_fn.
#This function is a more primitive version of dynamic_rnn that provides more direct access to the 
#inputs each iteration. It also provides more control over when to start and finish reading the sequence, 
#and what to emit for the output.
#ta = tensor array
decoder_outputs_ta, decoder_final_state, _ = tf.nn.raw_rnn(decoder_cell, loop_fn)
decoder_outputs = decoder_outputs_ta.stack()



In [19]:
decoder_outputs



In [20]:
#to convert output to human readable prediction
#we will reshape output tensor

#Unpacks the given dimension of a rank-R tensor into rank-(R-1) tensors.
#reduces dimensionality
decoder_max_steps, decoder_batch_size, decoder_dim = tf.unstack(tf.shape(decoder_outputs))
#flattened output tensor
decoder_outputs_flat = tf.reshape(decoder_outputs, (-1, decoder_dim))
#pass the flattened tensor through the output projection (W, b) to get logits over the vocabulary
decoder_logits_flat = tf.add(tf.matmul(decoder_outputs_flat, W), b)
#prediction values
decoder_logits = tf.reshape(decoder_logits_flat, (decoder_max_steps, decoder_batch_size, vocab_size))



In [21]:
#final prediction
decoder_prediction = tf.argmax(decoder_logits, 2)



In [22]:
#cross entropy loss
#one-hot encode the targets so each time step is scored against a full distribution over the vocabulary
stepwise_cross_entropy = tf.nn.softmax_cross_entropy_with_logits(
    labels=tf.one_hot(decoder_targets, depth=vocab_size, dtype=tf.float32),
    logits=decoder_logits,
)

#loss function
loss = tf.reduce_mean(stepwise_cross_entropy)
#train it 
train_op = tf.train.AdamOptimizer().minimize(loss)



In [23]:
sess.run(tf.global_variables_initializer())



In [24]:
batch_size = 100

batches = helpers.random_sequences(length_from=3, length_to=8,
                                   vocab_lower=2, vocab_upper=10,
                                   batch_size=batch_size)

print('head of the batch:')
for seq in next(batches)[:10]:
    print(seq)



In [25]:
def next_feed():
    batch = next(batches)
    #pad the raw sequences and return them time-major, along with their true lengths
    encoder_inputs_, encoder_input_lengths_ = helpers.batch(batch)
    #decoder targets are the same sequences followed by <EOS>, then PAD-filled out to length + 3
    decoder_targets_, _ = helpers.batch(
        [(sequence) + [EOS] + [PAD] * 2 for sequence in batch]
    )
    return {
        encoder_inputs: encoder_inputs_,
        encoder_inputs_length: encoder_input_lengths_,
        decoder_targets: decoder_targets_,
    }
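For concreteness, here is a tiny hand-worked feed, assuming helpers.batch pads with PAD (0) and returns time-major arrays as in the sketch near the top of the notebook (hypothetical values, not actual notebook output):

#batch = [[5, 7, 8], [3, 4]]
#encoder_inputs_        -> [[5, 3],      shape [max_time, batch_size]
#                           [7, 4],
#                           [8, 0]]
#encoder_input_lengths_ -> [3, 2]
#decoder_targets_       -> each sequence followed by <EOS> (1), PAD-filled to length + 3:
#                          [[5, 3],
#                           [7, 4],
#                           [8, 1],
#                           [1, 0],
#                           [0, 0],
#                           [0, 0]]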

In [26]:
loss_track = []

In [27]:
max_batches = 3001
batches_in_epoch = 1000

try:
    for batch in range(max_batches):
        fd = next_feed()
        _, l = sess.run([train_op, loss], fd)
        loss_track.append(l)

        if batch == 0 or batch % batches_in_epoch == 0:
            print('batch {}'.format(batch))
            print('  minibatch loss: {}'.format(sess.run(loss, fd)))
            predict_ = sess.run(decoder_prediction, fd)
            for i, (inp, pred) in enumerate(zip(fd[encoder_inputs].T, predict_.T)):
                print('  sample {}:'.format(i + 1))
                print('    input     > {}'.format(inp))
                print('    predicted > {}'.format(pred))
                if i >= 2:
                    break
            print()

except KeyboardInterrupt:
    print('training interrupted')



In [28]:
%matplotlib inline
import matplotlib.pyplot as plt
plt.plot(loss_track)
print('loss {:.4f} after {} examples (batch_size={})'.format(loss_track[-1], len(loss_track)*batch_size, batch_size))



In [ ]: