In [3]:
"""
Minimal character-level TensorFlow RNN model.
Original code written by Andrej Karpathy (@karpathy), adapted to TensorFlow by Damien Henry (@dh7net)
BSD License
"""
import numpy as np
import tensorflow as tf
tf.reset_default_graph() # only useful in Jupyter, if you want to run the cell several times

# data I/O
data = open('methamorphosis.txt', 'r').read() # should be a simple plain-text file
chars = list(set(data))
data_size, vocab_size = len(data), len(chars)
print 'data has %d characters, %d unique.' % (data_size, vocab_size)
char_to_ix = { ch:i for i,ch in enumerate(chars) } # to convert a char to an ID
ix_to_char = { i:ch for i,ch in enumerate(chars) } # to convert an ID back to a char
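# e.g. if chars happened to be ['a', 'b', 'c'] then char_to_ix['b'] == 1 and ix_to_char[1] == 'b'
# (set() ordering is arbitrary, so the actual mapping varies between runs)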

# hyperparameters
hidden_size = 100 # size of hidden layer of neurons
seq_length = 25 # number of steps to unroll the RNN for
learning_rate = 1e-2

# model parameters
Wxh = tf.Variable(tf.random_uniform((hidden_size, vocab_size))*0.01, name='Wxh') # input to hidden
Whh = tf.Variable(tf.random_uniform((hidden_size, hidden_size))*0.01, name='Whh') # hidden to hidden
Why = tf.Variable(tf.random_uniform((vocab_size, hidden_size))*0.01, name='Why') # hidden to output
bh = tf.Variable(tf.zeros((hidden_size, 1)), name='bh') # hidden bias
by = tf.Variable(tf.zeros((vocab_size, 1)), name='by') # output bias
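# together these parameters implement the vanilla RNN recurrence used in the forward pass below:
#   h[t] = tanh(Wxh·x[t] + Whh·h[t-1] + bh)
#   y[t] = Why·h[t] + by
# where x[t] is a one-hot column vector of shape (vocab_size, 1)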

# loss function
#  Define placeholders for the input and the target & split them into per-step sequences
input_data = tf.placeholder(tf.float32, [seq_length, vocab_size], name='input_data')
xs = tf.split(0, seq_length, input_data)
target_data = tf.placeholder(tf.float32, [seq_length, vocab_size], name='target_data') 
targets = tf.split(0, seq_length, target_data)  
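# tf.split(0, seq_length, tensor) cuts a (seq_length, vocab_size) tensor along axis 0,
# so xs and targets are lists of seq_length tensors of shape (1, vocab_size);
# each one is transposed into a (vocab_size, 1) column vector inside the loop below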
#  initial_state & loss
initial_state = tf.zeros((hidden_size, 1))
loss = tf.zeros([1], name='loss')
#  unroll the recurrence to build the loss
hs, ys, ps = {}, {}, {}
hs[-1] = initial_state
# forward pass
for t in xrange(seq_length):
    xs_t = tf.transpose(xs[t])
    targets_t = tf.transpose(targets[t]) 
    hs[t] = tf.tanh(tf.matmul(Wxh, xs_t) + tf.matmul(Whh, hs[t-1]) + bh) # hidden state
    ys[t] = tf.matmul(Why, hs[t]) + by # unnormalized log probabilities for next chars
    ps[t] = tf.exp(ys[t]) / tf.reduce_sum(tf.exp(ys[t])) # probabilities for next chars
    loss += -tf.log(tf.reduce_sum(tf.mul(ps[t], targets_t))) # softmax (cross-entropy loss)
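    # targets_t is one-hot, so the reduce_sum above selects the predicted probability
    # of the correct next character: each step adds -log p(correct) to the total loss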

cost = loss / seq_length
final_state = hs[seq_length-1]
tvars = tf.trainable_variables()
grads, _ = tf.clip_by_global_norm(tf.gradients(cost, tvars), 5) # clip to mitigate exploding gradients
optimizer = tf.train.AdamOptimizer(learning_rate)
train_op = optimizer.apply_gradients(zip(grads, tvars))
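# clip_by_global_norm rescales the whole list of gradients whenever their global L2 norm
# exceeds 5; Adam then applies the clipped gradients to the trainable variables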

def sample(h, seed_ix, n):
  """ 
  sample a sequence of integers from the model 
  h is memory state, seed_ix is seed letter for first time step
  """
  x = np.zeros((vocab_size, 1))
  x[seed_ix] = 1
  ixes = []
  for t in xrange(n):
    h = np.tanh(np.dot(Wxh.eval(), x) + np.dot(Whh.eval(), h) + bh.eval())
    y = np.dot(Why.eval(), h) + by.eval()
    p = np.exp(y) / np.sum(np.exp(y))
    ix = np.random.choice(range(vocab_size), p=p.ravel())
    x = np.zeros((vocab_size, 1))
    x[ix] = 1
    ixes.append(ix)
  return ixes
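# note: the .eval() calls above need a default session, so sample() only works inside
# the `with tf.Session()` block below, e.g. sample(state, char_to_ix['A'], 200)
# returns a list of 200 sampled character indices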

def vectorize(x): # take an array of indices and return an array of one-hot vectors
    vectorized = np.zeros((len(x), vocab_size))
    for i in range(0, len(x)):
        vectorized[i][x[i]] = 1
    return vectorized
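# e.g. vectorize([2]) returns a (1, vocab_size) array whose only nonzero entry is a 1
# in column 2: one one-hot row per input index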

n, p = 0, 0
smooth_loss = -np.log(1.0/vocab_size)*seq_length # loss at iteration 0 (note: 'cost' below is a per-step average, so this starting value is seq_length times larger in scale)
with tf.Session() as sess:
    tf.initialize_all_variables().run()
    print "all variable initialized"
    while True:
        # prepare inputs (we're sweeping from left to right in steps seq_length long)
        if p+seq_length+1 >= len(data) or n == 0: 
            state = initial_state.eval() # reset RNN memory
            p = 0 # go from start of data
        x = vectorize([char_to_ix[ch] for ch in data[p:p+seq_length]])
        y = vectorize([char_to_ix[ch] for ch in data[p+1:p+seq_length+1]])
        # Build the feed dictionary for this training step
        feed = {input_data: x, target_data: y, initial_state: state}
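        # initial_state is an ordinary tensor rather than a placeholder; TensorFlow
        # lets us feed it anyway, which is how the hidden state is carried across steps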
        # Run one training step and fetch the averaged loss and the final hidden state
        [train_loss], state, _ = sess.run([cost, final_state, train_op], feed)
        smooth_loss = smooth_loss * 0.999 + train_loss * 0.001
        # sample from the model now and then
        if n % 1000 == 0:
            print 'iter %d, loss: %f' % (n, smooth_loss) # print progress
            sample_ix = sample(state, char_to_ix['A'], 200)
            txt = ''.join(ix_to_char[ix] for ix in sample_ix)
            print '----\n %s \n----' % (txt, )

        p += seq_length # move data pointer
        n += 1 # iteration counter


data has 119163 characters, 61 unique.
all variables initialized
iter 0, loss: 102.673186
----
 .Dt) Bhtfq(CpHsI,G
zYpTTD;.,JdB lOU
qi.?uy:Erer!
aUhwVBo;rqFLzz:,aQ!BngUi:GbfAWf
A.uvUODoUEqpBHMmoxNAGoCm"AeIlyGy
LvIMU'PfajH)njUcz,LQgi?AbF(aSGArCEWtLpDP;DINryil?LJW)IWAbtDpnt'ynq b
o?!CSSMS'(zgPeNlt 
----
iter 100, loss: 93.292775
----
 akn'ifudvyAVip hEyniqheuYan i.
Guke,if;zob plo;nvg iuu;"iw;xaw; se xu
s,SieYofeYmt Fxak(yQ(obCYeFlyeePcdiheyzcn!hl;biS oupbdUubiirknoce;ooki"S yrW h.t
:ope,uarSSf;gFan za "iad 'feylJ  ify:kff-AMsn oge 
----
iter 200, loss: 84.733920
----
 slgisnfrgenug?ertrctss fdtn  aehsa eeubs l   voo o qe  wuvepon eyoh owseaea ilbaee neau  as tfnnb  s h ehetWaieefa teaai rbeYa g t tleeie nsellaad gH oh lcneide e  seheosi nttieta 'tuzs tkaicgdgrhnoks 
----
iter 300, loss: 76.980630
----
 ftamtucmn np  ekhaunaopg tye evod"cso e delueiinnce cumme"sotlmce new g demtaTt,astpee c s, ay  e ,y  aifoiGtabniiogm 
gs fdiroo
 eyp 
oei,srh  maeuettob m eukrdGaem EVo ec ,homo u d,s hlief sneoicp g 
----
---------------------------------------------------------------------------
KeyboardInterrupt                         Traceback (most recent call last)
<ipython-input-3-ee6ab1f0572d> in <module>()
     95         feed = {input_data: x, target_data: y, initial_state: state}
     96         # Run one training step and fetch the averaged loss and the final hidden state
---> 97         [train_loss], state, _ = sess.run([cost, final_state, train_op], feed)
     98         smooth_loss = smooth_loss * 0.999 + train_loss * 0.001
     99         # sample from the model now and then

KeyboardInterrupt: