Wayne Nixalo - 25 Jun 2017

RNN practice in Theano -- 2nd attempt



In [2]:

    
import os, sys
sys.path.insert(1, os.path.join('../utils'))
from utils import *
import theano









    



Using Theano backend.



In [3]:

    
path = get_file('nietzsche.txt', origin='https://s3.amazonaws/com/text-datasets/nietzsche.txt')
text = open(path).read()
print('corpus length:', len(text))









    



('corpus length:', 600901)



In [4]:

    
chars = sorted(list(set(text)))
vocab_size = len(chars) + 1
print('total chars:', vocab_size)









    



('total chars:', 86)



In [5]:

    
chars.insert(0, '\0')
''.join(chars[1:-6])









    Out[5]:





'\n !"\'(),-.0123456789:;=?ABCDEFGHIJKLMNOPQRSTUVWXYZ[]_abcdefghijklmnopqrstuvwxyz'



In [11]:

    
char_indices = dict((c, i) for i, c in enumerate(chars))
indices_char = dict((i, c) for i, c in enumerate(chars))



In [12]:

    
idx = [char_indices[c] for c in text]
idx[:10]









    Out[12]:





[40, 42, 29, 30, 25, 27, 29, 1, 1, 1]



In [13]:

    
''.join(indices_char[i] for i in idx[:70])









    Out[13]:





'PREFACE\n\n\nSUPPOSING that Truth is a woman--what then? Is there not gro'



In [19]:

    
n_hidden, n_fac, cs, vocab_size = (256, 42, 8, 86)



In [20]:

    
c_in_dat = [[idx[i+n] for i in xrange(0, len(idx) - 1 - cs, cs)] for n in range(cs)]
# c_out_dat = [idx[i+cs] for i in xrange(0, len(idx) - 1 - cs, cs)]
c_out_dat = [[idx[i+n] for i in xrange(1, len(idx) - cs, cs)] for n in range(cs)]

xs = [np.stack(c[:-2]) for c in c_in_dat]
# y = np.stack(c_out_dat[:-2])
ys = [np.stack(c[:-2]) for c in c_out_dat]



In [51]:

    
# [xs[n][:cs] for n in range(cs)]
xs[0].shape
xs = xs.expand_dims()









    Out[51]:





(75110,)



In [21]:

    
[xs[n][:cs] for n in range(cs)]









    Out[21]:





[array([40,  1, 33,  2, 72, 67, 73,  2]),
 array([42,  1, 38, 44,  2,  9, 61, 73]),
 array([29, 43, 31, 71, 54,  9, 58, 61]),
 array([30, 45,  2, 74,  2, 76, 67, 58]),
 array([25, 40, 73, 73, 76, 61, 24, 71]),
 array([27, 40, 61, 61, 68, 54,  2, 58]),
 array([29, 39, 54,  2, 66, 73, 33,  2]),
 array([ 1, 43, 73, 62, 54,  2, 72, 67])]



In [22]:

    
[ys[n][:cs] for n in range(cs)]









    Out[22]:





[array([42,  1, 38, 44,  2,  9, 61, 73]),
 array([29, 43, 31, 71, 54,  9, 58, 61]),
 array([30, 45,  2, 74,  2, 76, 67, 58]),
 array([25, 40, 73, 73, 76, 61, 24, 71]),
 array([27, 40, 61, 61, 68, 54,  2, 58]),
 array([29, 39, 54,  2, 66, 73, 33,  2]),
 array([ 1, 43, 73, 62, 54,  2, 72, 67]),
 array([ 1, 33,  2, 72, 67, 73,  2, 68])]



In [24]:

    
oh_ys = [to_categorical(o, vocab_size) for o in ys]
oh_y_rnn = np.stack(oh_ys, axis=1)

oh_xs = [to_categorical(o, vocab_size) for o in xs]
oh_x_rnn = np.stack(oh_xs, axis=1)

oh_x_rnn.shape, oh_y_rnn.shape









    Out[24]:





((75110, 8, 86), (75110, 8, 86))



In [25]:

    
# THEANO RNN



In [26]:

    
n_input = vocab_size
n_output = vocab_size



In [27]:

    
def init_wgts(rows, cols):
    scale = math.sqrt(2/rows)
    return shared(normal(scale=scale, size=(rows, cols)).astype(np.float32))
def init_bias(rows):
    return shared(np.zeros(rows, dtype=np.float32))



In [28]:

    
def wgts_and_bias(n_in, n_out):
    return init_wgts(n_in, n_out), init_bias(n_out)
def id_and_bias(n):
    return shared(np.eye(n, dtype=np.float32)), init_bias(n)



In [29]:

    
t_inp = T.matrix('inp')
t_outp = T.matrix('outp')
t_h0 = T.vector('h0')
lr = T.scalar('lr')

all_args = [t_h0, t_inp, t_outp, lr]



In [30]:

    
W_h = id_and_bias(n_hidden)
W_x = wgts_and_bias(n_input, n_hidden)
W_y = wgts_and_bias(n_hidden, n_output)
w_all = list(chain.from_iterable([W_h, W_x, W_y]))



In [31]:

    
def step(x, h, W_h, b_h, W_x, b_x, W_y, b_y):
    # Calculate the hidden activations
    h = nnet.relu(T.dot(x, W_x) + b_x + T.dot(h, W_h) + b_h)
    # Calculate the output activations
    y = nnet.softmax(T.dot(h, W_y) + b_y)
    # Return both (the `Flatten()` is to work around a Theano bug)
    return h, T.flatten(y, 1)



In [32]:

    
[v_h, v_y], _ = theano.scan(step, sequences=t_inp,
                            outputs_info=[t_h0, None], non_sequences=w_all)



In [34]:

    
error = nnet.categorical_crossentropy(v_y, t_outp).sum()
g_all = T.grad(error, w_all)



In [36]:

    
def upd_dict(wgts, grads, lr):
    return OrderedDict({w: w - g * lr for (w,g) in zip(wgts, grads)})

upd = upd_dict(w_all, g_all, lr)



In [37]:

    
fn = theano.function(all_args, error, updates=upd, allow_input_downcast=True)
X = oh_x_rnn
Y = oh_y_rnn
X.shape, Y.shape









    



/Users/WayNoxchi/Miniconda3/Theano/theano/tensor/basic.py:5130: UserWarning: flatten outdim parameter is deprecated, use ndim instead.
  "flatten outdim parameter is deprecated, use ndim instead.")






    Out[37]:





((75110, 8, 86), (75110, 8, 86))



In [40]:

    
err=0.0; l_rate=0.01
for i in xrange(len(X)):
    err += fn(np.zeros(n_hidden), X[i], Y[i], l_rate)
    if i % 1000 == 999:
        print ("Error:{:.3f}".format(err/1000))
        err=0.0









    



Error:26.383
Error:25.537
Error:25.596
Error:25.438
Error:25.291
Error:25.452
Error:25.153
Error:25.084
Error:25.136
Error:25.483
Error:24.790
Error:25.001
Error:26.439
Error:25.009
Error:24.883
Error:26.043
Error:25.936
Error:25.713
Error:25.037
Error:24.935
Error:24.869
Error:25.089
Error:25.348
Error:24.951
Error:25.125
Error:25.142
Error:25.315
Error:24.994
Error:25.113
Error:25.299
Error:25.436
Error:25.178
Error:25.547
Error:25.049
Error:25.252
Error:25.590
Error:25.059
Error:25.379
Error:25.276
Error:25.588
Error:24.948
Error:24.952
Error:25.199
Error:25.383
Error:25.642
Error:25.678
Error:25.148
Error:24.072
Error:24.635
Error:24.867
Error:24.429
Error:24.571
Error:24.357
Error:24.355
Error:24.774
Error:24.590
Error:24.631
Error:24.551
Error:24.523
Error:24.688
Error:24.431
Error:24.619
Error:24.659
Error:24.774
Error:24.477
Error:24.342
Error:24.341
Error:24.398
Error:24.153
Error:24.214
Error:24.885
Error:24.407
Error:24.177
Error:24.022
Error:23.951



In [41]:

    
f_y = theano.function([t_h0, t_inp], v_y, allow_input_downcast=True)



In [42]:

    
pred = np.argmax(f_y(np.zeros(n_hidden), X[6]), axis=1)
act = np.argmax(X[6], axis=1)



In [43]:

    
[indices_char[o] for o in act]









    Out[43]:





['t', 'h', 'e', 'n', '?', ' ', 'I', 's']



In [44]:

    
[indices_char[o] for o in pred]









    Out[44]:





[' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ']



In [ ]:



In [ ]: