Wayne Nixalo - 25 Jun 2017

RNN practice in Theano -- 3rd attempt



In [1]:

    
import theano
%matplotlib inline
import sys, os
sys.path.insert(1, os.path.join('../utils'))
import utils; reload(utils)
from utils import *
from __future__ import division, print_function









    



Using Theano backend.



In [2]:

    
# Resolve the local path of the Nietzsche corpus via get_file, then read it fully.
path = get_file('nietzsche.txt', origin="https://s3.amazonaws.com/text-datasets/nietzsche.txt")
# A context manager closes the file handle as soon as the text is read,
# instead of leaving it open until garbage collection.
with open(path) as corpus_file:
    text = corpus_file.read()
print('corpus length:', len(text))









    



corpus length: 600901



In [3]:

    
# Distinct characters of the corpus, in sorted order.
chars = sorted(set(text))
# Reserve one slot on top of the characters actually seen in the text.
vocab_size = 1 + len(chars)
print('total chars:', vocab_size)









    



total chars: 86



In [4]:

    
# Put a NUL character at index 0 of the vocabulary list.
# Guarded so that re-running this cell on a live kernel does not insert a
# second "\0" and shift every character index by one.
if not chars or chars[0] != "\0":
    chars.insert(0, "\0")
# ''.join(chars[1:-6])



In [5]:

    
# Two lookup tables over `chars`: character -> index and index -> character.
char_indices = {ch: pos for pos, ch in enumerate(chars)}
indices_char = dict(enumerate(chars))



In [6]:

    
# Encode the whole corpus as a list of integer character indices.
idx = [char_indices[ch] for ch in text]
# Peek at the first 10 encoded characters.
idx[:10]









    Out[6]:





[40, 42, 29, 30, 25, 27, 29, 1, 1, 1]



In [ ]:

    
# cs = 3
# c1_dat = [idx[i] for i in xrange(0, len(idx)-1-cs, cs)]
# c2_dat = [idx[i+1] for i in xrange(0, len(idx)-1-cs, cs)]
# c3_dat = [idx[i+2] for i in xrange(0, len(idx)-1-cs, cs)]
# c4_dat = [idx[i+3] for i in xrange(0, len(idx)-1-cs, cs)] # <-- gonna predict this

# # we can turn these into Numpy arrays just by stacking them up together
# x1 = np.stack(c1_dat[:-2]) # 1st chars
# x2 = np.stack(c2_dat[:-2]) # 2nd chars
# x3 = np.stack(c3_dat[:-2]) # 3rd chars
# # for every 4 character piece of this - collected works

# # labels will just be the 4th characters
# y = np.stack(c4_dat[:-2])

# # 1st, 2nd, 3rd chars of text
# print x1[:4], x2[:4], x3[:4]

# # 4th char of text
# print y[:3]



In [ ]:

    
# x1.shape, y.shape



In [ ]:

    
# # we're going to turn these into embeddings
# n_fac = 42
# # by creating an embedding matrix
# def embedding_input(name, n_in, n_out):
#     inp = Input(shape=(1,), dtype='int64', name=name)
#     emb = Embedding(n_in, n_out, input_length=1)(inp)
#     return inp, Flatten()(emb)

# c1_in, c1 = embedding_input('c1', vocab_size, n_fac)
# c2_in, c2 = embedding_input('c2', vocab_size, n_fac)
# c3_in, c3 = embedding_input('c3', vocab_size, n_fac)
# # c1, c2, c3 represent the result of putting each char through the embedding &
# # getting out a vector of 42 latent factors. <-- those are input to greenarrow.



In [ ]:

    
# n_hidden = 256
# dense_in = Dense(n_hidden, activation='relu')
# c1_hidden = dense_in(c1)
# dense_hidden = Dense(n_hidden, activation='tanh')

# c2_dense = dense_in(c2) # char-2 embedding thru greenarrow
# hidden_2 = dense_hidden(c1_hidden) # output of char-1's hidden state thru orangearrow
# c2_hidden = merge([c2_dense, hidden_2]) # merge the two together (default: sum)

# c3_dense = dense_in(c3)
# hidden_3 = dense_hidden(c2_hidden)
# c3_hidden = merge([c3_dense, hidden_3])

# dense_out = Dense(vocab_size, activation='softmax') #output size: 86 <-- vocab_size

# c4_out = dense_out(c3_hidden)

# # passing in our 3 inputs & 1 output
# model = Model([c1_in, c2_in, c3_in], c4_out)

# model.compile(loss='sparse_categorical_crossentropy', optimizer=Adam())
# model.optimizer.lr=0.001



In [ ]:

    
# model.fit([x1, x2, x3], y, batch_size=64, nb_epoch=1)

THEANO RNN



In [9]:

    
cs = 8  # sequence length: 8 input characters, each paired with the character that follows it

# Inputs: cut the corpus into non-overlapping windows of cs characters;
# list n holds the n-th character of every window.
in_starts = xrange(0, len(idx) - 1 - cs, cs)
c_in_dat = [[idx[start + n] for start in in_starts] for n in range(cs)]
xs = [np.stack(col[:-2]) for col in c_in_dat]

# Targets: the same windows shifted one character to the right.
out_starts = xrange(1, len(idx) - cs, cs)
c_out_dat = [[idx[start + n] for start in out_starts] for n in range(cs)]
ys = [np.stack(col[:-2]) for col in c_out_dat]

# One-hot encode every position, then stack the cs positions along axis 1.
oh_xs = [to_categorical(col, vocab_size) for col in xs]
oh_x_rnn = np.stack(oh_xs, axis=1)

oh_ys = [to_categorical(col, vocab_size) for col in ys]
oh_y_rnn = np.stack(oh_ys, axis=1)

oh_x_rnn.shape, oh_y_rnn.shape









    Out[9]:





((75110, 8, 86), (75110, 8, 86))



In [10]:

    
# Model sizes: hidden-state width, factor count (not used in this cell), sequence length.
n_hidden = 256
n_fac = 42
cs = 8

# Both the input and the output layer are vocabulary-sized.
n_input = n_output = vocab_size

def init_wgts(rows, cols):
    """Return a `shared` float32 matrix of shape (rows, cols) with random normal entries.

    The standard deviation is sqrt(2 / rows), so it depends only on the
    number of rows (He-style fan-in scaling, not the Glorot formula, which
    also involves the number of columns).
    """
    # 2.0 forces true division even when `from __future__ import division`
    # has not been executed in the current kernel; Python 2 integer division
    # would silently produce a scale of 0 for rows > 2.
    scale = math.sqrt(2.0 / rows)
    return shared(normal(scale=scale, size=(rows, cols)).astype(np.float32))
def init_bias(rows):
    """Return a `shared` float32 vector holding `rows` zeros."""
    zeros = np.zeros(rows, dtype=np.float32)
    return shared(zeros)
def wgts_and_bias(n_in, n_out):
    """Return a (weights, bias) pair: an (n_in, n_out) matrix from init_wgts and an n_out bias."""
    weights = init_wgts(n_in, n_out)
    bias = init_bias(n_out)
    return weights, bias
def id_and_bias(n):
    """Return a (weights, bias) pair whose weights are the n x n float32 identity matrix."""
    identity = shared(np.eye(n, dtype=np.float32))
    return identity, init_bias(n)

# Symbolic inputs of the compiled functions.
t_inp = T.matrix('inp')    # one input sequence; scan walks over its rows
t_outp = T.matrix('outp')  # targets compared against the per-step outputs
t_h0 = T.vector('h0')      # initial hidden state
lr = T.scalar('lr')        # learning rate

all_args = [t_h0, t_inp, t_outp, lr]

# Each W_* is a (weights, bias) pair; the hidden-to-hidden weights start as the identity.
W_h = id_and_bias(n_hidden)
W_x = wgts_and_bias(n_input, n_hidden)
W_y = wgts_and_bias(n_hidden, n_output)
# Flat list in the order of step()'s trailing parameters:
# hidden weights and bias, input weights and bias, output weights and bias.
w_all = list(W_h) + list(W_x) + list(W_y)

def step(x, h, W_h, b_h, W_x, b_x, W_y, b_y):
    """Run one recurrence step and return (new hidden state, flattened softmax output).

    x is the current input and h the previous hidden state; the remaining
    arguments are the hidden, input and output weights with their biases.
    """
    # Input contribution plus recurrent contribution, passed through a ReLU.
    from_input = T.dot(x, W_x) + b_x
    pre_activation = from_input + T.dot(h, W_h) + b_h
    h_new = nnet.relu(pre_activation)
    # Softmax over the output projection of the new hidden state.
    probs = nnet.softmax(T.dot(h_new, W_y) + b_y)
    # The flatten is a workaround for a Theano bug.
    return h_new, T.flatten(probs, 1)

# Apply step() along t_inp, starting from t_h0. Only the hidden state is fed
# back into the next step (the second output has no initial value).
scan_outputs, _ = theano.scan(step, sequences=t_inp,
                              outputs_info=[t_h0, None], non_sequences=w_all)
v_h, v_y = scan_outputs

# Summed cross-entropy between the per-step outputs and the targets,
# and its gradient with respect to every entry of w_all.
error = nnet.categorical_crossentropy(v_y, t_outp).sum()
g_all = T.grad(error, w_all)

def upd_dict(wgts, grads, lr):
    """Build the gradient-descent update mapping: each weight -> weight - gradient * lr.

    wgts and grads are paired positionally. The OrderedDict is filled
    directly from (key, value) pairs so its entries keep the order of
    `wgts`; going through an intermediate plain dict would lose that order
    on interpreters whose dicts are unordered (Python 2).
    """
    return OrderedDict((w, w - g * lr) for w, g in zip(wgts, grads))

upd = upd_dict(w_all, g_all, lr)

# Compile the training function: each call returns the summed error for one
# sequence and applies the weight updates in `upd`.
fn = theano.function(all_args, error, updates=upd, allow_input_downcast=True)

X = oh_x_rnn
Y = oh_y_rnn

l_rate = 0.01
report_every = 1000      # number of sequences averaged into each progress line
# Every sequence starts from the same all-zero hidden state, so build it once
# instead of allocating a fresh array on every iteration.
h0 = np.zeros(n_hidden)

err = 0.0
for i in xrange(len(X)):
    err += fn(h0, X[i], Y[i], l_rate)
    if i % report_every == report_every - 1:
        # Mean error over the last `report_every` sequences, then reset the running sum.
        print ("Error:{:.3f}".format(err/report_every))
        err = 0.0









    



/Users/WayNoxchi/Miniconda3/Theano/theano/tensor/basic.py:5130: UserWarning: flatten outdim parameter is deprecated, use ndim instead.
  "flatten outdim parameter is deprecated, use ndim instead.")






    



Error:25.145
Error:21.382
Error:20.902
Error:19.896
Error:18.812
Error:19.244
Error:19.097
Error:18.494
Error:17.925
Error:18.202
Error:17.502
Error:17.615
Error:18.461
Error:17.315
Error:16.782
Error:17.773
Error:17.333
Error:17.251
Error:16.822
Error:16.706
Error:16.546
Error:16.386
Error:16.727
Error:16.118
Error:16.795
Error:16.591
Error:16.019
Error:16.326
Error:16.223
Error:16.511
Error:16.686
Error:16.429
Error:16.664
Error:16.291
Error:16.009
Error:16.672
Error:15.998
Error:16.431
Error:16.081
Error:16.268
Error:15.323
Error:15.704
Error:15.754
Error:15.956
Error:15.917
Error:15.858
Error:15.637
Error:16.027
Error:16.016
Error:16.037
Error:15.219
Error:15.571
Error:15.002
Error:14.894
Error:15.676
Error:15.402
Error:14.696
Error:15.468
Error:15.095
Error:15.011
Error:15.033
Error:15.527
Error:15.278
Error:15.077
Error:14.729
Error:14.817
Error:14.281
Error:14.751
Error:15.191
Error:14.712
Error:15.226
Error:14.772
Error:14.493
Error:14.514
Error:14.471



In [11]:

    
# Forward-only function: per-step outputs for one sequence, with no weight updates.
f_y = theano.function([t_h0, t_inp], v_y, allow_input_downcast=True)

sample = X[6]
# Most likely character index at each step of the sample sequence.
probs = f_y(np.zeros(n_hidden), sample)
pred = np.argmax(probs, axis=1)

# Note: `act` decodes the one-hot *input* characters (X), not the targets (Y).
act = np.argmax(sample, axis=1)



In [12]:

    
# Characters of the sample's input sequence.
[indices_char[ix] for ix in act]









    Out[12]:





['t', 'h', 'e', 'n', '?', ' ', 'I', 's']



In [13]:

    
# Characters the model predicts at each step of the sample.
[indices_char[ix] for ix in pred]









    Out[13]:





['h', 'e', ' ', ' ', ' ', 'T', 't', ' ']