Wayne Nixalo - 22 Jun 2017 RNN practice in Theano


In [1]:
# library imports
import os, sys
sys.path.insert(1, os.path.join('../utils'))
# import utils; reload(utils)
from utils import *


Using Theano backend.

In [2]:
# Loading data - as in Lesson 6 JNB
path = get_file('nietzsche.txt', origin="https://s3.amazonaws.com/text-datasets/nietzsche.txt")
text = open(path).read()
print('corpus length:', len(text))

chars = sorted(list(set(text)))
vocab_size = len(chars) + 1 # +1 for adding Null char
chars.insert(0, "\0")
print('total chars:', vocab_size)


('corpus length:', 600901)
('total chars:', 86)

In [23]:
# Data format-stuff

# char <--> index mapping:
char_indices = dict((c, i) for i, c in enumerate(chars))
indices_char = dict((i, c) for i, c in enumerate(chars))

# `idx` will be the data used: converts all chars to index based on mapping
idx = [char_indices[c] for c in text]

# choose size for hidden state
n_hidden = 256
# choose no. embeedding factors // not needed 
# n_fac = 42
# char-seq length
cs = 8

# formatting char in/out data
c_in_dat  = [[idx[i+n] for i in xrange(0, len(idx)-cs-1, cs)] for n in xrange(cs)]
c_out_dat = [[idx[i+n] for i in xrange(1, len(idx)-cs, cs)] for n in xrange(cs)]

# the -2 is to omit the added Null char
ys = [np.stack(c[:-2]) for c in c_out_dat]
xs = [np.stack(c[:-2]) for c in c_in_dat]

# Need to One-Hot Encode input/output:
oh_ys = [to_categorical(o, vocab_size) for o in ys]
oh_y_rnn = np.stack(oh_ys, axis=1)
oh_xs = [to_categorical(o, vocab_size) for o in xs]
oh_x_rnn = np.stack(oh_xs, axis=1)


  File "<ipython-input-23-3202a8331fcd>", line 22
    ys = [np.stack(c[:-2]) for c in c_out_dat]
     ^
SyntaxError: invalid syntax

In [4]:
# tell Theano the in/out data size
n_input = vocab_size
n_output = vocab_size

In [5]:
# Define weight & bias initializations for Theano -- shared vars
def init_wgts(rows, cols):
    scale = math.sqrt(2/rows) # Glorot initilztn
    return shared(normal(scale=scale, size=(rows,cols)).astype(np.float32))
def  init_bias(rows):
    return shared(np.zeros(rows, dtype=np.float32))

# returning weights & biases wrapped in a tuple; hidden weights intlzd as Id_matx
def wgts_and_bias(n_in, n_out):
    return init_wgts(n_in, n_out), init_bias(n_out)
def id_and_bias(n):
    return shared(np.eye(n, dtype=np.float32)), init_bias(n)

In [6]:
# beginning constructn of computatn graph w/ Theano vars:
t_inp = T.matrix('inp')
t_outp = T.matrix('outp')
t_h0 = T.vector('h0')
lr = T.scalar('lr')

all_args = [t_h0, t_inp, t_outp, lr]

# Creating initial weight matrices:
W_h = id_and_bias(n_hidden)
W_x = wgts_and_bias(n_input, n_hidden)
W_y = wgts_and_bias(n_hidden, n_output)
w_all = list(chain.from_iterable([W_h, W_x, W_y]))

In [7]:
# Defining for Theano the operation that goes on at each step:
def step(x, h, W_h, b_h, W_x, b_x, W_y, b_y):
    """This fn performs a single forward pass for one char"""
    # Calculate hidden activations
    h = nnet.relu(T.dot(x, W_x) + b_x + T.dot(h, W_h) + b_h)
    # Calculate output activations
    y = nnet.softmax(T.dot(h, W_y) + b_y)
    # Return both. NOTE: the `Flatten()` is a workaround for a Theano bug
    return h, T.flatten(y, 1)

In [8]:
# setting up the scan operation -- the vars
[v_h, v_y], _ = theano.scan(step, sequences=t_inp,
                            outputs_info=[t_h0, None], non_sequences=w_all)

# defining calculation of loss function and gradients (auto in Theano)
error = nnet.categorical_crossentropy(v_y, t_outp).sum()
g_all = T.grad(error, w_all)

In [9]:
# Defining for Theano how to perform SGD (online). Dictionary of updates
# which completes after every forward-pass -- applying SGD update rule 
# to every weight.
def upd_dict(wgts, grads, lr):
    return OrderedDict({w: w - g * lr for (w, g) in zip(wgts, grads)})

upd = upd_dict(w_all, g_all, lr)

In [10]:
# ready to compile the function:
fn = theano.function(all_args, error, updates = upd, allow_input_downcast = True)


/Users/WayNoxchi/Miniconda3/Theano/theano/tensor/basic.py:5130: UserWarning: flatten outdim parameter is deprecated, use ndim instead.
  "flatten outdim parameter is deprecated, use ndim instead.")

In [11]:
# the data
X = oh_x_rnn
Y = oh_y_rnn

# just taking a look
X.shape, Y.shape


Out[11]:
((75110, 8, 86), (75110, 8, 86))

In [12]:
# Manually defining the Loop -- Theano is for GPU - no built-in serial looping
def loop(l_rate=0.01):
    err = 0.0; # l_rate = 0.01
    for i in xrange(len(X)):
        err += fn(np.zeros(n_hidden), X[i], Y[i], l_rate)
        # print progress every 1000 loops
        if i % 1000 == 999:
            print ("Error:{:.3f}".format(err/1000))
            err = 0.0
loop(l_rate=0.001)
loop(l_rate=0.01)


Error:34.151
Error:31.617
Error:30.001
Error:28.831
Error:28.046
Error:27.810
Error:27.215
Error:26.847
Error:26.648
Error:26.734
Error:26.112
Error:26.157
Error:26.977
Error:26.001
Error:25.816
Error:26.649
Error:26.489
Error:26.317
Error:25.737
Error:25.587
Error:25.539
Error:25.673
Error:25.795
Error:25.492
Error:25.591
Error:25.618
Error:25.748
Error:25.433
Error:25.508
Error:25.703
Error:25.769
Error:25.561
Error:25.841
Error:25.388
Error:25.573
Error:25.839
Error:25.341
Error:25.634
Error:25.537
Error:25.844
Error:25.225
Error:25.214
Error:25.435
Error:25.592
Error:25.829
Error:25.917
Error:25.384
Error:24.487
Error:24.924
Error:25.032
Error:24.693
Error:24.782
Error:24.597
Error:24.588
Error:24.914
Error:24.778
Error:24.802
Error:24.746
Error:24.666
Error:24.876
Error:24.601
Error:24.771
Error:24.813
Error:24.921
Error:24.657
Error:24.535
Error:24.522
Error:24.580
Error:24.331
Error:24.423
Error:24.980
Error:24.550
Error:24.351
Error:24.203
Error:24.142
Error:25.719
Error:25.070
Error:25.362
Error:25.297
Error:25.185
Error:25.359
Error:25.087
Error:25.033
Error:25.091
Error:25.454
Error:24.756
Error:24.978
Error:26.431
Error:24.986
Error:24.864
Error:26.038
Error:25.944
Error:25.710
Error:25.023
Error:24.923
Error:24.859
Error:25.079
Error:25.339
Error:24.944
Error:25.117
Error:25.135
Error:25.309
Error:24.989
Error:25.108
Error:25.293
Error:25.431
Error:25.174
Error:25.543
Error:25.045
Error:25.249
Error:25.586
Error:25.057
Error:25.376
Error:25.273
Error:25.585
Error:24.944
Error:24.950
Error:25.197
Error:25.381
Error:25.641
Error:25.677
Error:25.146
Error:24.071
Error:24.634
Error:24.867
Error:24.428
Error:24.571
Error:24.357
Error:24.354
Error:24.775
Error:24.590
Error:24.631
Error:24.550
Error:24.525
Error:24.687
Error:24.431
Error:24.619
Error:24.658
Error:24.775
Error:24.476
Error:24.341
Error:24.340
Error:24.398
Error:24.152
Error:24.213
Error:24.884
Error:24.406
Error:24.177
Error:24.021
Error:23.951

In [13]:
# output / prediction fn thing?
f_y = theano.function([t_h0, t_inp], v_y, allow_input_downcast=True)

In [14]:
pred = np.argmax(f_y(np.zeros(n_hidden), X[6]), axis=1)

In [15]:
act = np.argmax(X[6], axis=1)

In [16]:
[indices_char[o] for o in act]


Out[16]:
['t', 'h', 'e', 'n', '?', ' ', 'I', 's']

In [17]:
[indices_char[o] for o in pred]


Out[17]:
[' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ']

In [18]:
act2 = np.argmax(X[16], axis=1)
pred2 = np.argmax(f_y(np.zeros(n_hidden), X[16]), axis=1)

In [19]:
[indices_char[o] for o in act2]


Out[19]:
['y', ' ', 'h', 'a', 'v', 'e', ' ', 'b']

In [20]:
[indices_char[o] for o in pred2]


Out[20]:
[' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ']

In [21]:
oh_x_rnn.shape


Out[21]:
(75110, 8, 86)

In [22]:
[xs[n][:cs] for n in range(cs)]


Out[22]:
[array([40,  1, 33,  2, 72, 67, 73,  2]),
 array([42,  1, 38, 44,  2,  9, 61, 73]),
 array([29, 43, 31, 71, 54,  9, 58, 61]),
 array([30, 45,  2, 74,  2, 76, 67, 58]),
 array([25, 40, 73, 73, 76, 61, 24, 71]),
 array([27, 40, 61, 61, 68, 54,  2, 58]),
 array([29, 39, 54,  2, 66, 73, 33,  2]),
 array([ 1, 43, 73, 62, 54,  2, 72, 67])]

In [24]:
# NOTE: I need to rework this.

In [ ]: