In [5]:
import theano
import sys, os
sys.path.insert(1, os.path.join('../utils'))
from utils import *

path = get_file('nietzsche.txt', origin="https://s3.amazonaws.com/text-datasets/nietzsche.txt")
text = open(path).read()
print('corpus length:', len(text))

chars = sorted(list(set(text)))
vocab_size = len(chars) + 1
print('total chars:', vocab_size)

chars.insert(0, "\0")


char_indices = dict((c, i) for i, c in enumerate(chars))
indices_char = dict((i, c) for i, c in enumerate(chars))

idx = [char_indices[c] for c in text]

n_fac = 42
n_hidden = 256
cs = 8

c_in_dat = [[idx[i+n] for i in xrange(0, len(idx)-1-cs, cs)] for n in range(cs)]
# c_out_dat = [[idx[i+n] for i in xrange(1, len(idx)-cs, cs)] for n in range(cs)]

xs = [np.stack(c[:-2]) for c in c_in_dat]
ys = [np.stack(c[:-2]) for c in c_out_dat]

# before model fit
[xs[n][:cs] for n in range(cs)]


('corpus length:', 600901)
('total chars:', 86)
Out[5]:
[array([40,  1, 33,  2, 72, 67, 73,  2]),
 array([42,  1, 38, 44,  2,  9, 61, 73]),
 array([29, 43, 31, 71, 54,  9, 58, 61]),
 array([30, 45,  2, 74,  2, 76, 67, 58]),
 array([25, 40, 73, 73, 76, 61, 24, 71]),
 array([27, 40, 61, 61, 68, 54,  2, 58]),
 array([29, 39, 54,  2, 66, 73, 33,  2]),
 array([ 1, 43, 73, 62, 54,  2, 72, 67])]

In [6]:
c_out_dat = [idx[i+cs] for i in xrange(0, len(idx)-1-cs,cs)]
y = np.stack(c_out_dat[:-2])

def embedding_input(name, n_in, n_out):
    inp = Input(shape=(1,), dtype='int64', name=name+'_in')
    emb = Embedding(n_in, n_out, input_length=1, name=name+'_emb')(inp)
    return inp, Flatten()(emb)

c_ins = [embedding_input('c'+str(n), vocab_size, n_fac) for n in range(cs)]
n_hidden = 256

dense_in = Dense(n_hidden, activation='relu')
dense_hidden = Dense(n_hidden, activation='relu', init='identity')
dense_out = Dense(vocab_size, activation='softmax')

hidden = dense_in(c_ins[0][1])

for i in range(1,cs):
    c_dense = dense_in(c_ins[i][1]) #green arrow
    hidden = dense_hidden(hidden)   #orange arrow
    hidden = merge([c_dense, hidden]) #merge the two together
    
    c_out = dense_out(hidden)

In [7]:
model = Model([c[0] for c in c_ins], c_out)
model.compile(loss='sparse_categorical_crossentropy', optimizer=Adam())
model.fit(xs, y, batch_size=64, nb_epoch=1)


Epoch 1/1
75110/75110 [==============================] - 7s - loss: 2.5326     
Out[7]:
<keras.callbacks.History at 0x11aee2490>

In [8]:
# after model fit
[xs[n][:cs] for n in range(cs)]


Out[8]:
[array([[40],
        [ 1],
        [33],
        [ 2],
        [72],
        [67],
        [73],
        [ 2]]), array([[42],
        [ 1],
        [38],
        [44],
        [ 2],
        [ 9],
        [61],
        [73]]), array([[29],
        [43],
        [31],
        [71],
        [54],
        [ 9],
        [58],
        [61]]), array([[30],
        [45],
        [ 2],
        [74],
        [ 2],
        [76],
        [67],
        [58]]), array([[25],
        [40],
        [73],
        [73],
        [76],
        [61],
        [24],
        [71]]), array([[27],
        [40],
        [61],
        [61],
        [68],
        [54],
        [ 2],
        [58]]), array([[29],
        [39],
        [54],
        [ 2],
        [66],
        [73],
        [33],
        [ 2]]), array([[ 1],
        [43],
        [73],
        [62],
        [54],
        [ 2],
        [72],
        [67]])]

In [9]:
c_out_dat = [[idx[i+n] for i in xrange(1, len(idx)-cs, cs)] for n in range(cs)]
ys = [np.stack(c[:-2]) for c in c_out_dat]

oh_ys = [to_categorical(o, vocab_size) for o in ys]
oh_y_rnn = np.stack(oh_ys, axis=1)

oh_xs = [to_categorical(o, vocab_size) for o in xs]
oh_x_rnn = np.stack(oh_xs, axis=1)

oh_x_rnn.shape, oh_y_rnn.shape


Out[9]:
((75110, 8, 86), (75110, 8, 86))

In [10]:
# THEANO RNN

n_input = vocab_size
n_output = vocab_size

def init_wgts(rows, cols):
    scale = math.sqrt(2/rows)
    return shared(normal(scale=scale, size=(rows,cols)).astype(np.float32))
def init_bias(rows):
    return shared(np.zeros(rows, dtype=np.float32))
def wgts_and_bias(n_in, n_out):
    return init_wgts(n_in, n_out), init_bias(n_out)
def id_and_bias(n):
    return shared(np.eye(n, dtype=np.float32)), init_bias(n)

# Theano Variables
t_inp = T.matrix('inp')
t_outp = T.matrix('outp')
t_h0 = T.vector('h0')
lr = T.scalar('lr')

all_args = [t_h0, t_inp, t_outp, lr]

W_h = id_and_bias(n_hidden)
W_x = wgts_and_bias(n_input, n_hidden)
W_y = wgts_and_bias(n_hidden, n_output)
w_all = list(chain.from_iterable([W_h, W_x, W_y]))

def step(x, h, W_h, b_h, W_x, b_x, W_y, b_y):
    # Calculate the hidden activations
    h = nnet.relu(T.dot(x, W_x) + b_x + T.dot(h, W_h) + b_h)
    # Calculate the output activations
    y = nnet.softmax(T.dot(h, W_y) + b_y)
    # Return both (the `Flatten()` is to work around a Theano bug)
    return h, T.flatten(y, 1)

[v_h, v_y], _ = theano.scan(step, sequences=t_inp,
                             outputs_info=[t_h0, None], non_sequences=w_all)

error = nnet.categorical_crossentropy(v_y, t_outp).sum()
g_all = T.grad(error, w_all)

def upd_dict(wgts, grads, lr):
    return OrderedDict({w: w - g * lr for (w,g) in zip(wgts, grads)})

upd = upd_dict(w_all, g_all, lr)

fn = theano.function(all_args, error, updates=upd, allow_input_downcast=True)

X = oh_x_rnn
Y = oh_y_rnn
print X.shape, Y.shape

err = 0.0; l_rate = 0.01
for i in xrange(len(X)):
    err += fn(np.zeros(n_hidden), X[i], Y[i], l_rate)
    if i % 1000 == 999:
        print ("ErrorX:{:.3f}".format(err/1000))
        err=0.0


/Users/WayNoxchi/Miniconda3/Theano/theano/tensor/basic.py:5130: UserWarning: flatten outdim parameter is deprecated, use ndim instead.
  "flatten outdim parameter is deprecated, use ndim instead.")
(75110, 8, 86) (75110, 8, 86)
ErrorX:28.986
ErrorX:25.977
ErrorX:25.761
ErrorX:25.518
ErrorX:25.341
ErrorX:25.490
ErrorX:25.177
ErrorX:25.101
ErrorX:25.151
ErrorX:25.495
ErrorX:24.801
ErrorX:25.009
ErrorX:26.446
ErrorX:25.014
ErrorX:24.888
ErrorX:26.046
ErrorX:25.932
ErrorX:25.714
ErrorX:25.041
ErrorX:24.938
ErrorX:24.872
ErrorX:25.092
ErrorX:25.351
ErrorX:24.953
ErrorX:25.128
ErrorX:25.144
ErrorX:25.317
ErrorX:24.996
ErrorX:25.114
ErrorX:25.300
ErrorX:25.438
ErrorX:25.180
ErrorX:25.548
ErrorX:25.050
ErrorX:25.253
ErrorX:25.591
ErrorX:25.060
ErrorX:25.380
ErrorX:25.277
ErrorX:25.589
ErrorX:24.948
ErrorX:24.953
ErrorX:25.200
ErrorX:25.384
ErrorX:25.643
ErrorX:25.679
ErrorX:25.149
ErrorX:24.073
ErrorX:24.635
ErrorX:24.867
ErrorX:24.429
ErrorX:24.570
ErrorX:24.357
ErrorX:24.355
ErrorX:24.773
ErrorX:24.590
ErrorX:24.631
ErrorX:24.551
ErrorX:24.523
ErrorX:24.688
ErrorX:24.431
ErrorX:24.619
ErrorX:24.658
ErrorX:24.774
ErrorX:24.477
ErrorX:24.342
ErrorX:24.341
ErrorX:24.398
ErrorX:24.153
ErrorX:24.214
ErrorX:24.885
ErrorX:24.407
ErrorX:24.177
ErrorX:24.022
ErrorX:23.951

Okay, thoroughly lost.. Need to go back through the lesson 6 notebook..


In [ ]: