In [2]:
import theano
import sys, os
sys.path.insert(1, os.path.join('../utils'))
from utils import *

path = get_file('nietzsche.txt', origin="https://s3.amazonaws.com/text-datasets/nietzsche.txt")
text = open(path).read()
print('corpus length:', len(text))

chars = sorted(list(set(text)))
vocab_size = len(chars) + 1
print('total chars:', vocab_size)

chars.insert(0, "\0")


char_indices = dict((c, i) for i, c in enumerate(chars))
indices_char = dict((i, c) for i, c in enumerate(chars))

idx = [char_indices[c] for c in text]

n_fac = 42
n_hidden = 256
cs = 8

c_in_dat = [[idx[i+n] for i in xrange(0, len(idx)-1-cs, cs)] for n in range(cs)]
c_out_dat = [[idx[i+n] for i in xrange(1, len(idx)-cs, cs)] for n in range(cs)]

xs = [np.stack(c[:-2]) for c in c_in_dat]
ys = [np.stack(c[:-2]) for c in c_out_dat]

# before model fit
[xs[n][:cs] for n in range(cs)]


('corpus length:', 600901)
('total chars:', 86)
Out[2]:
[array([40,  1, 33,  2, 72, 67, 73,  2]),
 array([42,  1, 38, 44,  2,  9, 61, 73]),
 array([29, 43, 31, 71, 54,  9, 58, 61]),
 array([30, 45,  2, 74,  2, 76, 67, 58]),
 array([25, 40, 73, 73, 76, 61, 24, 71]),
 array([27, 40, 61, 61, 68, 54,  2, 58]),
 array([29, 39, 54,  2, 66, 73, 33,  2]),
 array([ 1, 43, 73, 62, 54,  2, 72, 67])]

In [3]:
model = Sequential([
            SimpleRNN(n_hidden, return_sequences=True, input_shape=(cs, vocab_size),
                      activation='relu', inner_init='identity'),
            TimeDistributed(Dense(vocab_size, activation='softmax')),
])
model.compile(loss='categorical_crossentropy', optimizer=Adam())

# no embedding layer, so inputs must be onhotted
oh_ys = [to_categorical(o, vocab_size) for o in ys]
oh_y_rnn = np.stack(oh_ys, axis=1)

oh_xs = [to_categorical(o, vocab_size) for o in xs]
oh_x_rnn = np.stack(oh_xs, axis=1)

oh_x_rnn.shape, oh_y_rnn.shape


Out[3]:
((75110, 8, 86), (75110, 8, 86))

In [5]:
model.fit(oh_x_rnn, oh_y_rnn, batch_size=64, nb_epoch=1)


/Users/WayNoxchi/Miniconda3/Theano/theano/tensor/basic.py:5130: UserWarning: flatten outdim parameter is deprecated, use ndim instead.
  "flatten outdim parameter is deprecated, use ndim instead.")
Epoch 1/1
75110/75110 [==============================] - 14s - loss: 2.4462    
Out[5]:
<keras.callbacks.History at 0x11983f150>

In [6]:
# after model fit
[xs[n][:cs] for n in xrange(cs)]


Out[6]:
[array([40,  1, 33,  2, 72, 67, 73,  2]),
 array([42,  1, 38, 44,  2,  9, 61, 73]),
 array([29, 43, 31, 71, 54,  9, 58, 61]),
 array([30, 45,  2, 74,  2, 76, 67, 58]),
 array([25, 40, 73, 73, 76, 61, 24, 71]),
 array([27, 40, 61, 61, 68, 54,  2, 58]),
 array([29, 39, 54,  2, 66, 73, 33,  2]),
 array([ 1, 43, 73, 62, 54,  2, 72, 67])]

In [7]:
# THEANO RNN

n_input = vocab_size
n_output = vocab_size

def init_wgts(rows, cols):
    scale = math.sqrt(2/rows)
    return shared(normal(scale=scale, size=(rows,cols)).astype(np.float32))
def init_bias(rows):
    return shared(np.zeros(rows, dtype=np.float32))
def wgts_and_bias(n_in, n_out):
    return init_wgts(n_in, n_out), init_bias(n_out)
def id_and_bias(n):
    return shared(np.eye(n, dtype=np.float32)), init_bias(n)

# Theano Variables
t_inp = T.matrix('inp')
t_outp = T.matrix('outp')
t_h0 = T.vector('h0')
lr = T.scalar('lr')

all_args = [t_h0, t_inp, t_outp, lr]

W_h = id_and_bias(n_hidden)
W_x = wgts_and_bias(n_input, n_hidden)
W_y = wgts_and_bias(n_hidden, n_output)
w_all = list(chain.from_iterable([W_h, W_x, W_y]))

def step(x, h, W_h, b_h, W_x, b_x, W_y, b_y):
    # Calculate the hidden activations
    h = nnet.relu(T.dot(x, W_x) + b_x + T.dot(h, W_h) + b_h)
    # Calculate the output activations
    y = nnet.softmax(T.dot(h, W_y) + b_y)
    # Return both (the `Flatten()` is to work around a Theano bug)
    return h, T.flatten(y, 1)

[v_h, v_y], _ = theano.scan(step, sequences=t_inp,
                             outputs_info=[t_h0, None], non_sequences=w_all)

error = nnet.categorical_crossentropy(v_y, t_outp).sum()
g_all = T.grad(error, w_all)

def upd_dict(wgts, grads, lr):
    return OrderedDict({w: w - g * lr for (w,g) in zip(wgts, grads)})

upd = upd_dict(w_all, g_all, lr)

fn = theano.function(all_args, error, updates=upd, allow_input_downcast=True)

X = oh_x_rnn
Y = oh_y_rnn
print X.shape, Y.shape

err = 0.0; l_rate = 0.01
for i in xrange(len(X)):
    err += fn(np.zeros(n_hidden), X[i], Y[i], l_rate)
    if i % 1000 == 999:
        print ("ErrorX:{:.3f}".format(err/1000))
        err=0.0


(75110, 8, 86) (75110, 8, 86)
ErrorX:28.986
ErrorX:25.977
ErrorX:25.761
ErrorX:25.518
ErrorX:25.341
ErrorX:25.490
ErrorX:25.177
ErrorX:25.101
ErrorX:25.151
---------------------------------------------------------------------------
KeyboardInterrupt                         Traceback (most recent call last)
<ipython-input-7-31073226ba92> in <module>()
     54 err = 0.0; l_rate = 0.01
     55 for i in xrange(len(X)):
---> 56     err += fn(np.zeros(n_hidden), X[i], Y[i], l_rate)
     57     if i % 1000 == 999:
     58         print ("ErrorX:{:.3f}".format(err/1000))

/Users/WayNoxchi/Miniconda3/Theano/theano/compile/function_module.pyc in __call__(self, *args, **kwargs)
    882         try:
    883             outputs =\
--> 884                 self.fn() if output_subset is None else\
    885                 self.fn(output_subset=output_subset)
    886         except Exception:

/Users/WayNoxchi/Miniconda3/Theano/theano/scan_module/scan_op.pyc in rval(p, i, o, n, allow_gc)
    959         allow_gc = config.allow_gc and not self.allow_gc
    960 
--> 961         def rval(p=p, i=node_input_storage, o=node_output_storage, n=node,
    962                  allow_gc=allow_gc):
    963             r = p(n, [x[0] for x in i], o)

KeyboardInterrupt: 

Looks like this model fit isn't the one that changes x....


In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]: