In [1]:
!uname -a
In [2]:
#from theano.sandbox import cuda
#cuda.use('gpu2')
In [2]:
%matplotlib inline
import utils
from utils import *
from __future__ import division, print_function
from IPython.core.debugger import Tracer
#import ipdb; ipdb.set_trace() # not working
In [3]:
from keras.layers import TimeDistributed, Activation
from numpy.random import choice
We haven't really looked into the details of how this works yet, so it is provided here as self-study material for those who are interested. We'll look at it closely next week.
In [4]:
# Download the Nietzsche corpus (a standard character-level language-modelling
# dataset) via Keras' cache, and lower-case it to keep the vocabulary small.
path = get_file('nietzsche.txt', origin="https://s3.amazonaws.com/text-datasets/nietzsche.txt")
text = open(path).read().lower()
print('corpus length:', len(text))
In [5]:
# Show where Keras cached the downloaded file.
path
Out[5]:
In [6]:
# Peek at the last 25 lines of the corpus.
# NOTE(review): putting `-n25` after the filename is GNU-coreutils-specific
# argument ordering — may not work on BSD/macOS tail.
!tail {path} -n25
In [8]:
# Alternative, larger Wikipedia corpus — kept commented out.
#path = 'data/wiki/'
#text = open(path+'small.txt').read().lower()
#print('corpus length:', len(text))
#text = text[0:1000000]
In [7]:
# Build the character vocabulary. The +1 reserves index 0 for a null/padding
# character (inserted in the next cell).
chars = sorted(list(set(text)))
vocab_size = len(chars)+1
print('total chars:', vocab_size)
In [8]:
# Index 0 is the null character; real characters start at index 1.
chars.insert(0, "\0")
In [9]:
# Preview part of the vocabulary (skips "\0" and the last 6 characters).
''.join(chars[1:-6])
Out[9]:
In [10]:
# Forward and reverse lookup tables: char -> index and index -> char.
char_indices = dict((c, i) for i, c in enumerate(chars))
indices_char = dict((i, c) for i, c in enumerate(chars))
In [11]:
# Encode the entire corpus as a list of integer character ids.
idx = [char_indices[c] for c in text]
In [12]:
idx[:20]
Out[12]:
In [13]:
# Round-trip check: decode the first 70 ids back to text.
''.join(indices_char[i] for i in idx[:70])
Out[13]:
In [14]:
# Tiny demo with a 3-char step: take every cs-th id as a sequence start.
cs = 3
[idx[i] for i in range(0, len(idx)-1-cs, cs)]
Out[14]:
In [15]:
# Real window length: 40 characters. c1 holds the input windows; c2 holds the
# same windows shifted one character to the right (the next-char targets).
cs = 40
c1 = [idx[i:i+cs] for i in range(0, len(idx)-1, cs)]
c2 = [idx[i:i+cs] for i in range(1, len(idx), cs)]
In [19]:
# Sanity check: the target window should be the input window shifted by one.
"".join([indices_char[i] for i in c1[0]])
"".join([indices_char[i] for i in c2[0]])
Out[19]:
Out[19]:
In [25]:
"".join([indices_char[i] for i in c1[0:-2][0]])
Out[25]:
In [19]:
# Drop the last two (possibly short) windows and stack into arrays.
# y gets a trailing singleton axis so it matches the shape expected by
# sparse_categorical_crossentropy: (samples, timesteps, 1).
x = np.stack(c1[:-2])
y = np.stack(c2[:-2])
y = np.expand_dims(y, -1)
x.shape, y.shape
Out[19]:
In [20]:
# Dimensionality of the character embedding.
n_fac = 42
In [21]:
# Prediction model: same architecture as the training model below, but with
# batch size 1 and sequence length 1 so it can generate one character at a
# time. Trained weights are copied into it in print_example2.
pmodel=Sequential([
#Embedding(vocab_size, n_fac, input_length=maxlen),
Embedding(vocab_size, n_fac, input_length=1, batch_input_shape=(1,1)),
BatchNormalization(),
# stateful=True: LSTM state carries across successive predict() calls, so
# consecutive single-char predictions see the whole generated history.
# dropout_U / dropout_W / consume_less are Keras 1.x argument names.
LSTM(512, return_sequences=True, stateful=True, dropout_U=0.2, dropout_W=0.2, consume_less='gpu'),
LSTM(512, return_sequences=True, stateful=True, dropout_U=0.2, dropout_W=0.2, consume_less='gpu'),
TimeDistributed(Dense(512, activation='relu')),
Dropout(0.1),
TimeDistributed(Dense(vocab_size, activation='softmax'))
])
pmodel.summary()
In [22]:
# Training model: stateful char-level LSTM trained on full cs-char windows.
batch_size = 64
model=Sequential([
# Use cs (the window length defined above) instead of the magic number 40 in
# batch_input_shape, so the architecture tracks the data-prep cell if the
# window length ever changes.
Embedding(vocab_size, n_fac, input_length=cs, batch_input_shape=(batch_size, cs)),
BatchNormalization(),
# stateful=True carries LSTM state between consecutive batches; this requires
# shuffle=False and a sample count divisible by batch_size at fit time.
LSTM(512, return_sequences=True, stateful=True, dropout_U=0.2, dropout_W=0.2, consume_less='gpu'),
LSTM(512, return_sequences=True, stateful=True, dropout_U=0.2, dropout_W=0.2, consume_less='gpu'),
TimeDistributed(Dense(512, activation='relu')),
Dropout(0.1),
TimeDistributed(Dense(vocab_size, activation='softmax'))
])
model.summary()
In [23]:
# List layer names — print_example2 matches layers by name when copying
# weights from `model` into `pmodel`.
for l in model.layers:
print(l.name)
In [24]:
# sparse_categorical_crossentropy: targets are integer char ids, not one-hot.
model.compile(loss='sparse_categorical_crossentropy', optimizer=Adam())
In [25]:
# In a stateful network, you should only pass inputs with a number of samples that can be divided by the batch size.
mx = len(x)//64*64
mx
Out[25]:
In [27]:
import time
import tensorflow as tf
def run_epochs(n):
    """Run n single-epoch training passes over the truncated corpus.

    Resets the stateful LSTM's state before each pass and reports the wall
    time and loss per pass.

    NOTE(review): the initializer call below runs on EVERY invocation; it is
    kept verbatim because the original marks it as a keras/TF version
    workaround — confirm it does not wipe previously trained weights.
    """
    # Workaround for a keras/TF initialisation bug (newer TF API).
    keras.backend.get_session().run(tf.global_variables_initializer())
    for epoch in range(n):
        t0 = time.time()
        print("-- Epoch: {}".format(epoch))
        # Stateful model: clear carried LSTM state before each corpus pass.
        model.reset_states()
        hist = model.fit(x[:mx], y[:mx], batch_size=batch_size, nb_epoch=1,
                         shuffle=False, verbose=0)
        print("-- duration: {}, loss: {}".format(time.time() - t0, hist.history['loss']))
In [28]:
# One epoch end-to-end to verify the training loop works.
run_epochs(1)
In [29]:
# Checkpoint the weights after the first epoch.
model.save_weights('data/nietzsche_ep1_TF.h5')
In [30]:
# Reload the checkpoint (no-op right after saving; useful when resuming).
model.load_weights('data/nietzsche_ep1_TF.h5')
In [31]:
def print_example2(ln=160):
    """Generate and print ln characters using the stateful 1-char model.

    Copies trained weights from `model` into `pmodel`, primes pmodel's LSTM
    state by feeding a seed string from the corpus one character at a time,
    then samples one character per step, feeding each sampled character back
    in as the next input.
    """
    # Copy weights layer-by-layer from the training model to the
    # batch-size-1 prediction model.
    # NOTE(review): the first BatchNormalization layer is skipped here —
    # presumably a shape/naming workaround; confirm this is intentional.
    for l1, l2 in zip(model.layers, pmodel.layers):
        if l1.name != "batchnormalization_1":
            l2.set_weights(l1.get_weights())
    pmodel.compile(loss='sparse_categorical_crossentropy', optimizer=Adam())
    offset = 10
    # NOTE(review): `ln + offset//4` (== ln + 2) looks odd for a seed-length
    # calculation — left unchanged pending confirmation of intent.
    seed_string = text[offset:ln+offset//4]
    pmodel.reset_states()
    # Prime the LSTM state with the seed, one character per predict() call.
    for s in seed_string:
        x = np.array([char_indices[s]])[np.newaxis,:]
        preds = pmodel.predict(x, verbose=0)[0][0]
    # Sample the first generated character from the post-seed distribution.
    s = choice(chars, p=preds)
    res = seed_string+s+'...\n\n'
    for i in range(ln):
        x = np.array([char_indices[s]])[np.newaxis,:]
        preds = pmodel.predict(x, verbose=0)[0][0]
        # BUG FIX: feed the freshly sampled character back in as the next
        # input. The original sampled into `pres` but never updated `s`, so
        # the model received the same input character on every step.
        s = choice(chars, p=preds)
        res = res+s
    print(res)
In [32]:
# Sample from the model after one epoch of training.
print_example2()
In [33]:
# Train longer, then sample again to compare quality.
# NOTE(review): run_epochs re-runs the TF variable initializer at its start,
# so this may restart training from scratch rather than continue — confirm.
run_epochs(10)
print_example2()
In [34]:
# NOTE: not usable with the stateful training model — it expects a fixed
# batch of 64 samples, while this feeds a single (1, 40) input.
def print_example():
    """Extend a fixed seed string by 320 sampled characters and print it."""
    generated = "ethics is a basic foundation of all that"
    for _ in range(320):
        # Encode the last 40 characters as the model's input window.
        window = np.array([char_indices[c] for c in generated[-40:]])[np.newaxis,:]
        probs = model.predict(window, verbose=0)[0][-1]
        # Renormalise so the probabilities sum to exactly 1 for choice(p=...).
        probs = probs/np.sum(probs)
        generated = generated + choice(chars, p=probs)
    print(generated)
In [36]:
#print_example()
In [ ]:
%%capture output
# NOTE(review): `sentences` and `next_chars` are never defined anywhere in
# this notebook (leftovers from the original lesson notebook that used
# pre-chopped sentence arrays). These cells will raise NameError on a fresh
# Restart & Run All.
model.fit(sentences, np.expand_dims(next_chars,-1), batch_size=64, nb_epoch=1)
In [ ]:
# Display the training output captured by %%capture above.
output.show()
In [ ]:
%%capture output
model.fit(sentences, np.expand_dims(next_chars,-1), batch_size=64, nb_epoch=1)
In [ ]:
output.show()
In [ ]:
print_example()
In [ ]:
# NOTE(review): assigning a plain float to model.optimizer.lr replaces the
# optimizer's lr attribute object and may not affect the already-compiled
# graph; K.set_value(model.optimizer.lr, 0.001) is the usual idiom — confirm.
model.optimizer.lr=0.001
In [ ]:
%%capture output
model.fit(sentences, np.expand_dims(next_chars,-1), batch_size=64, nb_epoch=2)
In [ ]:
output.show()
In [ ]:
print_example()
In [ ]:
model.optimizer.lr=0.0001
In [ ]:
%%capture output
model.fit(sentences, np.expand_dims(next_chars,-1), batch_size=64, nb_epoch=3)
In [ ]:
output.show()
In [ ]:
print_example()
In [ ]:
# Checkpoint the weights.
model.save_weights('data/char_rnn.h5')
In [ ]:
model.optimizer.lr=0.00001
In [ ]:
%%capture output
model.fit(sentences, np.expand_dims(next_chars,-1), batch_size=64, nb_epoch=1)
In [ ]:
model.save_weights('data/char_rnn.h5')
In [ ]:
output.show()
In [ ]:
print_example()
In [ ]:
%%capture output
model.fit(sentences, np.expand_dims(next_chars,-1), batch_size=64, nb_epoch=1)
In [ ]:
output.show()
In [ ]:
print_example()
In [ ]:
print_example()
In [ ]:
model.save_weights('data/char_rnn.h5')
In [ ]: