In [1]:
!uname -a


Darwin LMUCM608159 15.6.0 Darwin Kernel Version 15.6.0: Mon Jan  9 23:07:29 PST 2017; root:xnu-3248.60.11.2.1~1/RELEASE_X86_64 x86_64 i386 MacBookPro11,5 Darwin

In [2]:
#from theano.sandbox import cuda
#cuda.use('gpu2')

In [2]:
%matplotlib inline
import utils
from utils import *
from __future__ import division, print_function

from IPython.core.debugger import Tracer
#import ipdb; ipdb.set_trace()  # not working


Using TensorFlow backend.

In [3]:
from keras.layers import TimeDistributed, Activation
from numpy.random import choice

Setup

We haven't really looked into the detail of how this works yet - so this is provided for self-study for those who are interested. We'll look at it closely next week.


In [4]:
path = get_file('nietzsche.txt', origin="https://s3.amazonaws.com/text-datasets/nietzsche.txt")
text = open(path).read().lower()
print('corpus length:', len(text))


corpus length: 600893

In [5]:
path


Out[5]:
'/Users/q187392/.keras/datasets/nietzsche.txt'

In [6]:
!tail {path} -n25


are thinkers who believe in the saints.


144

It stands to reason that this sketch of the saint, made upon the model
of the whole species, can be confronted with many opposing sketches that
would create a more agreeable impression. There are certain exceptions
among the species who distinguish themselves either by especial
gentleness or especial humanity, and perhaps by the strength of their
own personality. Others are in the highest degree fascinating because
certain of their delusions shed a particular glow over their whole
being, as is the case with the founder of christianity who took himself
for the only begotten son of God and hence felt himself sinless; so that
through his imagination--that should not be too harshly judged since the
whole of antiquity swarmed with sons of god--he attained the same goal,
the sense of complete sinlessness, complete irresponsibility, that can
now be attained by every individual through science.--In the same manner
I have viewed the saints of India who occupy an intermediate station
between the christian saints and the Greek philosophers and hence are
not to be regarded as a pure type. Knowledge and science--as far as they
existed--and superiority to the rest of mankind by logical discipline
and training of the intellectual powers were insisted upon by the
Buddhists as essential to sanctity, just as they were denounced by the
christian world as the indications of sinfulness.

In [8]:
#path = 'data/wiki/'
#text = open(path+'small.txt').read().lower()
#print('corpus length:', len(text))

#text = text[0:1000000]

In [7]:
chars = sorted(list(set(text)))
vocab_size = len(chars)+1
print('total chars:', vocab_size)


total chars: 58

In [8]:
chars.insert(0, "\0")

In [9]:
''.join(chars[1:-6])


Out[9]:
'\n !"\'(),-.0123456789:;=?[]_abcdefghijklmnopqrstuvwx'

In [10]:
char_indices = dict((c, i) for i, c in enumerate(chars))
indices_char = dict((i, c) for i, c in enumerate(chars))

In [11]:
idx = [char_indices[c] for c in text]

In [12]:
idx[:20]


Out[12]:
[43, 45, 32, 33, 28, 30, 32, 1, 1, 1, 46, 48, 43, 43, 42, 46, 36, 41, 34, 2]

In [13]:
''.join(indices_char[i] for i in idx[:70])


Out[13]:
'preface\n\n\nsupposing that truth is a woman--what then? is there not gro'

In [14]:
cs = 3
[idx[i] for i in range(0, len(idx)-1-cs, cs)]


Out[14]:
[43,
 33,
 32,
 1,
 43,
 46,
 34,
 35,
 2,
 48,
 2,
 2,
 50,
 28,
 9,
 28,
 47,
 41,
 36,
 47,
 45,
 41,
 2,
 42,
 31,
 42,
 46,
 43,
 47,
 34,
 35,
 2,
 39,
 35,
 42,
 43,
 45,
 2,
 2,
 2,
 45,
 46,
 35,
 2,
 49,
 29,
 41,
 42,
 28,
 46,
 8,
 28,
 2,
 36,
 31,
 42,
 41,
 45,
 28,
 2,
 40,
 9,
 35,
 2,
 32,
 32,
 36,
 32,
 32,
 42,
 41,
 46,
 41,
 30,
 40,
 2,
 43,
 47,
 36,
 2,
 47,
 50,
 30,
 47,
 52,
 28,
 2,
 48,
 39,
 43,
 31,
 35,
 45,
 31,
 32,
 32,
 47,
 47,
 47,
 2,
 49,
 29,
 41,
 41,
 36,
 32,
 28,
 2,
 46,
 40,
 2,
 47,
 31,
 33,
 1,
 41,
 41,
 28,
 42,
 41,
 30,
 47,
 41,
 2,
 32,
 28,
 41,
 32,
 28,
 42,
 31,
 32,
 32,
 2,
 2,
 2,
 41,
 28,
 1,
 2,
 32,
 41,
 32,
 45,
 38,
 31,
 33,
 42,
 28,
 47,
 31,
 50,
 35,
 28,
 28,
 2,
 46,
 48,
 34,
 2,
 32,
 9,
 8,
 41,
 32,
 2,
 2,
 28,
 46,
 47,
 39,
 2,
 45,
 35,
 32,
 45,
 46,
 33,
 45,
 50,
 2,
 36,
 28,
 2,
 28,
 36,
 35,
 2,
 39,
 41,
 47,
 47,
 39,
 31,
 40,
 39,
 46,
 41,
 35,
 34,
 48,
 9,
 28,
 40,
 32,
 47,
 47,
 47,
 46,
 47,
 47,
 39,
 47,
 28,
 10,
 48,
 47,
 46,
 28,
 46,
 36,
 46,
 8,
 35,
 32,
 45,
 34,
 31,
 45,
 41,
 2,
 45,
 42,
 41,
 47,
 47,
 39,
 31,
 40,
 36,
 41,
 36,
 43,
 39,
 42,
 52,
 50,
 47,
 32,
 46,
 32,
 8,
 35,
 32,
 45,
 42,
 39,
 36,
 1,
 31,
 32,
 31,
 2,
 45,
 36,
 35,
 2,
 46,
 32,
 2,
 52,
 28,
 2,
 32,
 42,
 52,
 2,
 29,
 2,
 32,
 39,
 40,
 41,
 47,
 42,
 46,
 2,
 31,
 45,
 28,
 52,
 35,
 47,
 32,
 46,
 47,
 28,
 2,
 32,
 36,
 50,
 39,
 32,
 41,
 1,
 31,
 34,
 41,
 41,
 45,
 42,
 2,
 28,
 35,
 2,
 47,
 39,
 2,
 33,
 30,
 2,
 45,
 35,
 29,
 36,
 42,
 46,
 35,
 40,
 46,
 34,
 41,
 28,
 42,
 47,
 43,
 39,
 42,
 36,
 39,
 31,
 36,
 46,
 46,
 35,
 31,
 40,
 36,
 46,
 28,
 1,
 47,
 45,
 2,
 28,
 31,
 43,
 35,
 46,
 42,
 2,
 43,
 28,
 46,
 32,
 47,
 36,
 2,
 2,
 40,
 42,
 28,
 47,
 32,
 46,
 35,
 46,
 35,
 46,
 39,
 48,
 45,
 36,
 42,
 2,
 36,
 8,
 41,
 35,
 33,
 40,
 33,
 48,
 32,
 9,
 41,
 32,
 9,
 43,
 46,
 47,
 41,
 35,
 2,
 47,
 32,
 30,
 46,
 2,
 36,
 2,
 46,
 36,
 7,
 43,
 35,
 46,
 42,
 1,
 28,
 48,
 41,
 42,
 46,
 28,
 32,
 43,
 42,
 42,
 47,
 2,
 45,
 42,
 34,
 40,
 45,
 42,
 28,
 28,
 28,
 42,
 2,
 41,
 28,
 53,
 36,
 2,
 2,
 45,
 45,
 47,
 30,
 31,
 49,
 52,
 32,
 42,
 39,
 49,
 52,
 48,
 41,
 28,
 9,
 42,
 48,
 41,
 28,
 46,
 47,
 2,
 36,
 46,
 35,
 42,
 47,
 2,
 34,
 47,
 47,
 2,
 2,
 2,
 1,
 2,
 43,
 8,
 28,
 42,
 52,
 2,
 42,
 46,
 33,
 2,
 42,
 28,
 46,
 33,
 32,
 46,
 33,
 45,
 45,
 8,
 46,
 28,
 28,
 45,
 42,
 2,
 2,
 36,
 2,
 45,
 32,
 47,
 32,
 2,
 2,
 32,
 32,
 36,
 2,
 2,
 36,
 2,
 42,
 29,
 2,
 45,
 39,
 42,
 8,
 42,
 8,
 30,
 32,
 46,
 2,
 31,
 28,
 32,
 32,
 28,
 2,
 32,
 46,
 41,
 47,
 41,
 41,
 41,
 28,
 48,
 2,
 36,
 30,
 35,
 35,
 47,
 2,
 2,
 32,
 42,
 47,
 28,
 2,
 2,
 46,
 46,
 32,
 47,
 45,
 47,
 28,
 1,
 32,
 41,
 42,
 2,
 2,
 36,
 28,
 2,
 52,
 8,
 35,
 34,
 41,
 46,
 39,
 42,
 28,
 35,
 32,
 48,
 10,
 47,
 32,
 46,
 35,
 2,
 2,
 31,
 2,
 2,
 46,
 36,
 2,
 32,
 32,
 32,
 48,
 41,
 35,
 35,
 45,
 42,
 35,
 28,
 47,
 50,
 35,
 49,
 39,
 47,
 34,
 39,
 40,
 2,
 39,
 45,
 47,
 35,
 34,
 35,
 32,
 36,
 47,
 42,
 28,
 32,
 28,
 48,
 47,
 1,
 45,
 2,
 2,
 42,
 42,
 2,
 31,
 50,
 36,
 43,
 36,
 2,
 45,
 28,
 45,
 21,
 42,
 28,
 30,
 35,
 42,
 43,
 2,
 46,
 32,
 2,
 30,
 36,
 47,
 32,
 33,
 35,
 2,
 41,
 9,
 45,
 41,
 28,
 32,
 47,
 2,
 31,
 47,
 31,
 47,
 41,
 36,
 28,
 28,
 28,
 2,
 28,
 41,
 40,
 41,
 48,
 43,
 2,
 47,
 46,
 42,
 29,
 48,
 45,
 32,
 39,
 42,
 47,
 28,
 35,
 34,
 36,
 40,
 47,
 32,
 28,
 39,
 29,
 30,
 33,
 46,
 2,
 28,
 47,
 2,
 45,
 8,
 35,
 40,
 47,
 36,
 46,
 32,
 28,
 2,
 32,
 42,
 2,
 41,
 45,
 46,
 33,
 45,
 45,
 35,
 35,
 47,
 35,
 2,
 32,
 28,
 42,
 28,
 46,
 32,
 42,
 9,
 40,
 52,
 43,
 47,
 46,
 41,
 41,
 42,
 42,
 43,
 32,
 43,
 36,
 28,
 2,
 32,
 42,
 2,
 2,
 46,
 33,
 29,
 2,
 50,
 35,
 2,
 2,
 46,
 32,
 2,
 45,
 48,
 32,
 2,
 32,
 32,
 42,
 8,
 36,
 42,
 47,
 46,
 36,
 47,
 45,
 1,
 41,
 34,
 41,
 45,
 2,
 32,
 35,
 45,
 39,
 28,
 2,
 2,
 28,
 2,
 37,
 2,
 35,
 39,
 36,
 9,
 39,
 43,
 50,
 2,
 42,
 2,
 47,
 36,
 50,
 32,
 39,
 46,
 36,
 32,
 8,
 45,
 47,
 2,
 36,
 2,
 2,
 39,
 35,
 46,
 32,
 47,
 50,
 30,
 47,
 2,
 45,
 34,
 2,
 28,
 46,
 47,
 46,
 45,
 45,
 28,
 33,
 47,
 32,
 2,
 2,
 42,
 47,
 2,
 1,
 32,
 32,
 2,
 49,
 46,
 41,
 33,
 45,
 35,
 28,
 2,
 32,
 32,
 28,
 42,
 47,
 2,
 45,
 32,
 36,
 9,
 35,
 33,
 31,
 32,
 28,
 30,
 31,
 36,
 9,
 33,
 36,
 8,
 42,
 43,
 38,
 33,
 43,
 36,
 28,
 2,
 32,
 42,
 2,
 2,
 28,
 1,
 42,
 2,
 2,
 32,
 2,
 31,
 31,
 41,
 40,
 35,
 28,
 8,
 46,
 2,
 52,
 30,
 41,
 4,
 50,
 36,
 46,
 35,
 1,
 39,
 52,
 47,
 30,
 47,
 47,
 36,
 46,
 43,
 31,
 47,
 33,
 41,
 44,
 47,
 2,
 28,
 24,
 28,
 47,
 2,
 30,
 31,
 42,
 28,
 46,
 32,
 39,
 30,
 45,
 47,
 2,
 40,
 50,
 2,
 30,
 47,
 2,
 47,
 2,
 39,
 2,
 45,
 43,
 45,
 33,
 42,
 35,
 2,
 31,
 32,
 ...]

Preprocess and create model


In [15]:
cs = 40
c1 = [idx[i:i+cs] for i in range(0, len(idx)-1, cs)]
c2 = [idx[i:i+cs] for i in range(1, len(idx), cs)]

In [19]:
"".join([indices_char[i] for i in c1[0]])
"".join([indices_char[i] for i in c2[0]])


Out[19]:
'preface\n\n\nsupposing that truth is a woma'
Out[19]:
'reface\n\n\nsupposing that truth is a woman'

In [25]:
"".join([indices_char[i] for i in c1[0:-2][0]])


Out[25]:
'preface\n\n\nsupposing that truth is a woma'

In [19]:
x = np.stack(c1[:-2])
y = np.stack(c2[:-2])
y = np.expand_dims(y, -1)
x.shape, y.shape


Out[19]:
((15021, 40), (15021, 40, 1))

In [20]:
n_fac = 42

In [21]:
pmodel=Sequential([
        #Embedding(vocab_size, n_fac, input_length=maxlen),
        Embedding(vocab_size, n_fac, input_length=1, batch_input_shape=(1,1)),
        BatchNormalization(),
        LSTM(512, return_sequences=True, stateful=True, dropout_U=0.2, dropout_W=0.2, consume_less='gpu'),
        LSTM(512, return_sequences=True, stateful=True, dropout_U=0.2, dropout_W=0.2, consume_less='gpu'),
        TimeDistributed(Dense(512, activation='relu')),
        Dropout(0.1),
        TimeDistributed(Dense(vocab_size, activation='softmax'))
    ])    
pmodel.summary()


____________________________________________________________________________________________________
Layer (type)                     Output Shape          Param #     Connected to                     
====================================================================================================
embedding_1 (Embedding)          (1, 1, 42)            2436        embedding_input_1[0][0]          
____________________________________________________________________________________________________
batchnormalization_1 (BatchNormal(1, 1, 42)            84          embedding_1[0][0]                
____________________________________________________________________________________________________
lstm_1 (LSTM)                    (1, 1, 512)           1136640     batchnormalization_1[0][0]       
____________________________________________________________________________________________________
lstm_2 (LSTM)                    (1, 1, 512)           2099200     lstm_1[0][0]                     
____________________________________________________________________________________________________
timedistributed_1 (TimeDistribute(1, 1, 512)           262656      lstm_2[0][0]                     
____________________________________________________________________________________________________
dropout_1 (Dropout)              (1, 1, 512)           0           timedistributed_1[0][0]          
____________________________________________________________________________________________________
timedistributed_2 (TimeDistribute(1, 1, 58)            29754       dropout_1[0][0]                  
====================================================================================================
Total params: 3530770
____________________________________________________________________________________________________

In [22]:
batch_size = 64
model=Sequential([
        Embedding(vocab_size, n_fac, input_length=cs, batch_input_shape=(batch_size, 40)),
        BatchNormalization(),
        LSTM(512, return_sequences=True, stateful=True, dropout_U=0.2, dropout_W=0.2, consume_less='gpu'),
        LSTM(512, return_sequences=True, stateful=True, dropout_U=0.2, dropout_W=0.2, consume_less='gpu'),
        TimeDistributed(Dense(512, activation='relu')),
        Dropout(0.1),
        TimeDistributed(Dense(vocab_size, activation='softmax'))
    ])    
model.summary()


____________________________________________________________________________________________________
Layer (type)                     Output Shape          Param #     Connected to                     
====================================================================================================
embedding_2 (Embedding)          (64, 40, 42)          2436        embedding_input_2[0][0]          
____________________________________________________________________________________________________
batchnormalization_2 (BatchNormal(64, 40, 42)          84          embedding_2[0][0]                
____________________________________________________________________________________________________
lstm_3 (LSTM)                    (64, 40, 512)         1136640     batchnormalization_2[0][0]       
____________________________________________________________________________________________________
lstm_4 (LSTM)                    (64, 40, 512)         2099200     lstm_3[0][0]                     
____________________________________________________________________________________________________
timedistributed_3 (TimeDistribute(64, 40, 512)         262656      lstm_4[0][0]                     
____________________________________________________________________________________________________
dropout_2 (Dropout)              (64, 40, 512)         0           timedistributed_3[0][0]          
____________________________________________________________________________________________________
timedistributed_4 (TimeDistribute(64, 40, 58)          29754       dropout_2[0][0]                  
====================================================================================================
Total params: 3530770
____________________________________________________________________________________________________

In [23]:
for l in model.layers:
    print(l.name)


embedding_2
batchnormalization_2
lstm_3
lstm_4
timedistributed_3
dropout_2
timedistributed_4

In [24]:
model.compile(loss='sparse_categorical_crossentropy', optimizer=Adam())

Train


In [25]:
# In a stateful network, you should only pass inputs with a number of samples that can be divided by the batch size.
mx = len(x)//64*64
mx


Out[25]:
14976

In [27]:
import time
import tensorflow as tf

def run_epochs(n):
    keras.backend.get_session().run(tf.global_variables_initializer())  ## bug in keras/TF, new version
    #keras.backend.get_session().run(tf.initialize_all_variables()) ## bug, old version
    for i in range(n):
        start = time.time()
        print("-- Epoch: {}".format(i))
        model.reset_states()
        h = model.fit(x[:mx], y[:mx], batch_size=batch_size, nb_epoch=1, shuffle=False, verbose=0)
        print("-- duration: {}, loss: {}".format(time.time()-start, h.history['loss']))

In [28]:
run_epochs(1)


-- Epoch: 0
-- duration: 59.65085816383362, loss: [2.3404522808189068]

In [29]:
model.save_weights('data/nietzsche_ep1_TF.h5')

In [30]:
model.load_weights('data/nietzsche_ep1_TF.h5')

In [31]:
def print_example2(ln=160):
    for l1, l2 in zip(model.layers, pmodel.layers):
        if l1.name != "batchnormalization_1":
            #Tracer()()  #this one triggers debugger
            #print("layer: {}, len: {}".format(l1.name, len(l1.get_weights())))
            l2.set_weights(l1.get_weights())
    pmodel.compile(loss='sparse_categorical_crossentropy', optimizer=Adam())
    offset = 10
    seed_string=text[offset:ln+offset//4]
    pmodel.reset_states()
    
    # build context??
    for s in seed_string:
        x = np.array([char_indices[s]])[np.newaxis,:]
        preds = pmodel.predict(x, verbose=0)[0][0]
        #print("pred.shape:{}, pred:{}:{}".format(preds.shape, np.argmax(preds), choice(chars, p=preds)))
    s = choice(chars, p=preds)
    
    res=seed_string+s+'...\n\n'
    for i in range(ln):
        x = np.array([char_indices[s]])[np.newaxis,:]
        preds = pmodel.predict(x, verbose=0)[0][0]
        pres = choice(chars, p=preds)
        res = res+pres
    print(res)

In [32]:
print_example2()


supposing that truth is a woman--what then? is there not ground
for suspecting that all philosophers, in so far as they have been
dogmatists, have failed...

  , 
eaue    i i  -se.i  ,l , ii   se  s  i   eyo et ye y i  ei.
   
es   e:., oa pe.. e e,  , ee   aia -ie eeeo  ,.  )ee  
   e     eo.e,, ?e ee "    -:eeoe   

In [33]:
run_epochs(10)
print_example2()


-- Epoch: 0
-- duration: 50.12487030029297, loss: [2.3661422102879257]
-- Epoch: 1
-- duration: 49.74427628517151, loss: [1.7942562796111801]
-- Epoch: 2
-- duration: 49.80745816230774, loss: [1.6238063502515483]
-- Epoch: 3
-- duration: 50.29486870765686, loss: [1.5381001779156873]
-- Epoch: 4
-- duration: 50.04439949989319, loss: [1.4831517364224818]
-- Epoch: 5
-- duration: 50.02503514289856, loss: [1.4422495584202628]
-- Epoch: 6
-- duration: 50.51545548439026, loss: [1.4105210951250842]
-- Epoch: 7
-- duration: 49.256539821624756, loss: [1.3833295993315868]
-- Epoch: 8
-- duration: 49.66960120201111, loss: [1.3600059238254514]
-- Epoch: 9
-- duration: 50.87227201461792, loss: [1.339868517512949]
supposing that truth is a woman--what then? is there not ground
for suspecting that all philosophers, in so far as they have been
dogmatists, have failed...

 e   
  e 
i e   ee , asu  ,e 
 i,
  oa e e e     se ,      i   e i i   e sa    i s. ,  si,  ase. .ee
e    s e   e ae  ,e 
 a  e
  e  e   er  e     u a  ,     e

In [34]:
# not working for statefull, input dim (1, 40) not matchin (64, 40)
def print_example():
    seed_string="ethics is a basic foundation of all that"
    for i in range(320):
        x=np.array([char_indices[c] for c in seed_string[-40:]])[np.newaxis,:]
        preds = model.predict(x, verbose=0)[0][-1]
        preds = preds/np.sum(preds)
        next_char = choice(chars, p=preds)
        seed_string = seed_string + next_char
    print(seed_string)

In [36]:
#print_example()

In [ ]:
%%capture output
model.fit(sentences, np.expand_dims(next_chars,-1), batch_size=64, nb_epoch=1)

In [ ]:
output.show()

In [ ]:
%%capture output
model.fit(sentences, np.expand_dims(next_chars,-1), batch_size=64, nb_epoch=1)

In [ ]:
output.show()

In [ ]:
print_example()

In [ ]:
model.optimizer.lr=0.001

In [ ]:
%%capture output
model.fit(sentences, np.expand_dims(next_chars,-1), batch_size=64, nb_epoch=2)

In [ ]:
output.show()

In [ ]:
print_example()

In [ ]:
model.optimizer.lr=0.0001

In [ ]:
%%capture output
model.fit(sentences, np.expand_dims(next_chars,-1), batch_size=64, nb_epoch=3)

In [ ]:
output.show()

In [ ]:
print_example()

In [ ]:
model.save_weights('data/char_rnn.h5')

In [ ]:
model.optimizer.lr=0.00001

In [ ]:
%%capture output
model.fit(sentences, np.expand_dims(next_chars,-1), batch_size=64, nb_epoch=1)

In [ ]:
model.save_weights('data/char_rnn.h5')

In [ ]:
output.show()

In [ ]:
print_example()

In [ ]:
%%capture output
model.fit(sentences, np.expand_dims(next_chars,-1), batch_size=64, nb_epoch=1)

In [ ]:
output.show()

In [ ]:
print_example()

In [ ]:
print_example()

In [ ]:
model.save_weights('data/char_rnn.h5')

In [ ]: