In [1]:
from __future__ import print_function
# to be able to see plots
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
import sys
sys.path.append("../tools")
from tools import collage
# Use only a fraction of the GPU memory so the GPU can be shared.
# This is not needed on dedicated machines and is specific to TensorFlow.
gpu_memory_usage = 0.8
import tensorflow as tf
from keras.backend.tensorflow_backend import set_session
config = tf.ConfigProto()
config.gpu_options.per_process_gpu_memory_fraction = gpu_memory_usage
set_session(tf.Session(config=config))
A good corpus to start with is Wikipedia. An easy-to-use version can be downloaded from http://www.cs.upc.edu/~nlp/wikicorpus/.
In [2]:
with open('./en.wiki.txt', 'rb') as f:
    data = f.read()
data = data[:50000000]
# print a random 1000-byte sample of the corpus
pos = np.random.randint(len(data)-1000)
print(data[pos:pos+1000].decode('utf-8', errors='replace'))
# byte-level representation: each byte of text becomes one uint8 symbol
dataT = np.asarray(bytearray(data), dtype=np.uint8)
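The network will model the text one byte at a time, so every symbol is an integer in 0..255. A quick round-trip check (a minimal sketch, not part of the original notebook) shows the representation:
In [ ]:
# byte-level round trip: text -> uint8 array -> text
sample = b'Wiki'
arr = np.asarray(bytearray(sample), dtype=np.uint8)
print(arr)                  # [ 87 105 107 105]
print(bytes(arr).decode())  # Wiki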
The generator creates mini-batches of continuous sequences; that is, sequence i in batch t starts where sequence i ended in batch t-1. The reset() method restarts the sequences at new random positions. This continuity is what makes stateful training of the network possible.
In [6]:
class myGenerator(object):
    def __init__(self, data, length=48, batchSize=32):
        self.length = length
        self.batchSize = batchSize
        self.data = data
        # evenly spaced starting positions, one per sequence in the batch
        self.positions = np.arange(0, data.shape[0], data.shape[0] // batchSize)[:batchSize]
    def __iter__(self):
        return self
    def __next__(self):
        return self.next()
    def next(self):
        d = []
        l = []
        for i in range(self.batchSize):
            p = self.positions[i]
            d.append(self.data[p:p+self.length])
            # targets are the inputs shifted by one byte
            l.append(self.data[p+1:p+self.length+1])
            # advance the position so the next batch continues where this one ended
            self.positions[i] = (self.positions[i] + self.length) % (self.data.shape[0] - 2*self.length)
        return np.stack(d), np.stack(l).reshape(self.batchSize, self.length, 1)
    def reset(self):
        self.positions = np.random.randint(self.data.shape[0]-10*self.length, size=self.positions.size)
batchSize = 32
length = 48
generator = myGenerator(dataT, length, batchSize)
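As a quick sanity check (a small sketch, not part of the original notebook), one batch can be drawn to verify the shapes and the one-byte shift between inputs and targets:
In [ ]:
# inputs: (batchSize, length), targets: (batchSize, length, 1);
# targets should equal inputs shifted by one position
x, y = next(generator)
print(x.shape, y.shape)                        # (32, 48) (32, 48, 1)
print(np.array_equal(x[0, 1:], y[0, :-1, 0]))  # True
generator.reset()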
In [7]:
from keras.layers import Embedding, CuDNNGRU, SimpleRNN, CuDNNLSTM
from keras.layers import Activation, BatchNormalization, Dense, Average, Maximum, Concatenate
from keras.models import Model
from keras import regularizers, initializers
def get_GRU_network(input_data, layer_count, layer_dim, stateful=False):
    net = Embedding(256, output_dim=32)(input_data)
    net = BatchNormalization()(net)
    previous = []
    for i in range(layer_count):
        net = CuDNNGRU(layer_dim, return_sequences=True, stateful=stateful,
                       kernel_regularizer=regularizers.l2(0.000001),
                       recurrent_regularizer=regularizers.l2(0.000001))(net)
        previous.append(net)
    # combine the outputs of all recurrent layers (skip connections)
    if len(previous) > 1:
        net1 = Average()(previous)
        net2 = Maximum()(previous)
        net = Concatenate()([net1, net2, net])
    net = Dense(layer_dim)(net)
    net = BatchNormalization()(net)
    net = Activation('relu')(net)
    # per-timestep distribution over the 256 possible byte values
    net = Dense(256)(net)
    net = Activation('softmax')(net)
    return net

def get_GRU_simple_network(input_data, layer_count, layer_dim, stateful=False):
    net = Embedding(256, output_dim=32)(input_data)
    net = BatchNormalization()(net)
    for i in range(layer_count):
        net = CuDNNGRU(layer_dim, return_sequences=True, stateful=stateful)(net)
        net = BatchNormalization()(net)
    net = Dense(256)(net)
    net = Activation('softmax')(net)
    return net
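CuDNNGRU requires an NVIDIA GPU. On a CPU-only machine, the plain GRU layer can be swapped in instead (a sketch, not in the original notebook): with reset_after=True and a sigmoid recurrent activation it uses the same gate formulation as CuDNNGRU.
In [ ]:
# CPU-only fallback (illustrative): plain GRU configured to match CuDNNGRU
from keras.layers import GRU
def get_GRU_simple_network_cpu(input_data, layer_count, layer_dim, stateful=False):
    net = Embedding(256, output_dim=32)(input_data)
    net = BatchNormalization()(net)
    for i in range(layer_count):
        net = GRU(layer_dim, return_sequences=True, stateful=stateful,
                  recurrent_activation='sigmoid', reset_after=True)(net)
        net = BatchNormalization()(net)
    net = Dense(256)(net)
    net = Activation('softmax')(net)
    return net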
In [8]:
from keras import optimizers
from keras.models import Model
from keras import losses
from keras import metrics
from keras.layers import Input
layerCount = 2
layerSize = 720
# training model: processes whole mini-batches of length-48 sequences
input_data = Input(batch_shape=(batchSize, length), name='data')
net = get_GRU_simple_network(input_data, layerCount, layerSize, stateful=True)
model = Model(inputs=[input_data], outputs=[net])
# prediction model for text generation: same architecture, but batch size 1
# and one byte per step, so the recurrent state carries the whole context
input_data = Input(batch_shape=(1,1), name='data')
predictNet = get_GRU_simple_network(input_data, layerCount, layerSize, stateful=True)
predModel = Model(inputs=[input_data], outputs=[predictNet])
print('Model')
model.summary()
In [10]:
model.compile(
    loss=losses.sparse_categorical_crossentropy,
    optimizer=optimizers.Adam(lr=0.0002, clipnorm=5., clipvalue=1),  # no strong reason for clipnorm/clipvalue; just experimenting
    metrics=[metrics.sparse_categorical_accuracy])
In [12]:
import keras
# This callback resets the recurrent state (and moves the generator to new
# random positions) with probability 10% after each batch.
class My_Callback(keras.callbacks.Callback):
    def on_batch_end(self, batch, logs={}):
        if np.random.rand() > 0.9:
            self.model.reset_states()
            generator.reset()
        return
model.fit_generator(generator=generator, steps_per_epoch=1000, epochs=30, verbose=1, callbacks=[My_Callback()])
In [13]:
# transfer the trained weights into the single-step prediction model
model.save_weights('model.mod')
predModel.load_weights('model.mod', by_name=False)
predModel.reset_states()
In [19]:
last = np.zeros(1, dtype=int).reshape(1,1)
last[0,0] = dataT[10]
predModel.reset_states()
startString = u'Start the string with this'
if startString:
    # warm up the recurrent state by feeding the seed string byte by byte
    startString = [ord(x) for x in startString]
    last[0,0] = startString[0]
    print(chr(last[0,0]), end='')
    for i in startString[1:]:
        pred = predModel.predict(last)
        last[0,0] = i
        print(chr(last[0,0]), end='')
for i in range(10000):
    pred = predModel.predict(last)
    # sharpen the distribution (exponent > 1 lowers the sampling temperature)
    p = pred[0,0]**1.2
    p /= p.sum()
    last[0,0] = np.random.choice(p.size, p=p)
    print(chr(last[0,0]), end='')
print()
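The exponent 1.2 acts as an inverse temperature on the softmax output. A small helper (a sketch, not part of the original notebook; the name sample_byte and the temperature parameter are illustrative) makes the trade-off explicit:
In [ ]:
# Illustrative helper: temperature-controlled sampling from the model output.
# temperature < 1 makes the text more conservative, > 1 more random;
# temperature = 1/1.2 reproduces the loop above.
def sample_byte(pred, temperature=1.0):
    p = pred[0, 0] ** (1.0 / temperature)
    p /= p.sum()
    return np.random.choice(p.size, p=p)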