In [1]:
from __future__ import print_function

# to be able to see plots
%matplotlib inline  
import matplotlib.pyplot as plt

import numpy as np

import sys

from tools import collage

# Restrict TensorFlow to a fraction of the GPU memory so the GPU can be shared.
# This is not needed on dedicated machines and is specific to the
# TensorFlow backend of Keras.
import tensorflow as tf
from keras.backend.tensorflow_backend import set_session

# NOTE(review): `gpu_memory_usage` was used below without ever being defined in
# this cell (NameError on a fresh kernel). 0.3 is a placeholder - tune it for
# the machine at hand.
gpu_memory_usage = 0.3

config = tf.ConfigProto()
config.gpu_options.per_process_gpu_memory_fraction = gpu_memory_usage
# The config only takes effect once a session created from it is handed to Keras.
set_session(tf.Session(config=config))

Using TensorFlow backend.

Create data generator

The generator creates mini-batches of continuous sequences - that is, sequence 1 in batch t starts where sequence 1 ended in batch t-1.

The reset() method restarts the sequences from new random positions.

This behavior is needed for the stateful operation of the network during training.

In [6]:
class myGenerator(object):
    """Endless iterator over mini-batches of contiguous sub-sequences of `data`.

    Every row of the batch owns a read position in `data`. A row in one batch
    starts where the same row ended in the previous batch, so a stateful
    recurrent network can carry its state across batches. `reset()` moves all
    rows to new random positions.

    NOTE(review): the original cell had lost several lines (`data` was never
    stored, `__next__` had no body, the batch lists were never filled).
    The slicing in `next` - targets are the inputs shifted by one element -
    is a reconstruction; confirm against the original notebook.
    """

    def __init__(self, data, length=48, batchSize=32):
        """Store the data and spread the initial read positions over it.

        Args:
            data: 1-D numpy array of symbols, indexed along axis 0.
            length: number of elements per sequence in a batch.
            batchSize: number of parallel sequences per batch.

        Raises:
            ValueError: if `data` is too short for the requested geometry.
        """
        total = data.shape[0]
        if total <= 2 * length or total < batchSize:
            raise ValueError(
                'data of size %d is too short for length=%d, batchSize=%d'
                % (total, length, batchSize))
        self.data = data
        self.length = length
        self.batchSize = batchSize
        # Floor division keeps the stride integral on Python 3 as well, so the
        # positions stay usable as indices. The slice guarantees exactly
        # `batchSize` positions when `total` is not divisible by `batchSize`,
        # and the modulo keeps every initial slice inside the data.
        stride = total // batchSize
        self.positions = np.arange(0, total, stride)[:batchSize] % (total - 2 * length)

    def __iter__(self):
        return self

    def __next__(self):
        # Python 3 iterator protocol; `next` below serves Python 2.
        return self.next()

    def next(self):
        """Return the next `(inputs, targets)` batch and advance all positions.

        Returns:
            inputs: array of shape (batchSize, length).
            targets: array of shape (batchSize, length, 1).
        """
        d = []
        l = []
        # Positions wrap early enough that input and target slices never run
        # past the end of the data.
        wrap = self.data.shape[0] - 2 * self.length
        for i in range(self.batchSize):
            p = int(self.positions[i])
            d.append(self.data[p:p + self.length])
            l.append(self.data[p + 1:p + 1 + self.length])
            self.positions[i] = (p + self.length) % wrap
        return np.stack(d), np.stack(l).reshape(self.batchSize, self.length, 1)

    def reset(self):
        """Restart every sequence from a new random position.

        `np.random.randint` raises ValueError when the data holds no more than
        `10 * length` elements.
        """
        self.positions = np.random.randint(
            self.data.shape[0] - 10 * self.length, size=self.positions.size)
# Mini-batch geometry; the same values are reused when the training model's
# input is declared.
length = 48
batchSize = 32
generator = myGenerator(data=dataT, length=length, batchSize=batchSize)

Define net

In [7]:
from keras.layers import Embedding, CuDNNGRU, SimpleRNN, CuDNNLSTM
from keras.layers import Activation, BatchNormalization, Dense, Average, Maximum, Concatenate
from keras.models import Model
from keras import regularizers, initializers

def get_GRU_network(input_data, layer_cout, layer_dim, stateful=False):
    """Build a stacked CuDNNGRU network whose layers also see earlier GRU outputs.

    Args:
        input_data: Keras input tensor holding integer symbols in [0, 256).
        layer_cout: number of recurrent blocks to stack.
        layer_dim: number of units in every GRU and in the Dense layer after it.
        stateful: passed to every CuDNNGRU layer.

    Returns:
        Output tensor with a softmax over 256 classes for every time step.
    """
    net = Embedding(256, output_dim=32)(input_data)
    net = BatchNormalization()(net)
    previous = []
    for _ in range(layer_cout):
        # NOTE(review): the original call was cut off after `stateful=stateful,`.
        # Any further keyword arguments it had could not be recovered.
        net = CuDNNGRU(layer_dim, return_sequences=True, stateful=stateful)(net)
        # NOTE(review): nothing ever filled `previous`, which left the merge
        # below unreachable. Collecting the GRU outputs here is an assumption -
        # confirm where the original appended.
        previous.append(net)
        # Average/Maximum need at least two inputs, hence the guard.
        if len(previous) > 1:
            net1 = Average()(previous)
            net2 = Maximum()(previous)
            net = Concatenate()([net1, net2, net])
        net = Dense(layer_dim)(net)
        net = BatchNormalization()(net)
        net = Activation('relu')(net)
    net = Dense(256)(net)
    net = Activation('softmax')(net)

    return net

def get_GRU_simple_network(input_data, layer_cout, layer_dim, stateful=False):
    """Build a plain stack of CuDNNGRU layers with batch normalization.

    Args:
        input_data: Keras input tensor holding integer symbols in [0, 256).
        layer_cout: number of GRU layers to stack.
        layer_dim: number of units in every GRU layer.
        stateful: passed to every CuDNNGRU layer.

    Returns:
        Output tensor with a softmax over 256 classes for every time step.
    """
    embedded = Embedding(256, output_dim=32)(input_data)
    hidden = BatchNormalization()(embedded)
    for _ in range(layer_cout):
        recurrent = CuDNNGRU(layer_dim, return_sequences=True, stateful=stateful)(hidden)
        hidden = BatchNormalization()(recurrent)
    scores = Dense(256)(hidden)

    return Activation('softmax')(scores)

In [8]:
from keras import optimizers
from keras.models import Model
from keras import losses
from keras import metrics
from keras.layers import Input

# Depth and width of the recurrent stack; both models below use the same
# values so that they have the same layer structure.
layerCount = 2
layerSize = 720

# Training model: fixed batch shape (batchSize, length), as required by
# stateful recurrent layers.
input_data = Input(batch_shape=(batchSize, length), name='data')
net = get_GRU_simple_network(input_data, layerCount, layerSize, stateful=True)
model = Model(inputs=[input_data], outputs=[net])

# model for text generation
# It consumes one symbol at a time (batch_shape (1,1)) and is built from its own
# layer instances, so it does not share weights with `model`.
# Note that `input_data` is rebound here to the generation input.
input_data = Input(batch_shape=(1,1), name='data')
predictNet = get_GRU_simple_network(input_data, layerCount, layerSize, stateful=True)
predModel = Model(inputs=[input_data], outputs=[predictNet])


In [10]:
    optimizer=optimizers.Adam(lr=0.0002, clipnorm=5., clipvalue=1), # no good reason for clipnorm and clipvalue just experimenting

In [12]:
import keras
# This callback resets sequence with probability 10% after each batch
class My_Callback(keras.callbacks.Callback):
    """Randomly restart the training sequences after a batch (about 10% of the time)."""

    def on_batch_end(self, batch, logs=None):
        """Called by Keras after every training batch.

        Args:
            batch: index of the batch that just finished (unused).
            logs: metrics dictionary supplied by Keras (unused).
        """
        if np.random.rand() > 0.9:
            # NOTE(review): the body of this branch was missing. It is rebuilt
            # from the comment "resets sequence": the module-level `generator`
            # jumps to new random positions, and the recurrent state is cleared
            # because it no longer matches the new positions. Confirm against
            # the original notebook.
            generator.reset()
            self.model.reset_states()
# Train on batches drawn from the generator: 30 epochs of 1000 batches each,
# with the callback applied after every batch.
model.fit_generator(
    generator=generator,
    steps_per_epoch=1000,
    epochs=30,
    verbose=1,
    callbacks=[My_Callback()],
)

Generate text

In [13]:
# Load weights into the single-step generation model. With by_name=False the
# weights are matched by layer order rather than by layer name.
# NOTE(review): no visible cell writes 'model.mod' - presumably the trained
# `model` weights are saved to it elsewhere; confirm before Restart & Run All.
predModel.load_weights('model.mod', by_name=False)

In [19]:
# Number of symbols to sample, and the exponent applied to the predicted
# distribution before sampling (values above 1 sharpen it, so likely symbols
# become even more likely).
generatedLength = 10000
sharpness = 1.2

# predModel is stateful: clear its recurrent state so that re-running this cell
# does not continue from wherever the previous run stopped.
predModel.reset_states()

# `last` holds the most recent symbol in the (1, 1) shape predModel expects.
last = np.zeros((1, 1), dtype=int)
# Fallback seed symbol, used when startString is empty.
# (`dataT` comes from a cell not shown here.)
last[0,0] = dataT[10]
startString = u'Start the string with this'
if startString:
    startString = [ord(x) for x in startString]
    last[0,0] = startString[0]
    print(chr(last[0,0]), end='')
    # Feed the prompt symbol by symbol. The predictions are discarded; the
    # calls only advance the recurrent state of the model.
    for i in startString[1:]:
        predModel.predict(last)
        last[0,0] = i
        print(chr(last[0,0]), end='')

for i in range(generatedLength):
    pred = predModel.predict(last)

    # Renormalize in float64: a float32 vector normalized in float32 can miss
    # the sum-to-one tolerance that np.random.choice enforces.
    p = pred[0,0].astype(np.float64)**sharpness
    p /= p.sum()

    last[0,0] = np.random.choice(p.size, p=p)
    print(chr(last[0,0]), end='')

In [ ]: