LSTM



In [1]:
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
import sys
import time
import random

In [2]:
import tensorflow as tf
import tensorflow.contrib.keras as keras

from tensorflow.contrib.keras import backend as K
from tensorflow.contrib.keras.python.keras.models import Sequential
from tensorflow.contrib.keras.python.keras.layers import Dense, LSTM, Activation

In [3]:
# Report the runtime environment so the recorded outputs can be tied to specific versions.
# TODO: also report the tf.contrib.keras version (keras.__version__ was tried; unresolved).
environment_report = [
    ("TensorFlow version =", tf.__version__),
    ("Python version =", sys.version),
    ("Keras backend =", keras.backend.backend()),
]
for label, value in environment_report:
    print(label, value)


TensorFlow version = 1.3.0
Python version = 3.5.2 (default, Nov 17 2016, 17:05:23) 
[GCC 5.4.0 20160609]
Keras backend = tensorflow

In [4]:
def vector_to_char(vec, ix_to_char):
    """Look up the character whose index holds the largest value in ``vec``.

    vec        -- sequence of scores/probabilities, one entry per vocabulary index
    ix_to_char -- lookup from vocabulary index to character
    """
    most_probable_ix = np.argmax(vec)
    return ix_to_char[most_probable_ix]

In [5]:
filename = 'data/first_names.txt'
# Read the whole corpus at once; the context manager closes the file handle
# deterministically instead of leaving it to the garbage collector.
with open(filename, 'r') as corpus_file:
    data = corpus_file.read() # should be simple plain text file
print("Loaded data from", filename)

# Fail early with a clear message; otherwise an empty file only surfaces
# further down as an IndexError on data[0].
if not data:
    raise ValueError("No characters found in {}".format(filename))

data = data.replace('\n', ' ') #change '\n' to ' ' for better readability

# Vocabulary = sorted unique characters, so index assignment is deterministic across runs.
chars = sorted(set(data))
data_size, vocab_size = len(data), len(chars)
print("Data has {} characters, {} unique.".format(data_size, vocab_size))

# Lookup tables between characters and their integer indices (inverse of each other).
char_to_ix = { ch:i for i,ch in enumerate(chars) }
ix_to_char = { i:ch for i,ch in enumerate(chars) }

print("Turn an array of characters to an array of numbers:")
data_ix = [char_to_ix[char] for char in data]
print("  data[0]={} has been turned to: \n  data_ix[0]={}".format(data[0], data_ix[0]))


Loaded data from data/first_names.txt
Data has 36122 characters, 27 unique.
Turn an array of characters to an array of numbers:
  data[0]=J has been turned to: 
  data_ix[0]=10

In [6]:
seq_len = 20 # number of characters fed to the RNN before it is asked for the next character while training
step_size = 3 # offset between the starts of consecutive sequences: 0, 3, 6, ...
print("Cutting text into sequences of seq_len={} overlapping after each step_size={} characters:".format(seq_len, step_size))

# Start offset of every training window; the upper bound leaves room for the
# one character that follows each window and serves as its target.
window_starts = range(0, len(data) - seq_len, step_size)

# Input windows, and the correct next char after each window (the training targets).
sequences = np.array([data_ix[start:start + seq_len] for start in window_starts])
correct_next_char = np.array([data_ix[start + seq_len] for start in window_starts])

print("  Sequences shape =", sequences.shape, sequences.dtype)
print("  correct_next_char shape =", correct_next_char.shape, correct_next_char.dtype)


Cutting text into sequences of seq_len=20 overlapping after each step_size=3 characters:
  Sequences shape = (12034, 20) int64
  correct_next_char shape = (12034,) int64

In [7]:
print("Vectorizing = transforming sequences and next_char to one-hot encoding:")
# Use the builtin `bool` as dtype: the `np.bool` alias is deprecated and no longer
# available in newer NumPy releases, while `bool` yields the same array dtype.
num_sequences = len(sequences)
seq_one_hot = np.zeros(shape=(num_sequences, seq_len, vocab_size), dtype=bool)
next_char_one_hot = np.zeros(shape=(num_sequences, vocab_size), dtype=bool)

# Set every one-hot position in a single fancy-indexed assignment:
# entry [s, c, sequences[s, c]] for inputs, entry [s, correct_next_char[s]] for targets.
seq_rows = np.arange(num_sequences)
seq_one_hot[seq_rows[:, None], np.arange(seq_len), sequences] = True
next_char_one_hot[seq_rows, correct_next_char] = True

# x / y are the names the training code uses for inputs / targets.
x = seq_one_hot
y = next_char_one_hot
print("  sequences[0,0]={} has been turned to: \n  seq_one_hot[0,0]={}".format(sequences[0,0], seq_one_hot[0,0]))
print("  correct_next_char[0]={} has been turned to: \n  next_char_one_hot[0]={}".format(correct_next_char[0], next_char_one_hot[0]))
print("  x = seq_one_hot shape =", seq_one_hot.shape, seq_one_hot.dtype)
print("  y = next_char_one_hot shape =", next_char_one_hot.shape, next_char_one_hot.dtype)


Vectorizing = transforming sequences and next_char to one-hot encoding:
  sequences[0,0]=10 has been turned to: 
  seq_one_hot[0,0]=[False False False False False False False False False False  True False
 False False False False False False False False False False False False
 False False False]
  correct_next_char[0]=3 has been turned to: 
  next_char_one_hot[0]=[False False False  True False False False False False False False False
 False False False False False False False False False False False False
 False False False]
  x = seq_one_hot shape = (12034, 20, 27) bool
  y = next_char_one_hot shape = (12034, 27) bool

In [8]:
def build_model(neurons, seq_len, vocab_size):
    """Build and compile a single-layer LSTM next-character classifier.

    neurons    -- number of units in the LSTM layer
    seq_len    -- number of time steps in each input sequence
    vocab_size -- length of each one-hot input vector and of the softmax output

    Returns the Sequential model compiled with categorical cross-entropy loss
    and an RMSprop optimizer (lr=0.01).
    """
    print('Building single layer LSTM model with {} neurons...'.format(neurons))
    input_shape = (seq_len, vocab_size)

    recurrent_layer = LSTM(neurons, input_shape=input_shape)
    output_layer = Dense(vocab_size, activation='softmax')

    net = Sequential()
    for layer in (recurrent_layer, output_layer):
        net.add(layer)

    optimizer = keras.optimizers.RMSprop(lr=0.01)
    net.compile(loss='categorical_crossentropy', optimizer=optimizer)

    print("LSTM layer input shape =", input_shape)
    print("Dense layer with vocab_size={} neurons and 'softmax' activation".format(vocab_size))
    return net

In [9]:
# Number of units in the LSTM layer (passed straight to LSTM(...) by build_model).
neurons = 50
# Build and compile the network for the sequence length and vocabulary size computed above.
model = build_model(neurons, seq_len, vocab_size)


Building single layer LSTM model with 50 neurons...
LSTM layer input shape = (20, 27)
Dense layer with vocab_size=27 neurons and 'softmax' activation

In [19]:
def _generate_text(model, seed, generate_chars, char_to_ix, ix_to_char, seq_len, vocab_size):
    """Greedily generate ``generate_chars`` characters after ``seed``, streaming them to stdout.

    Returns the generated characters as one string (the seed is not included).
    """
    window = seed
    generated = []
    for _ in range(generate_chars):
        # One-hot encode the current window as a batch holding a single sequence.
        x_pred = np.zeros((1, seq_len, vocab_size))
        for t, char in enumerate(window): #window holds characters, not indices
            x_pred[0, t, char_to_ix[char]] = 1.

        preds = model.predict(x_pred, verbose=0)[0] #vector of next-char probabilities
        #TODO implement sample function supporting different diversities (currently greedy argmax)
        next_char = vector_to_char(preds, ix_to_char)

        window = window[1:] + next_char #slide the window forward by one character
        generated.append(next_char)

        # Write char by char so progress is visible while generating.
        sys.stdout.write(next_char)
        sys.stdout.flush()
    return ''.join(generated)


def train_model(epochs, batch_size, generate_chars, model, data, char_to_ix, ix_to_char, seq_len, vocab_size,
                x_train=None, y_train=None):
    """Train ``model`` one epoch at a time and print a generated sample after every epoch.

    epochs         -- number of single-epoch fit() calls to run
    batch_size     -- batch size forwarded to model.fit
    generate_chars -- how many characters to generate after each epoch
    model          -- object providing fit(x, y, batch_size=..., epochs=...) and predict(x, verbose=...)
    data           -- corpus string from which a random seed of seq_len characters is cut
    char_to_ix     -- mapping character -> vocabulary index
    ix_to_char     -- mapping vocabulary index -> character
    seq_len        -- length of the seed / model input window
    vocab_size     -- length of each one-hot vector
    x_train        -- training inputs; defaults to the notebook-level ``x``
    y_train        -- training targets; defaults to the notebook-level ``y``

    Raises ValueError if ``data`` is too short to cut a seed from. Returns None.
    """
    # Earlier versions always trained on the notebook globals; keep that as the
    # default so existing calls behave the same.
    if x_train is None:
        x_train = x
    if y_train is None:
        y_train = y

    # random.randint is inclusive on both ends, so this is the largest start index it may draw.
    last_seed_start = len(data) - seq_len - 1
    if last_seed_start < 0:
        raise ValueError("data must contain more than seq_len={} characters, got {}".format(seq_len, len(data)))

    print("Started training for {} epochs with batch size = {}".format(epochs, batch_size))
    for epoch in range(epochs):
        print("")
        print("-"*30)
        print("Epoch", epoch)
        model.fit(x_train, y_train, batch_size=batch_size, epochs=1)

        seq_start_index = random.randint(0, last_seed_start)
        sentence = data[seq_start_index: seq_start_index + seq_len] #sentence = sequence
        print('----- Generating with seed: "' + sentence + '"')

        _generate_text(model, sentence, generate_chars, char_to_ix, ix_to_char, seq_len, vocab_size)

In [10]:
epochs = 20
batch_size = 128
generate_chars = 50 # number of characters to generate after each epoch

print("Started training for {} epochs with batch size = {}".format(epochs, batch_size))
for epoch in range(epochs):
    # Epoch banner.
    print("")
    print("-"*30)
    print("Epoch", epoch)

    # Train for exactly one epoch per outer iteration so a sample can be generated in between.
    model.fit(x, y, batch_size=batch_size, epochs=1)

    # Seed the generator with a random seq_len-character slice of the corpus.
    seq_start_index = random.randint(0, len(data) - seq_len - 1)
    seq_end_index = seq_start_index + seq_len
    sentence = data[seq_start_index:seq_end_index] # sentence = sequence
    print('----- Generating with seed: "' + sentence + '"')

    for _ in range(generate_chars):
        # One-hot encode the current window (characters, not indices) as a batch of one sequence.
        x_pred = np.zeros((1, seq_len, vocab_size))
        positions = np.arange(len(sentence))
        char_indices = [char_to_ix[ch] for ch in sentence]
        x_pred[0, positions, char_indices] = 1.

        # Vector of probabilities for the next character; the most probable one is taken.
        # TODO implement sample function supporting different diversities
        preds = model.predict(x_pred, verbose=0)[0]
        next_char = vector_to_char(preds, ix_to_char)

        # Slide the window forward by one character.
        sentence = sentence[1:] + next_char

        print(next_char, end='', flush=True)


Started training for 20 epochs with batch size = 128

------------------------------
Epoch 0
Epoch 1/1
12034/12034 [==============================] - 9s - loss: 2.7628     
----- Generating with seed: "A DAYSI DARLENA DARC"
H  H  H  H  H  H  H  H  H  H  H  H  H  H  H  H  H 
------------------------------
Epoch 1
Epoch 1/1
12034/12034 [==============================] - 8s - loss: 2.5401     
----- Generating with seed: " MAJORIE MAGDA MAC L"
LLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLL
------------------------------
Epoch 2
Epoch 1/1
12034/12034 [==============================] - 8s - loss: 2.4221     
----- Generating with seed: " DEBI DARRICK DARLEE"
  II  II  II  II  II  II  II  II  II  II  II  II  
------------------------------
Epoch 3
Epoch 1/1
12034/12034 [==============================] - 8s - loss: 2.3185     
----- Generating with seed: "H KIMBERLY DEBORAH J"
ARINIA CICININIA CICININIA CICININIA CICININIA CIC
------------------------------
Epoch 4
Epoch 1/1
12034/12034 [==============================] - 9s - loss: 2.2598     
----- Generating with seed: "EA DEADRA DAYSI DARL"
ENN JANNE JANNE JANNE JANNE JANNE JANNE JANNE JANN
------------------------------
Epoch 5
Epoch 1/1
12034/12034 [==============================] - 9s - loss: 2.2209     
----- Generating with seed: "UBIA NU NORIKO NOHEM"
INNENNENNENNENNENNENNENNENNENNENNENNENNENNENNENNEN
------------------------------
Epoch 6
Epoch 1/1
12034/12034 [==============================] - 9s - loss: 2.1863     
----- Generating with seed: "A KANDRA KANDIS KAMI"
A ARA ARA ARA ARA ARA ARA ARA ARA ARA ARA ARA ARA 
------------------------------
Epoch 7
Epoch 1/1
12034/12034 [==============================] - 9s - loss: 2.1476     
----- Generating with seed: "ALA KALYN KALLIE KAL"
LA LANALLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLL
------------------------------
Epoch 8
Epoch 1/1
12034/12034 [==============================] - 8s - loss: 2.1007     
----- Generating with seed: "E ELANOR EDDA ECHO E"
MANA MARIANA MARIANA MARIANA MARIANA MARIANA MARIA
------------------------------
Epoch 9
Epoch 1/1
12034/12034 [==============================] - 9s - loss: 2.0619     
----- Generating with seed: "SA LUCILE LORIE LEAN"
 RISTA RISTA RISTA RISTA RISTA RISTA RISTA RISTA R
------------------------------
Epoch 10
Epoch 1/1
12034/12034 [==============================] - 8s - loss: 2.0248     
----- Generating with seed: "ETTA ESTELLA ELVA EF"
RIN RORI RORI RORI RORI RORI RORI RORI RORI RORI R
------------------------------
Epoch 11
Epoch 1/1
12034/12034 [==============================] - 8s - loss: 1.9868     
----- Generating with seed: "IE BARBERA BARBAR BA"
LLE ALLEN GRILL CARLE CLELL GELLE ALLEN GRILL GELL
------------------------------
Epoch 12
Epoch 1/1
12034/12034 [==============================] - 8s - loss: 1.9465     
----- Generating with seed: " MCKENZIE MAYE MAYBE"
L MARLI MARLI MARLI MARLIN MARLIN MARLIN MARLIN MA
------------------------------
Epoch 13
Epoch 1/1
12034/12034 [==============================] - 9s - loss: 1.9052     
----- Generating with seed: "CKA ELNORA ELLIOTT E"
LENE ELENE ELENE ELENE ELENE ELENE ELENE ELENE ELE
------------------------------
Epoch 14
Epoch 1/1
12034/12034 [==============================] - 9s - loss: 1.8631     
----- Generating with seed: "ARIANO MARGOT MA LOU"
DY MARISTY MARISHA MARISTE MARISTY MARISHA MARISTE
------------------------------
Epoch 15
Epoch 1/1
12034/12034 [==============================] - 9s - loss: 1.8075     
----- Generating with seed: "COLAS MARISSA LOURDE"
 MARABERTO MARABERTO MARABERTO MARABERTO MARABERTO
------------------------------
Epoch 16
Epoch 1/1
12034/12034 [==============================] - 9s - loss: 1.7676     
----- Generating with seed: " MALIA MAIRA MAEGAN "
MICIA MICIA MICIA MICIA MICIA MICIA MICIA MICIA MI
------------------------------
Epoch 17
Epoch 1/1
12034/12034 [==============================] - 10s - loss: 1.7324    
----- Generating with seed: " ANTWAN ANNETTA ANNE"
TTA ARINA ARISA ARINA ARISA ARINA ARISA ARINA ARIS
------------------------------
Epoch 18
Epoch 1/1
12034/12034 [==============================] - 9s - loss: 1.6961     
----- Generating with seed: "THA SHAWNA RENA ORA "
ROSENE ROSANE RONETTE RESA RESA RENETA RESA RENETA
------------------------------
Epoch 19
Epoch 1/1
12034/12034 [==============================] - 9s - loss: 1.6668     
----- Generating with seed: "EARLE PAULETTA PATRI"
E ROBELBRE GRADA ROBELBERTORDE GENNIE BENNIE BENNA

Repetitions gone?

It looks like the network has gotten rid of the repetitions, but they are actually just hiding on a larger scale. Let's continue training...


In [18]:
# `model_backup = model` only binds a second name to the same object, so it changes
# together with `model` during training. The weight snapshot is what actually allows
# rolling back this round: model.set_weights(weights_backup).
model_backup = model
weights_backup = model.get_weights()
epochs = 20
batch_size = 128
generate_chars = 250 # 5 times as many chars per sample as in the first run (50)
train_model(epochs, batch_size, generate_chars, model, data, char_to_ix, ix_to_char, seq_len, vocab_size)


Started training for 20 epochs with batch size = 128

------------------------------
Epoch 0
Epoch 1/1
12034/12034 [==============================] - 8s - loss: 1.5022     
----- Generating with seed: "NNA LAWANA LAVONIA L"
ARIA LARIA LARA LARA LAURA LAURA LAURA LAURA LAURA LAURA LAURA LAURA LAURA LAURA LAURA LAURA LAURA LAURA LAURA LAURA LAURA LAURA LAURA LAURA LAURA LAURA LAURA LAURA LAURA LAURA LAURA LAURA LAURA LAURA LAURA LAURA LAURA LAURA LAURA LAURA LAURA LAURA L
------------------------------
Epoch 1
Epoch 1/1
12034/12034 [==============================] - 9s - loss: 1.4836     
----- Generating with seed: "NE JACLYN HUMBERTO G"
REDA ROBELLE RENA RENA RENA RESALE RENA RENA RESALE RENA RENA RESALE RENA RENA RESALE RENA RENA RESALE RENA RENA RESALE RENA RENA RESALE RENA RENA RESALE RENA RENA RESALE RENA RENA RESALE RENA RENA RESALE RENA RENA RESALE RENA RENA RESALE RENA RENA R
------------------------------
Epoch 2
Epoch 1/1
12034/12034 [==============================] - 9s - loss: 1.4526     
----- Generating with seed: "YLISS PHYLIS PHUONG "
HENDREE HERLENE GENETTE ELENE ELENE ELENE ELENE ELENE ELENE ELENE ELENE ELENE ELENE ELENE ELENE ELENE ELENE ELENE ELENE ELENE ELENE ELENE ELENE ELENE ELENE ELENE ELENE ELENE ELENE ELENE ELENE ELENE ELENE ELENE ELENE ELENE ELENE ELENE ELENE ELENE ELEN
------------------------------
Epoch 3
Epoch 1/1
12034/12034 [==============================] - 9s - loss: 1.4352     
----- Generating with seed: " JESICA JERROLD JEFF"
IE JERELIN JEA JEANNE JANIE JANIE JA JANELL JA JANELL JA JANELL JA JANELL JA JANELL JA JANELL JA JANELL JA JANELL JA JANELL JA JANELL JA JANELL JA JANELL JA JANELL JA JANELL JA JANELL JA JANELL JA JANELL JA JANELL JA JANELL JA JANELL JA JANELL JA JAN
------------------------------
Epoch 4
Epoch 1/1
12034/12034 [==============================] - 9s - loss: 1.4067     
----- Generating with seed: "CO FLORIDA FLORENTIN"
E FSIENE SHANIELA SUEDA SHAMEE SHANA SHARICK STANIETTA SUSTANN SUMITA SUEDA SUEDA SUATTUN SUEDA SUEDIE SUANTY SUATTUNA SUETTA SUEDIE SUADY SUANTY SUATTAN SUATTAN SUSTANN SUATTAN SUATTAN SUSTANN SUATTAN SUATTAN SUSTANN SUATTAN SUATTAN SUSTANN SUATTAN 
------------------------------
Epoch 5
Epoch 1/1
12034/12034 [==============================] - 9s - loss: 1.3916     
----- Generating with seed: "GGIE KRISTINE KAYLA "
KATTER KARINA RACK KISA RANA ROBELL RANDA RAMEN RACK RANDA RAMEN ROBELL RANDA RACK RON GRADA ROBELL RONA ROSSA RONA ROSA RONA ROBELL RORA ROBELL RABRA ROXANN GRADA RACH KRITTA RACK RANNA RACK RANA RACK ROBELL RAMARA RAXANN KARINA RAXINA RACK RANDA KR
------------------------------
Epoch 6
Epoch 1/1
12034/12034 [==============================] - 9s - loss: 1.3653     
----- Generating with seed: "E CAREY CANDY BERT A"
NNIELLA ARVENA ALICIA ARLENE ALICA CHELISA CHERLESCERIE CHELISIA CHERLESHINA CHERLIE CHERLESHINA FLON CHERLISCE CILISICE CHELLIS CHERLESHA CHERLESHINA CHERLIE CHERLESHINA FLON CHERLISCE CILISICE CHELLIS CHERLESHA CHERLESHINA CHERLIE CHERLESHINA FLON 
------------------------------
Epoch 7
Epoch 1/1
12034/12034 [==============================] - 9s - loss: 1.3471     
----- Generating with seed: "HODA RAE NOLA NELDA "
LUCHERLEN JELEN JONETTE JENETTE JEANNETTA JEANNE JEANNETTE JEANNETTA JEANNE JEANNETTE JEANNETTA JEANNE JEANNETTE JEANNETTA JEANNE JEANNETTE JEANNETTA JEANNE JEANNETTE JEANNETTA JEANNE JEANNETTE JEANNETTA JEANNE JEANNETTE JEANNETTA JEANNE JEANNETTE JE
------------------------------
Epoch 8
Epoch 1/1
12034/12034 [==============================] - 10s - loss: 1.3246    
----- Generating with seed: "EA AUDIE ASUNCION AS"
TANA DOSEY ROHAN ROSEY RACHRY ROREY REXANE DONEY ROYAN ROSEY ROSANE ROSEY REXANE ROSEY ROSANY ROSEY ROSANDO ROSEY ROSANE ROSEY REXANE ROSEY ROSANY ROSEY ROSANDO ROSEY ROSANE ROSEY REXANE ROSEY ROSANY ROSEY ROSANDO ROSEY ROSANE ROSEY REXANE ROSEY ROSA
------------------------------
Epoch 9
Epoch 1/1
12034/12034 [==============================] - 9s - loss: 1.3051     
----- Generating with seed: " LASHAY LASHAWNDA LA"
NNA JANNA JACQUEL JANNIE JANNIE JANNA JANNA JANNA JANNA JANNA JANNA JANNA JANNA JANNA JANNA JANNA JANNA JANNA JANNA JANNA JANNA JANNA JANNA JANNA JANNA JANNA JANNA JANNA JANNA JANNA JANNA JANNA JANNA JANNA JANNA JANNA JANNA JANNA JANNA JANNA JANNA JA
------------------------------
Epoch 10
Epoch 1/1
12034/12034 [==============================] - 10s - loss: 1.2895    
----- Generating with seed: "EBERA DEANDREA DEADR"
E CHERLER CERRIE CRASTE CRACELY ROBERTO ROMEY ROSA MOLY RUE MOL MERLY MERIBERTO MARBER MARGEL MARYA MARLER MARBER MARGER MARGER MARLETTE MARMEY MARMEY MARIKA MARGER MARGER MARGER MARLETTE MARMEY MARMEY MARIKA MARGER MARGER MARGER MARLETTE MARMEY MARM
------------------------------
Epoch 11
Epoch 1/1
12034/12034 [==============================] - 9s - loss: 1.2721     
----- Generating with seed: " LYLA LYDA LUPITA LU"
LLA LENTA LELIA LENNELL JANNE JACIA JACQUEY JANNIE JACQUELL JANN JACIA JACQUEY JANNIE JACQUELL JANN JACIA JACQUEY JANNIE JACQUELL JANN JACIA JACQUEY JANNIE JACQUELL JANN JACIA JACQUEY JANNIE JACQUELL JANN JACIA JACQUEY JANNIE JACQUELL JANN JACIA JACQ
------------------------------
Epoch 12
Epoch 1/1
12034/12034 [==============================] - 9s - loss: 1.2534     - ET
----- Generating with seed: "E LAVERNE KENDRA JAS"
IE JONETTA JEFELINE JEFIFAN JEFIFIE GEON GENDIGE GEORINE GEORINE GEORINE GEORINE GEORINE GEORINE GEORINE GEORINE GEORINE GEORINE GEORINE GEORINE GEORINE GEORINE GEORINE GEORINE GEORINE GEORINE GEORINE GEORINE GEORINE GEORINE GEORINE GEORINE GEORINE G
------------------------------
Epoch 13
Epoch 1/1
12034/12034 [==============================] - 10s - loss: 1.2350    
----- Generating with seed: "LBY CLEMENT CHASITY "
ALLA ANNISSA AMILA AMILA ADIELIA ANGERINA ANNICHA ANNIE ANNISSANA ANDALIA ANDALIA ANNIE ANNISSANANA ANGANDA ANDINANNA ANNICKA ANDALINA ANNIELLA ANNICK AMMITT NANDAL NIMA LUNDA MARGEN MARGER MARIANN MARINANN LAVINA LANNA LANNANNAL LAN LAN LAURA LAURON
------------------------------
Epoch 14
Epoch 1/1
12034/12034 [==============================] - 9s - loss: 1.2190     
----- Generating with seed: "ERICA JENNINE JENNEL"
LE JULINA VANNIE VANNIE VALETTA VELANN VERNITA VERNETTA VERNETTEN VINA VERNETTA VERNIE VERNETTA VERNIN VINNINE VENNIE VENNETTEN VINNINE VINNITA VERNITA VENNETTE VERNETTEN VINDIE VERNETTA VERNINE VERNETTANNA VINNETTA MARGERON WILLINE WILLETHONNA MIRTO
------------------------------
Epoch 15
Epoch 1/1
12034/12034 [==============================] - 9s - loss: 1.1942     
----- Generating with seed: "ANN DIANE ALICE JEAN"
A SHANTA THER SHAR TANA TAMETTA TANA TANTA TANA TANA TANA TARETTA TANA TANTA TANA TANA TANA TARETTA TANA TANTA TANA TANA TANA TARETTA TANA TANTA TANA TANA TANA TARETTA TANA TANTA TANA TANA TANA TARETTA TANA TANTA TANA TANA TANA TARETTA TANA TANTA TAN
------------------------------
Epoch 16
Epoch 1/1
12034/12034 [==============================] - 9s - loss: 1.1831     
----- Generating with seed: "MI NOBUKO NISHA NIES"
 MARILLA LAURA LARITA LATANELLE KATRIS KARINA KARINA KARINA KARIA KATRIE KARINA KATRIE TARINA REANA REANA REANA REANA REANA REANA REANA REANA REANA REANA REANA REANA REANA REANA REANA REANA REANA REANA REANA REANA REANA REANA REANA REANA REANA REANA 
------------------------------
Epoch 17
Epoch 1/1
12034/12034 [==============================] - 9s - loss: 1.1761     
----- Generating with seed: "I AMBROSE ALTAGRACIA"
 ARVER BEVELLE ELENE ELIE ELICE ELENE ELIE ELICE LEESHEE LEENE LEVON LEANE LILA LENNA LEANE LENNELLE LATENE LANDA LANN LATOMERLE LARETTE LARETTE LARETTE JACK JEREE CARLENE CLAMEE CLAMEL CERIS CLERESCER CLEVELLE CLERESHA JANETTE CLAUNEE CLAMEL CERISSE
------------------------------
Epoch 18
Epoch 1/1
12034/12034 [==============================] - 9s - loss: 1.1671     
----- Generating with seed: "MAY MANDY LOWELL LOR"
A LICHER LEN LEN LEANIE LEIN JEANIE CLEVELL CETRIE CERRIS CORDIE CARMIE CARMIE CARMIN CARIE CARIE CARMEL NARLE NATAMER BERT ERMAN BEOR BERNIE BEOR BERN BENNI DERNIE BEOR BEOR BEOR BENDIE BEOR BENNI EDDU DENIE ELIA ELINE ELIN ELIE ELIE OLEN ELEN ELENE
------------------------------
Epoch 19
Epoch 1/1
12034/12034 [==============================] - 9s - loss: 1.1490     
----- Generating with seed: "LYN ERMA BLANCA ANGE"
LIA NISSALE WILLETTE WILLETTO VINTY VILLA VINA VITTE VERRANNE VERMAN JUAN JANN JOSTIN JANNA JONDIE JOSEL JAFIE JEANNE JANNETTA JANN JANIE JOY JANNELL JANNE JANA JACQUEY JANI JANI JANN JUSSEL HARRON CHERB HERRY GILLIE GENDA HERRY GISSINE GILLENE GRODE

Still some repetitions, another 20 epochs...


In [20]:
# `model_backup = model` only binds a second name to the same object, so it changes
# together with `model` during training. The weight snapshot is what actually allows
# rolling back this round: model.set_weights(weights_backup).
model_backup = model
weights_backup = model.get_weights()
epochs = 20
batch_size = 128
generate_chars = 250 # long samples, so repetitions on a larger scale stay visible
train_model(epochs, batch_size, generate_chars, model, data, char_to_ix, ix_to_char, seq_len, vocab_size)


Started training for 20 epochs with batch size = 128

------------------------------
Epoch 0
Epoch 1/1
12034/12034 [==============================] - 9s - loss: 1.1381     
----- Generating with seed: " TAMEKIA TAMEIKA TAM"
Y THANT THACO TRESS TREDELTTT BARLYN SUETHA NATHILL WILLIA MARLETTE MARLINY MARLETT KARLY KRISTY KARLE NATAY KARLET KARLEY KATHLYN KATHA KATHLL JULD JULTE NATA VARRY NATHLL WATHOLL WILTA VERN VITHA VERNETTA VARLET VALLA PAMLE PARLETT HANALL JACK TETT
------------------------------
Epoch 1
Epoch 1/1
12034/12034 [==============================] - 9s - loss: 1.1325     
----- Generating with seed: "OAQUINA JOANNIE JOAN"
IE SHARICK SHAWNO STANITA SHANITA SHANISHA SHANISHA SHANITA SHANITA SASHARIE SHANITA SHANITA SHANITA SHANITA SHANITA SHANITA SHANITA SHANITA SHANITA SHANITA SHANITA SHANITA SHANITA SHANITA SHANITA SHANITA SHANITA SHANITA SHANITA SHANITA SHANITA SHANI
------------------------------
Epoch 2
Epoch 1/1
12034/12034 [==============================] - 11s - loss: 1.1201    
----- Generating with seed: " CHERELLE CHEREE CHE"
RIL CHERLEE CHERISHE CHERLE HILE HERLE HERLE HETTE HILEIH HERISH THERISSE HHOLLE HERRIL HESHE HERIE HEA HERIPE HEYLE HERISHA HISHA HELLIE HELLIE HERY HEYLE HERIE HERLEY HELL HHYDE HHELIA HERIE HERRY HELL HEYHE HERISIA HEA HERRIE HERLINE HESMINA HEAMI
------------------------------
Epoch 3
Epoch 1/1
12034/12034 [==============================] - 9s - loss: 1.1061     
----- Generating with seed: "N THADDEUS TANISHA S"
ANTIE ROBIN SHARETHA TILY SHAR TYYCEY SHAR SCOLPIE SHARLEY SHAR CHARLEY CHEREE CHEREELLE CHERLEY CHERRELY CHERRIE CHERRELEE HERRICH PHILLEN PHIL POYCE PILVEN PHILIETH HHOLELLE YRISHA HELLEIE HERRIE HERRIE HELLA WINTH WILLEE EVENI ARTONETTE ARICELINE 
------------------------------
Epoch 4
Epoch 1/1
12034/12034 [==============================] - 10s - loss: 1.1008    
----- Generating with seed: "NE BELINDA FELIX FAY"
EN FLON FLINA FLON FLINA FLON FSIE FFINICE FFINTENA FFI FFI FIELLA FFINDEL FLONNICA FANNIE FSIE FFINA FSIE FLONANN SHELANA SHANA SHANA SHANA SHANA SHANA SHANA SHANA SHANA SHANA SHANA SHANA SHANA SHANA SHANA SHANA SHANA SHANA SHANA SHANA SHANA SHANA S
------------------------------
Epoch 5
Epoch 1/1
12034/12034 [==============================] - 10s - loss: 1.0840    
----- Generating with seed: "ICIA CHRISTOPHER LIN"
CE KRISTO DORI ROSELIA ROSENISH ROSALE ROISIA ROSSIE ROSELIO PORTOR ROSSO ROSSO ROSHORD ROSSO ROSSO ROSHELL ROSSA ROSSO ROSELIO ROCH ROSSO RICK ROSA ROSSIE ROSSIE ROSSIE ROSSELL ROSSIE ROSSIE ROSSIE ROSSELL ROSSIE ROSSIE ROSSIE ROSSELL ROSSIE ROSSIE 
------------------------------
Epoch 6
Epoch 1/1
12034/12034 [==============================] - 9s - loss: 1.0767     
----- Generating with seed: " SHANI SHANELL SHAND"
A SHAREE SHERMANIE SHIERLES SHELIA SHELILIO HIGELLE YESHAREE ISSHEE HERMANDIE HERRIE GERRATINE GEORA GENDRIE GEOR GENNICE ERISHA JEENNE DERISCER GEANCER GEORGIE EEN ELINE ELIE ELICIE ELESHA ELIA ELICI ELIA LEESHE LUVIS LEENA LEYANE LILOZA LEVON LETTE
------------------------------
Epoch 7
Epoch 1/1
12034/12034 [==============================] - 9s - loss: 1.0649     
----- Generating with seed: "FLOYD DEAN CARLA WES"
HA NANDA NANITA NANNAMA KATHUN KIRLYN KEINA KETTINE KEINA KEINA KEINA KEERA KEESHA KEYAN KATHA KALIA KARITA KAMIE KATALINA KARITA KANITA KEINA KATHRISHA KEINA KATHARINE KATIA KATHA KAISHA KEINA KANDIEL KIETHA KIMIK KITWALIA KATAMINA KATRIEL KAIS KATH
------------------------------
Epoch 8
Epoch 1/1
12034/12034 [==============================] - 9s - loss: 1.0607     
----- Generating with seed: "K COREY TONYA DAN EL"
LIA ELLIN ELLIE ELICE ELLIE ELIS ELLIE ELICE ELLINE ELVEN ELLIN ELLIS ELLIE ELICE ELLINE ELVEN ELLIN ELLIS ELLIE ELICE ELLINE ELVEN ELLIN ELLIS ELLIE ELICE ELLINE ELVEN ELLIN ELLIS ELLIE ELICE ELLINE ELVEN ELLIN ELLIS ELLIE ELICE ELLINE ELVEN ELLIN E
------------------------------
Epoch 9
Epoch 1/1
12034/12034 [==============================] - 10s - loss: 1.0464    
----- Generating with seed: "RLEN ARDITH ARDIS AR"
LEN ARRY BESHY ARNETT BRIDY BOLL DENDIE DARRY CORTIA CARIDY CARMEN CARIE CARIEL CARMIN CARIE CLADELIA CARIDY CARIE CARIE CARIDA CARISTINA CARIEL CARMIN CARIE CLADELIA CARIDY CARIE CARIE CARIDA CARISTINA CARIEL CARMIN CARIE CLADELIA CARIDY CARIE CARIE
------------------------------
Epoch 10
Epoch 1/1
12034/12034 [==============================] - 10s - loss: 1.0425    
----- Generating with seed: "CONCETTA COLLIN COLE"
NA CLAUDLON COLL CARIS CRADIE CRADELL CRAVIE CRYSTIN CRAFLEN RORA ROSHORD RENNALD RESALEE RICH HERRIEL GESHERICE GERRISCIE GERRIGINE GERRY GERLINE GERRIE GENIRDO GRILLA GRYSCE GRADID GERRISCIN GERTICE GERRIE ISSELINA HELLA PENESHA HERNIE HERLIE HERRI
------------------------------
Epoch 11
Epoch 1/1
12034/12034 [==============================] - 10s - loss: 1.0288    
----- Generating with seed: " GUADALUPE CAROLINE "
JANA LARA LAUNDA LANDA LANDA LANORA LAVENA JANICLA LENDA PAMOK ROLLA PAMMY PARLETO GRASSIE RAISA ROYAN RACHERRE ROBE HETTAMIE HEANNA JEYANE JOYFLICE JANA JANNA JACKATTINA JUANNA JANNA LAVONA JACQUELL YADA VANNAN VALLA VANA VALA VANA VERA VAL VERTIN V
------------------------------
Epoch 12
Epoch 1/1
12034/12034 [==============================] - 10s - loss: 1.0312    
----- Generating with seed: "TTE CLAUDINE CLAIR C"
RATANNA CHARLEY CARLEN CARLON CARLEN CARLON CARLON CARLON CARLON CARLON CARLON CARLON CARLON CARLON CARLON CARLON CARLON CARLON CARLON CARLON CARLON CARLON CARLON CARLON CARLON CARLON CARLON CARLON CARLON CARLON CARLON CARLON CARLON CARLON CARLON CAR
------------------------------
Epoch 13
Epoch 1/1
12034/12034 [==============================] - 10s - loss: 1.0226    
----- Generating with seed: "LOLA KRISTY KENT BEU"
D THEERTER RYSHA RACK THAROR GRAGA GILANN GIEDA ISTIE RIMEY GARROY HORY THASSY THAR TIKA TUKON TWISTY TUKKY TWEY TWENDY TWYCE TUKIS TREEFASTINA THERA TYE TAMONY THISHY TIRRIE THRRETTA TRISTAL REEN ROBETTA PHINTE RICKIOR RETTERI GLAISHA RAYMER RAICH T
------------------------------
Epoch 14
Epoch 1/1
12034/12034 [==============================] - 10s - loss: 1.0100    
----- Generating with seed: "ILA HILARY HARRISON "
HERNIE HERLINE GERLY GERLYN GERTENE GARRIE GEORA GIENA GEANGORA GEORANDA GEORA GEORA GENTE GREN GEERNORE GERRIE GEANGROD GEORGONEN GARRY ERVEN EDENIA ERMENE ELENE ELENE ARDEN EDENA ELENE ARDENA ELIA EDELIA ELEANIA EDELIA ELENE ARDER LEXINE LEEAND ELI
------------------------------
Epoch 15
Epoch 1/1
12034/12034 [==============================] - 10s - loss: 1.0047    
----- Generating with seed: "NNY SOPHIE SONJA SIL"
FORD STANNA SHARRIE SHARRY CHIFISCESHA CHUNN SHANNILL STENI STANTEL SHORESSA COUNNIE COLFIE CORETTEN COLL CORNILL DONNI CCACIANNE COLL STERRICE SHARLOR CATHERINE CARIE CARISA COLL SAPHLURY NATALVENA STANISSA TIMOKY STINTE SHARLEY SHARLEY SHA SHA SHAR
------------------------------
Epoch 16
Epoch 1/1
12034/12034 [==============================] - 10s - loss: 0.9942    
----- Generating with seed: " RACHELL RACHELE QUI"
NA QUISTA TWENDIN SHANA TAMALANA AVITA AUSTA AMANA ROSALE LORIA LOONE LOONO LORIS LUCIA LORYA LORTOR LAURANA LANCELLA LARA LANNALDA LANA LANA LANA LANA LANA LANA LANA LANA LANA LANA LANA LANA LANA LANA LANA LANA LANA LANA LANA LANA LANA LANA LANA LAN
------------------------------
Epoch 17
Epoch 1/1
12034/12034 [==============================] - 10s - loss: 0.9902    
----- Generating with seed: "A SHAY SHAWANDA SHAS"
ORA SHARNIE SHANA SHARLETA SHANA SHARINE NUCOLINA LUCI LUCIA LEANNI LEAN LEANE LEANNE LANN LATON CLAN CLAN CLAN CRAY NAS ANN SANNA NANNALA NANNA NANTA NANA LANN LANN LATONCE LANORA LATRICK LANNIRA LANE KATRIE MARINA LAVENNE MARINE LUCI MILON MARGLE M
------------------------------
Epoch 18
Epoch 1/1
12034/12034 [==============================] - 11s - loss: 0.9924    
----- Generating with seed: "DALYN MADALINE MACY "
MARA LANDIE LALEY LALIA LANDA LANORA LATHERLE LATISHA LARINA LATON LAKINA LANN LAND LALOY JANELIA NATIFINA NATIMILA JANIE TARIKA TAMIKA TAMIKA TOFINA THENA TIKELLE THIQUY SHERLE YRISHA TOMIKA TRISTON TIBELLITT TENEMIE KIELLE KEINA KEERLE KESEY KAISA 
------------------------------
Epoch 19
Epoch 1/1
12034/12034 [==============================] - 10s - loss: 0.9812    
----- Generating with seed: "TOMIKA TINY TIESHA T"
OLONE THONNA THONS THOCELLON PEVELLPHEPHO GENIL HUDETTE GINNA THECITT GALLIA GIELMA GEORA GIEVINENN GEANGOLA GILLIN GERLITTE GBISH GRESSINCE GALLAINE CHANTEL JOSELLE CLAUPHON CLAY CLAUNNE LORIN CLANDRA CHERLEN CHERLETTE CORTIE CRRAGISTE CLETRIDA CLAM

Looking good ...

Let's check with a larger generated sample.


In [22]:
generate_chars = 1000
# Draw the seed position here rather than reusing whatever `seq_start_index` an
# earlier cell happened to leave behind, so this cell does not depend on hidden state.
seq_start_index = random.randint(0, len(data) - seq_len - 1)
sentence = data[seq_start_index: seq_start_index + seq_len] #sentence = sequence
print('----- Generating with seed: "' + sentence + '"')

for i in range(generate_chars):
    x_pred = np.zeros((1, seq_len, vocab_size)) #single sequence will be passed to trained RNN
    for t, char in enumerate(sentence): #turn sequence to one-hot
        x_pred[0, t, char_to_ix[char]] = 1. #sequence is from data = characters not numbers

    preds = model.predict(x_pred, verbose=0)[0] #get next_char prediction = vector of probabilities
    #next_index = sample(preds, diversity) #TODO implement sample function supporting different diversities
    next_char = vector_to_char(preds, ix_to_char)

    sentence = sentence[1:] + next_char #slide the window forward by one character

    sys.stdout.write(next_char)
    sys.stdout.flush()


----- Generating with seed: "LYN ERMA BLANCA ANGE"
LLE NARRUK LUCIUNE LUCIETCARD LUGU LUCIATO LORIE LUCION JOSTEN CLOVOL CETRISTIN BRYDCETT BEATHINA BRYDD BBEYSCE COLTIN CHERUDS CARLENCE CRYSTIN CRASTA CREFLORD BRYCIA BENNITT BEVELLIN BEOSETTO BRLOND BRENDIN BBISHER BENETHA DEROLLE BRADISTE CLETRIE CRASTEN CRYSTA CHERRY CRATISTE RONA ROSSIE ROHALE RICK ROBBY RUGLY RUGLY ROSALIA ROSALIO REVOLL REANE ROSHEL REENA ROSEL REYMORD RESSALDON ROSALMIRO ROBBYE KESTI KETTIFMA GENETTE GILONE GILLINE GENETT GILLINE GERRISCE GILLID GERLL GENDIGDI GILLINE RAMEE RORLORD REBRIE ROSELLO YOSSY SHANTA CRYDTON CHERL CRYSTIND CRAFLER CRLY CRYSTAPHE ROSELLE POLPHIN PHOTTINNE PHOLLPE POLLIE HIHA HEVORA HETRILLE GERLANCE JENEE METHENEL MARIBERTE MARCHEN MARGERLO LUCHER LEANE JEANNE JELLEN JANNA JEANAJDANNE JACIA JA JANN TAMEL TUGG TAMEY BINETT WADELL WILLETA MARGELLE MARIAL MAROMO MAROLA MARDONNE MARCELON MARLINE KARLIEN KATHELINE JUSTEPHA NANA LUCIATO LORLA LOLLE LOUN LUCIET LUCIE LUCIETTA LETTELLE LAUROMA KARRY JANA LANETTERINE JANA JANA JACQUEL JANNETTE JA

No large scale repetitions even with 1000 characters.


In [ ]: