In [ ]:
import numpy as np
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Dropout
from keras.layers import LSTM
from keras.callbacks import ModelCheckpoint
from keras.utils import np_utils
import sys
import re
import pickle
Next, we will load the data that we saved previously with the pickle library.
In [ ]:
# load the preprocessed training data and character mappings saved in the previous lab section
pickle_file = '-basic_data.pickle'

with open(pickle_file, 'rb') as f:
    save = pickle.load(f)
    X = save['X']
    y = save['y']
    char_to_int = save['char_to_int']
    int_to_char = save['int_to_char']
    del save  # hint to help gc free up memory
    print('Training set', X.shape, y.shape)
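If you want to double-check what was loaded, you can optionally inspect the vocabulary size and the shape of the one-hot encoded data (this check is not required for the rest of the lab):
In [ ]:
# optional: inspect the vocabulary and the encoded training data
print('Vocabulary size:', len(char_to_int))
print('Sequence length:', X.shape[1])
print('Number of training sequences:', X.shape[0])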
Now we need to define the Keras model. Since we will be loading the parameters from a pre-trained model, this definition needs to match the one from the previous lab section exactly. The only difference is that we comment out the dropout layer so that the model uses all of its hidden neurons when making predictions.
In [ ]:
# define the LSTM model
model = Sequential()
model.add(LSTM(128, return_sequences=False, input_shape=(X.shape[1], X.shape[2])))
# model.add(Dropout(0.50))
model.add(Dense(y.shape[1], activation='softmax'))
Next we will load the parameters from the model we trained previously and compile the model with the same loss function and optimizer.
In [ ]:
# load the parameters from the pretrained model
filename = "-basic_LSTM.hdf5"
model.load_weights(filename)
model.compile(loss='categorical_crossentropy', optimizer='adam')
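To verify that the architecture you defined matches the checkpoint you just loaded, you can optionally print a summary of the model:
In [ ]:
# optional: print the layer structure and parameter counts
model.summary()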
We also need to rewrite the sample() and generate() helper functions so that we can use them in our code:
In [ ]:
def sample(preds, temperature=1.0):
    # rescale the predicted probabilities by the temperature:
    # lower temperatures sharpen the distribution (more conservative choices),
    # higher temperatures flatten it (more diverse choices)
    preds = np.asarray(preds).astype('float64')
    preds = np.log(preds) / temperature
    exp_preds = np.exp(preds)
    preds = exp_preds / np.sum(exp_preds)
    # draw one sample from the rescaled distribution and return its index
    probas = np.random.multinomial(1, preds, 1)
    return np.argmax(probas)
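To get a feel for what the temperature parameter does, you can call sample() on a small made-up probability vector (the numbers below are purely illustrative):
In [ ]:
# purely illustrative: sample repeatedly from a made-up distribution at a low
# and a high temperature and compare how often each index is chosen
toy_preds = [0.5, 0.3, 0.15, 0.05]
for temp in [0.2, 1.5]:
    draws = [sample(toy_preds, temp) for _ in range(1000)]
    counts = [draws.count(i) for i in range(len(toy_preds))]
    print('temperature', temp, '->', counts)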
In [ ]:
def generate(sentence, sample_length=50, diversity=0.35):
    generated = sentence
    sys.stdout.write(generated)
    for i in range(sample_length):
        # one-hot encode the current window of characters
        x = np.zeros((1, X.shape[1], X.shape[2]))
        for t, char in enumerate(sentence):
            x[0, t, char_to_int[char]] = 1.
        # predict the next character and sample from the resulting distribution
        preds = model.predict(x, verbose=0)[0]
        next_index = sample(preds, diversity)
        next_char = int_to_char[next_index]
        # slide the window forward by one character
        generated += next_char
        sentence = sentence[1:] + next_char
        sys.stdout.write(next_char)
        sys.stdout.flush()
    print()
Now we can use the generate() function to generate text of any length based on our imported pre-trained model and a seed text of our choice. For best results, the length of the seed text should be the same as the length of the training sequences (100 characters in the previous lab section).
In this case, we will test whether the model has overfit by supplying it with two seeds: one taken directly from the training text, and one taken from a different Obama speech.
If the model has not overfit our training data, we should expect it to produce reasonable results for both seeds. If it has overfit, it might produce pretty good results for the seed coming directly from the training set, but perform poorly on the new seed. This would mean that it has learned to replicate our training text, but cannot generalize to produce text based on other inputs. Since the original article was very short, however, the model's vocabulary might be very limited, which is why we use a part of another speech given by Obama as the second seed rather than completely random text.
Since we have not trained the model for very long, we will also use a lower temperature to get the model to generate more accurate, if less diverse, results. Try running the code a few times with different temperature settings to generate different results.
In [ ]:
prediction_length = 500
seed_from_text = "america has shown that progress is possible. last year, income gains were larger for households at t"
seed_original = "and as people around the world began to hear the tale of the lowly colonists who overthrew an empire"
for seed in [seed_from_text, seed_original]:
    generate(seed, prediction_length, 0.50)
    print("-" * 20)
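If you want to experiment with your own seed text, keep in mind that generate() assumes the seed is exactly as long as the training sequences and contains only characters the model has seen. A small sanity check like the following (a hypothetical helper, not part of the original lab) can catch mistakes before generation:
In [ ]:
# hypothetical helper: validate a seed before passing it to generate()
def check_seed(seed):
    assert len(seed) == X.shape[1], \
        "seed must be %d characters long, got %d" % (X.shape[1], len(seed))
    unknown = set(seed) - set(char_to_int)
    assert not unknown, "seed contains characters not in the vocabulary: %r" % sorted(unknown)

check_seed(seed_from_text)
check_seed(seed_original)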