LSTM Poem Generator


In [2]:
import numpy as np
import pandas as pd
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Dropout
from keras.layers import LSTM
from keras.layers import RNN
from keras.utils import np_utils

In [3]:
sample_poem = open('sample_sonnets.txt').read().lower()
sample_poem[77:99]


Out[3]:
'ght never die,\n but as'

Method 1 - Character Based Poem Generation


In [9]:
characters = sorted(list(set(sample_poem)))

n_to_char = {n:char for n, char in enumerate(characters)}  # store characters and their indices
char_to_n = {char:n for n, char in enumerate(characters)}

print(n_to_char[7])
print(n_to_char[9])


:
?

In [73]:
X = []
y = []
total_len = len(sample_poem)
seq_len = 100  # each window is 100 characters; the model predicts the character that follows it

for i in range(total_len - seq_len):
    seq = sample_poem[i:i+seq_len]
    label = sample_poem[i+seq_len]
    
    X.append([char_to_n[char] for char in seq])
    y.append(char_to_n[label])

In [ ]:
# LSTM expects input of shape (number of sequences (batch size), sequence length (timesteps), number of features)
X_modified = np.reshape(X, (len(X), seq_len, 1))
X_modified = X_modified / float(len(characters))  # normalize the values to [0, 1)

y_modified = np_utils.to_categorical(y)  # one-hot encode the labels (36 distinct characters in this text)

In [37]:
print(X_modified.shape)
print(y_modified[4:10])


(4325, 100, 1)
[[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0.
  0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]]

In [39]:
model = Sequential()
model.add(LSTM(700, input_shape=(X_modified.shape[1], X_modified.shape[2]), return_sequences=True))
model.add(Dropout(0.2))  # dropout is used for regularization
model.add(LSTM(700, return_sequences=True))
model.add(Dropout(0.2))
model.add(LSTM(700))
model.add(Dropout(0.2))
model.add(Dense(y_modified.shape[1], activation='softmax'))

model.compile(loss='categorical_crossentropy', optimizer='adam')

In [42]:
model.fit(X_modified, y_modified, epochs=10, batch_size=100)


Epoch 1/10
4325/4325 [==============================] - 623s 144ms/step - loss: 3.0395
Epoch 2/10
4325/4325 [==============================] - 759s 175ms/step - loss: 3.0013
Epoch 3/10
4325/4325 [==============================] - 771s 178ms/step - loss: 3.0015
Epoch 4/10
4325/4325 [==============================] - 758s 175ms/step - loss: 2.9933
Epoch 5/10
4325/4325 [==============================] - 788s 182ms/step - loss: 2.9917
Epoch 6/10
4325/4325 [==============================] - 701s 162ms/step - loss: 2.9900
Epoch 7/10
4325/4325 [==============================] - 704s 163ms/step - loss: 2.9953
Epoch 8/10
4325/4325 [==============================] - 699s 162ms/step - loss: 2.9869
Epoch 9/10
4325/4325 [==============================] - 709s 164ms/step - loss: 2.9787
Epoch 10/10
4325/4325 [==============================] - 717s 166ms/step - loss: 2.9623
Out[42]:
<keras.callbacks.History at 0x29064b4dda0>

In [43]:
model.save_weights('poem_generator_gigantic.h5')  # save the weights so they can be reloaded later without retraining

In [50]:
model.load_weights('poem_generator_gigantic.h5')

In [74]:
new_poem_lst = []

for j in range(77, 99):  # take a few seed sequences (indices 77 to 98) and generate from each
    string_mapped = list(X[j])  # copy the seed so X[j] is not modified in place
    full_string = [n_to_char[value] for value in string_mapped]

    for i in range(10):  # predict the next 10 characters
        x = np.reshape(string_mapped,(1,len(string_mapped), 1))
        x = x / float(len(characters))

        # predict the index of the next character
        pred_index = np.argmax(model.predict(x, verbose=0))
        full_string.append(n_to_char[pred_index])

        # append the prediction and slide the window forward for the next step
        string_mapped.append(pred_index)
        string_mapped = string_mapped[1:len(string_mapped)]
        
    new_poem_lst.extend(full_string)

In [75]:
generated_poem = ''.join(new_poem_lst)
print(generated_poem)


ght never die,
 but as the riper should by time decease,
 his tender heir might bear his memory:
 bu    t    tht never die,
 but as the riper should by time decease,
 his tender heir might bear his memory:
 but    t    tt never die,
 but as the riper should by time decease,
 his tender heir might bear his memory:
 but    t    t  never die,
 but as the riper should by time decease,
 his tender heir might bear his memory:
 but t      t   never die,
 but as the riper should by time decease,
 his tender heir might bear his memory:
 but th      t   ever die,
 but as the riper should by time decease,
 his tender heir might bear his memory:
 but tho      t   ver die,
 but as the riper should by time decease,
 his tender heir might bear his memory:
 but thou       t  er die,
 but as the riper should by time decease,
 his tender heir might bear his memory:
 but thou,      t   r die,
 but as the riper should by time decease,
 his tender heir might bear his memory:
 but thou,      t   t die,
 but as the riper should by time decease,
 his tender heir might bear his memory:
 but thou, c     t    die,
 but as the riper should by time decease,
 his tender heir might bear his memory:
 but thou, co     t    ie,
 but as the riper should by time decease,
 his tender heir might bear his memory:
 but thou, con    t    te,
 but as the riper should by time decease,
 his tender heir might bear his memory:
 but thou, cont      t   ,
 but as the riper should by time decease,
 his tender heir might bear his memory:
 but thou, contr       t  
 but as the riper should by time decease,
 his tender heir might bear his memory:
 but thou, contra       t   but as the riper should by time decease,
 his tender heir might bear his memory:
 but thou, contrac      t   but as the riper should by time decease,
 his tender heir might bear his memory:
 but thou, contract      t   ut as the riper should by time decease,
 his tender heir might bear his memory:
 but thou, contracte      t   t as the riper should by time decease,
 his tender heir might bear his memory:
 but thou, contracted      t    as the riper should by time decease,
 his tender heir might bear his memory:
 but thou, contracted      t    as the riper should by time decease,
 his tender heir might bear his memory:
 but thou, contracted t     t    s the riper should by time decease,
 his tender heir might bear his memory:
 but thou, contracted to     t    

Observation...

Most of the readable text comes from the 100-character seed sequences, which are copied into the output before generation starts; the model's own predictions are only the few trailing characters after each seed, and they are mostly spaces and repeated letters, which matches the still-high training loss.
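
One way to check this is to keep only the characters generated after each seed, the same way the word-based method below does. A small sketch reusing the model and variables defined above:


In [ ]:
# sketch: collect only the model's own predictions, dropping each 100-character seed
# (reuses model, X, characters and n_to_char from the cells above)
generated_only = []

for j in range(77, 99):
    string_mapped = list(X[j])  # copy the seed so X[j] is left untouched
    new_chars = []

    for i in range(10):
        x = np.reshape(string_mapped, (1, len(string_mapped), 1))
        x = x / float(len(characters))
        pred_index = np.argmax(model.predict(x, verbose=0))
        new_chars.append(n_to_char[pred_index])
        string_mapped.append(pred_index)
        string_mapped = string_mapped[1:]

    generated_only.append(''.join(new_chars))

print(generated_only)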

Method 2 - Word Based Poem Generation

Simply map each word to an index, without any tokenization.
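
For contrast, the same word-to-index step could also be done with Keras' built-in Tokenizer; the cells below stick to a plain dictionary mapping instead, so this sketch is not used anywhere else in the notebook:


In [ ]:
# sketch: word-to-index mapping via keras' Tokenizer (shown only for comparison, not used below)
from keras.preprocessing.text import Tokenizer

tokenizer = Tokenizer()
tokenizer.fit_on_texts([sample_poem])

encoded = tokenizer.texts_to_sequences([sample_poem])[0]  # the poem as a list of word indices
print(len(tokenizer.word_index))  # vocabulary size (indices start at 1)
print(encoded[:10])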


In [4]:
words = sorted(list(set(sample_poem.split())))

n_to_word = {n:word for n, word in enumerate(words)}  # store words and their indices
word_to_n = {word:n for n, word in enumerate(words)}

print(n_to_word[7])
print(n_to_word[9])


age
all

In [25]:
X = []
y = []
all_words = sample_poem.split()
total_len = len(all_words)
seq_len = 100  # each window is 100 words; the model predicts the word that follows it

for i in range(total_len - seq_len):
    seq = all_words[i:i+seq_len]
    label = all_words[i+seq_len]
    
    X.append([word_to_n[word] for word in seq])
    y.append(word_to_n[label])

In [26]:
# LSTM expects input of shape (number of sequences (batch size), sequence length (timesteps), number of features)
X_modified = np.reshape(X, (len(X), seq_len, 1))
X_modified = X_modified / float(len(words))  # normalize the values to [0, 1)

y_modified = np_utils.to_categorical(y)  # one-hot encode the labels (one column per distinct word)

In [15]:
print(X_modified.shape)
print(y_modified[4:10])


(661, 100, 1)
[[0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]]

In [16]:
model = Sequential()
model.add(LSTM(700, input_shape=(X_modified.shape[1], X_modified.shape[2]), return_sequences=True))
model.add(Dropout(0.2))  # dropout is used for regularization
model.add(LSTM(700, return_sequences=True))
model.add(Dropout(0.2))
model.add(LSTM(700))
model.add(Dropout(0.2))
model.add(Dense(y_modified.shape[1], activation='softmax'))

model.compile(loss='categorical_crossentropy', optimizer='adam')

In [17]:
model.fit(X_modified, y_modified, epochs=10, batch_size=100)


Epoch 1/10
661/661 [==============================] - 91s 138ms/step - loss: 5.9895
Epoch 2/10
661/661 [==============================] - 83s 126ms/step - loss: 5.7600
Epoch 3/10
661/661 [==============================] - 84s 127ms/step - loss: 5.6341
Epoch 4/10
661/661 [==============================] - 84s 127ms/step - loss: 5.5812
Epoch 5/10
661/661 [==============================] - 87s 132ms/step - loss: 5.5664
Epoch 6/10
661/661 [==============================] - 90s 136ms/step - loss: 5.5553
Epoch 7/10
661/661 [==============================] - 86s 130ms/step - loss: 5.5377
Epoch 8/10
661/661 [==============================] - 97s 146ms/step - loss: 5.5548
Epoch 9/10
661/661 [==============================] - 93s 141ms/step - loss: 5.5511
Epoch 10/10
661/661 [==============================] - 93s 141ms/step - loss: 5.5504
Out[17]:
<keras.callbacks.History at 0x1950e4d1c18>

In [18]:
model.save_weights('poem_generator_gigantic_word.h5')

In [19]:
model.load_weights('poem_generator_gigantic_word.h5')

In [27]:
new_poem_lst = []

for j in range(77, 99):  # take a few seed sequences (indices 77 to 98) and generate from each
    string_mapped = list(X[j])  # copy the seed so X[j] is not modified in place
    full_string = []  # unlike the character-based version, the seed itself is not added to the output

    for i in range(10):  # predict the next 10 words
        x = np.reshape(string_mapped,(1,len(string_mapped), 1))
        x = x / float(len(words))

        # predict the index of the next word
        pred_index = np.argmax(model.predict(x, verbose=0))
        full_string.append(n_to_word[pred_index])

        # append the prediction and slide the window forward for the next step
        string_mapped.append(pred_index)
        string_mapped = string_mapped[1:len(string_mapped)]
        
    new_poem_lst.extend(full_string)

In [28]:
generated_poem = ' '.join(new_poem_lst)
print(generated_poem)


thou thou thou thou thou thou thou thou thou thou thou thou thou thou thou thou thou thou thou thou thou thou thou thou thou thou thou thou thou thou thou thou thou thou thou thou thou thou thou thou thou thou thou thou thou thou thou thou thou thou thou thou thou thou thou thou thou thou thou thou thou thou thou thou thou thou thou thou thou thou thou thou thou thou thou thou thou thou thou thou thou thou thou thou thou thou thou thou thou thou thou thou thou thou thou thou thou thou thou thou thou thou thou thou thou thou thou thou thou thou thou thou thou thou thou thou thou thou thou thou thou thou thou thou thou thou thou thou thou thou thou thou thou thou thou thou thou thou thou thou thou thou thou thou thou thou thou thou thou thou thou thou thou thou thou thou thou thou thou thou thou thou thou thou thou thou thou thou thou thou thou thou thou thou thou thou thou thou thou thou thou thou thou thou thou thou thou thou thou thou thou thou thou thou thou thou thou thou thou thou thou thou thou thou thou thou thou thou thou thou thou thou thou thou thou thou thou thou thou thou

Observation...

  • With the word-based method, including the original seed sequences would drown the output in copied text, so they are dropped here; what you see above is purely the predicted sequence. Clearly, the number of training epochs makes a difference (see the sketch below for one way to train longer without losing progress).
  • The upside of the word-based approach is that it yields far fewer training sequences (661 versus 4,325), so it uses less memory. The downside is that with so little training data it can be less accurate; to be fair, neither method produces accurate results here.
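
A minimal sketch of acting on the epoch observation: train for more epochs while checkpointing the best weights seen so far. It assumes the word-based model, X_modified and y_modified from the cells above; the filename and epoch count are arbitrary examples.


In [ ]:
# sketch: train longer and keep only the weights with the lowest training loss
# ('poem_generator_word_best.h5' and epochs=50 are example choices)
from keras.callbacks import ModelCheckpoint

checkpoint = ModelCheckpoint('poem_generator_word_best.h5',
                             monitor='loss', save_best_only=True, verbose=1)

model.fit(X_modified, y_modified, epochs=50, batch_size=100, callbacks=[checkpoint])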