In [ ]:
import numpy as np
import string
import time
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from keras.utils import np_utils
from keras.preprocessing.sequence import pad_sequences
In [81]:
# Vocabulary for the task: the uppercase ASCII letters, in order.
alphabet = string.ascii_uppercase
In [82]:
# Echo the vocabulary as the cell's output.
alphabet
Out[82]:
In [83]:
# Letter -> integer code, numbered by position in the alphabet (first letter is 0).
map_to_int = dict(zip(alphabet, range(len(alphabet))))
In [84]:
# Integer code -> letter; the inverse lookup used to decode a predicted class index.
map_to_char = dict(enumerate(alphabet))
In [85]:
# Generate the training data: each sample is a run of consecutive letters of
# varying length (1..max_lenth) and the target is the letter that follows it.
max_lenth = 5
samples = 1000
batch_size = 1

# Seed NumPy so the sampled dataset is identical on every run. This pins the
# data only; it does not by itself make the model training deterministic.
np.random.seed(42)

X, y = [], []
for _ in range(samples):
    # randint's upper bound is exclusive, so start covers 0..len(alphabet) - 2.
    # The last admissible start is the second-to-last letter, which still
    # leaves one letter after it to serve as the target.
    start = np.random.randint(len(alphabet) - 1)
    # Cap the run at max_lenth letters and never let it reach the last letter,
    # so that alphabet[end + 1] always exists.
    end = np.random.randint(start, min(start + max_lenth, len(alphabet) - 1))
    seq_in = alphabet[start: end + 1]
    seq_out = alphabet[end + 1]
    X.append([map_to_int[i] for i in seq_in])
    # The target is kept as a one-element list, matching the input encoding.
    y.append([map_to_int[i] for i in seq_out])
In [86]:
# NOTE: this cell rebinds X and y, so it is not idempotent; re-run the data
# generation cell before running it a second time.

# Pad every encoded sequence with zeros up to the fixed length max_lenth.
# NOTE(review): map_to_int encodes the first letter as 0, which is the same
# value used for padding, so that letter is indistinguishable from padding.
X = pad_sequences(X, maxlen=max_lenth, dtype='float32')
# normalization: scale the integer codes by the alphabet size
X = X / len(alphabet)
# reshaping [samples, time steps, features]
X = X.reshape(X.shape[0], max_lenth, 1)
# Pin the number of classes to the alphabet size. Without num_classes,
# to_categorical infers it as max(y) + 1, so the width of the one-hot targets
# (and of the model's output layer) would depend on which letters happened to
# be sampled as targets.
y = np_utils.to_categorical(y, num_classes=len(alphabet))
In [98]:
def build_model():
    """Build and compile the next-letter classifier.

    The network is a single 32-unit LSTM followed by a softmax layer. The
    number of time steps is read from the global ``X`` and the number of
    output classes from the global ``y``, so both must already be prepared.

    Returns:
        The compiled Keras ``Sequential`` model.
    """
    n_timesteps = X.shape[1]
    n_classes = y.shape[1]
    network = Sequential([
        LSTM(32, input_shape=(n_timesteps, 1)),
        Dense(n_classes, activation='softmax'),
    ])
    network.compile(
        optimizer='adam',
        loss='categorical_crossentropy',
        metrics=['accuracy'],
    )
    return network
In [99]:
# Instantiate the network; build_model reads its shapes from the global X and y.
model = build_model()
In [100]:
# Train on the full dataset, then report accuracy on that same data together
# with the wall-clock time spent on fitting and evaluation.
start = time.time()
model.fit(X, y, epochs=800, batch_size=batch_size, verbose=0)
scores = model.evaluate(X, y, verbose=0)
# scores holds the loss first, followed by the compiled metrics (accuracy).
accuracy_pct = scores[1] * 100
print("Model Accuracy: %.2f%%" % accuracy_pct)
elapsed_minutes = (time.time() - start) / 60
print("It took: {} minutes".format(elapsed_minutes))
In [129]:
# Spot-check the trained model on a few hand-picked letter runs.
for letters in ['B', "BCD", 'STU', 'R', 'DEFGH']:
    # Encode the run exactly as the training data was encoded:
    # integer codes, zero-padded to max_lenth, shaped as one sample, then scaled.
    codes = [map_to_int[ch] for ch in letters]
    padded = pad_sequences([codes], maxlen=max_lenth, dtype='float32')
    features = padded.reshape(1, max_lenth, 1) / float(len(alphabet))
    probabilities = model.predict(features, verbose=0)
    # The most probable class index maps straight back to a letter.
    next_letter = map_to_char[np.argmax(probabilities)]
    print("After {} comes {}".format(letters, next_letter))