In [1]:
import numpy as np
# Keras 2.x-era import paths (standalone Keras on the TensorFlow backend);
# in later Keras versions these moved, e.g. to keras.utils and keras.layers
from keras.utils.np_utils import to_categorical
from keras.layers import Input, Dense, Embedding
from keras.layers.recurrent import LSTM
from keras.models import Model
from keras.layers.wrappers import TimeDistributed


Using TensorFlow backend.

Toy data

Four copies of one (word, tag string) pair: the Hungarian word autót, with one tag per character. Since the dataset is a single repeated sample, the model only needs to memorize one mapping.

In [2]:
samples = [
    (u"autót", "BEEEB"),
    (u"autót", "BEEEB"),
    (u"autót", "BEEEB"),
    (u"autót", "BEEEB"),
]
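
Each tag string must line up one-to-one with the word's characters; a quick sanity check (an addition for illustration, not part of the original run):

for word, tags in samples:
    assert len(word) == len(tags)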

Featurizing the toy dataset

vocab_x.setdefault(c, len(vocab_x)) assigns each previously unseen character the next free integer id while building the id sequences; to_categorical then turns each tag id sequence into a matrix of one-hot rows.

In [3]:
vocab_x = {}
vocab_y = {}

# map characters and tags to integer ids, assigning ids on first sight
data_x = [[vocab_x.setdefault(c, len(vocab_x)) for c in sample[0]] for sample in samples]
# one-hot encode the tag ids; to_categorical infers the matrix width from the
# largest id in each sample, which is safe here only because every sample
# contains both tag classes
data_y = [to_categorical([vocab_y.setdefault(c, len(vocab_y)) for c in sample[1]]) for sample in samples]
data_x = np.array(data_x)
data_y = np.array(data_y)
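
As a quick check (again an addition, not in the original run), the featurized arrays come out with these shapes:

print(data_x.shape)  # (4, 5): four samples, five character ids each
print(data_y.shape)  # (4, 5, 2): one one-hot row per character over the two tag classes
print(vocab_y)       # {'B': 0, 'E': 1}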

Model parameters


In [4]:
timesteps = 5              # characters per input word
batch_size = 4             # the whole toy set fits in one batch
vocab_size = len(vocab_x)  # number of distinct input characters (4)
embedding_size = 10        # dimensionality of the character embeddings
seq_size = 50              # LSTM hidden state size
mlp_size = len(vocab_y)    # number of output tag classes (2)

Model definition


In [5]:
xin = Input(batch_shape=(batch_size, timesteps), dtype='int32')    # character ids, fixed batch size
xemb = Embedding(vocab_size, embedding_size)(xin)                  # -> (batch, timesteps, embedding_size)
seq = LSTM(seq_size, return_sequences=True)(xemb)                  # one hidden vector per timestep
mlp = TimeDistributed(Dense(mlp_size, activation='softmax'))(seq)  # per-character tag distribution
model = Model(inputs=xin, outputs=mlp)
model.compile(optimizer='adam', loss='categorical_crossentropy')
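
To verify the per-timestep wiring before training, Keras can report the layer shapes; the softmax output should cover one tag distribution per character:

print(model.output_shape)  # expect (batch_size, timesteps, mlp_size) = (4, 5, 2)
model.summary()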

Training and testing


In [6]:
model.fit(data_x, data_y, epochs=500, verbose=0)


Out[6]:
<keras.callbacks.History at 0x7f3d5a8cc2d0>

In [7]:
model.evaluate(data_x, data_y)


4/4 [==============================] - 0s
Out[7]:
0.00096156785730272532
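
The loss alone hides what was learned. To see the predicted tagging, you can decode the argmax of the softmax back into tag characters; inv_vocab_y below is a helper introduced here for illustration:

inv_vocab_y = {i: c for c, i in vocab_y.items()}
pred = model.predict(data_x, batch_size=batch_size)  # shape (4, 5, 2)
for probs in pred:
    print(''.join(inv_vocab_y[i] for i in probs.argmax(axis=-1)))
# after training, every line should read "BEEEB"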