In [1]:
import numpy as np
from keras.utils import to_categorical
from keras.layers import Input, Dense, Embedding, LSTM, TimeDistributed
from keras.models import Model
In [2]:
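# Toy dataset: the Hungarian word "autót" ("car" in the accusative) repeated
# four times. The tag string presumably encodes morphological segmentation,
# with B marking the first character of a morph and E a continuation, so
# BEEEB splits the word as autó + -t.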
samples = [
    (u"autót", "BEEEB"),
    (u"autót", "BEEEB"),
    (u"autót", "BEEEB"),
    (u"autót", "BEEEB"),
]
In [3]:
vocab_x = {}
vocab_y = {}
data_x = [[vocab_x.setdefault(c, len(vocab_x)) for c in word] for word, tags in samples]
data_y = [[vocab_y.setdefault(c, len(vocab_y)) for c in tags] for word, tags in samples]
data_x = np.array(data_x)
# One-hot encode the tags only after the full tag vocabulary is known, so
# every sample is encoded with the same number of classes.
data_y = to_categorical(np.array(data_y), num_classes=len(vocab_y))
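A quick sanity check of the encoded data (an addition, not part of the original run):

print(data_x.shape)  # (4, 5): four words of five characters each
print(data_y.shape)  # (4, 5, 2): per-character one-hot over the two tag classes
print(vocab_x)       # {'a': 0, 'u': 1, 't': 2, 'ó': 3}
print(vocab_y)       # {'B': 0, 'E': 1}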
In [4]:
timesteps = 5                  # characters per input word
batch_size = 4                 # the whole toy dataset fits in one batch
vocab_size = len(vocab_x)      # number of distinct input characters
embedding_size = 10            # dimensionality of the character embeddings
seq_size = 50                  # number of LSTM hidden units
mlp_size = len(vocab_y)        # one softmax unit per tag class
In [5]:
xin = Input(batch_shape=(batch_size, timesteps), dtype='int32')
xemb = Embedding(vocab_size, embedding_size)(xin)  # (batch, timesteps, embedding_size)
seq = LSTM(seq_size, return_sequences=True)(xemb)  # one hidden state per character
mlp = TimeDistributed(Dense(mlp_size, activation='softmax'))(seq)  # per-character tag distribution
model = Model(inputs=xin, outputs=mlp)
model.compile(optimizer='adam', loss='categorical_crossentropy')
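Before training, it can help to confirm the layer output shapes; a quick check, not part of the original transcript:

model.summary()  # expect (4, 5, 10) after the Embedding, (4, 5, 50) after the LSTM, (4, 5, 2) at the output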
In [6]:
model.fit(data_x, data_y, epochs=500, batch_size=batch_size, verbose=0)  # batch size must match the fixed batch_shape of the Input
In [7]:
model.evaluate(data_x, data_y, batch_size=batch_size)  # average categorical cross-entropy on the training data
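To inspect what the network actually learned, the per-character predictions can be decoded back into tag strings; a minimal sketch, assuming the model has converged on this toy set:

inv_vocab_y = {i: c for c, i in vocab_y.items()}
pred = model.predict(data_x, batch_size=batch_size)  # shape (4, 5, 2)
for probs in pred:
    tags = "".join(inv_vocab_y[int(i)] for i in probs.argmax(axis=-1))
    print(tags)  # should print BEEEB for every sample after training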