In [5]:
import numpy as np
import random
In [20]:
# generate a sequence of random numbers in [0, n_features)
def generate_sequence(length, n_features):
    """Return a list of `length` random integers drawn uniformly from [0, n_features)."""
    return [random.randrange(n_features) for _ in range(length)]
# one hot encode sequence
def one_hot_encode(sequence, n_features):
    """Encode each integer in `sequence` as a one-hot row vector of width `n_features`.

    Returns a numpy array of shape (len(sequence), n_features).
    """
    rows = [[1 if position == value else 0 for position in range(n_features)]
            for value in sequence]
    return np.array(rows)
# decode a one hot encoded sequence
def one_hot_decode(encoded_seq):
    """Invert one_hot_encode: return the argmax index of every row vector as a list."""
    decoded = []
    for vector in encoded_seq:
        decoded.append(np.argmax(vector))
    return decoded
In [25]:
# Demo: draw a random sequence of 25 values from [0, 100)
sequence = generate_sequence(25, 100)
print(sequence)
In [26]:
# Demo: one-hot encode the sequence -> array of shape (25, 100)
encoded = one_hot_encode(sequence, 100)
print(encoded)
In [27]:
# Demo: decoding recovers the original integer sequence
decoded = one_hot_decode(encoded)
print(decoded)
In [28]:
# 25 time steps, each a 100-dimensional one-hot feature vector
encoded.shape
Out[28]:
In [29]:
# Reshape to the 3D tensor (samples, time steps, features) expected by an LSTM input
X = encoded.reshape(1, 25, 100)
In [30]:
X.shape
Out[30]:
In [32]:
# Take the time step at index 1 as a candidate target vector
y = encoded[1, :]
y.shape
Out[32]:
In [33]:
# generate one example for an lstm
def generate_example(length, n_features, out_index):
    """Create a single (X, y) training pair for the LSTM.

    X is a randomly generated, one-hot encoded sequence shaped
    (1, length, n_features); y is the one-hot vector at time step
    `out_index` of that same sequence, shaped (1, n_features).
    """
    encoded = one_hot_encode(generate_sequence(length, n_features), n_features)
    X = encoded.reshape((1, length, n_features))
    y = encoded[out_index].reshape(1, n_features)
    return X, y
In [45]:
# Generate just one training sample.
# The element at index 2 (i.e. the third value) of the randomly
# generated sequence is used as the target.
X, y = generate_example(25, 100, 2)
print(X.shape)
print(y.shape)
print(one_hot_decode(X[0, :]))
print(one_hot_decode(y))
In [69]:
from keras.models import Sequential
from keras.layers import LSTM, Dense
# define model
length = 5 # 系列長
n_features = 10 # 特徴量の次元
out_index = 2 # 入力系列中で出力したい値のインデックス
model = Sequential()
model.add(LSTM(25, input_shape=(length, n_features)))
model.add(Dense(n_features, activation='softmax'))
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['acc'])
print(model.summary())
In [70]:
# NOTE(review): `keras.utils.vis_utils` is the legacy import path; newer
# Keras exposes `from keras.utils import plot_model` — confirm against the
# installed version before re-running.
from keras.utils.vis_utils import plot_model
from IPython.display import Image
# Render the model architecture to a PNG file and display it inline
plot_model(model, to_file='model.png', show_shapes=True, show_layer_names=True)
Image('model.png')
Out[70]:
In [71]:
# fit model: online training — one freshly generated sample per step
loss = []
for i in range(10000):
# generate one sample and train on it for a single epoch
X, y = generate_example(length, n_features, out_index)
h = model.fit(X, y, epochs=1, verbose=0)
loss.append(h.history['loss'][0])
In [72]:
import matplotlib.pyplot as plt
%matplotlib inline
plt.plot(loss)
Out[72]:
In [76]:
# TODO: 1サンプルずつではなくまとめて生成してミニバッチ学習したらどうなる?
train_data = []
train_label = []
for i in range(10000):
X, y = generate_example(length, n_features, out_index)
train_data.append(X)
train_label.append(y)
train_data = np.array(train_data)
train_label = np.array(train_label)
train_data = train_data.reshape(10000, length, n_features)
train_label = train_label.reshape(10000, n_features)
# create new model
model = Sequential()
model.add(LSTM(25, input_shape=(length, n_features)))
model.add(Dense(n_features, activation='softmax'))
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['acc'])
hist = model.fit(train_data, train_label, batch_size=100, epochs=20, verbose=2)
In [68]:
# NOTE(review): execution count 68 is lower than the cell above (76) — the
# notebook was not run top-to-bottom; re-run fresh before trusting outputs.
print(train_data.shape)
print(train_label.shape)
In [82]:
# training loss per epoch for the mini-batch model
plt.plot(hist.history['loss'])
Out[82]:
In [84]:
# training accuracy per epoch for the mini-batch model
plt.plot(hist.history['acc'])
Out[84]:
In [85]:
# evaluate model
correct = 0
for i in range(100):
X, y = generate_example(length, n_features, out_index)
yhat = model.predict(X)
if one_hot_decode(yhat) == one_hot_decode(y):
correct += 1
print('Accuracy: %f' % ((correct / 100) * 100.0))
In [87]:
# prediction on new data: decode the input sequence, the expected target,
# and the model's prediction for a side-by-side comparison
X, y = generate_example(length, n_features, out_index)
yhat = model.predict(X)
print('Sequence: %s' % [one_hot_decode(x) for x in X])
print('Expected: %s' % one_hot_decode(y))
print('Predicted: %s' % one_hot_decode(yhat))
In [88]:
# TODO: run the experiments from section 6.9 "Extensions"