In [1]:
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from keras import backend as K
from keras.models import Sequential, Model
from keras.layers import LSTM, Dense, SimpleRNN, Input, merge, Embedding, TimeDistributed
from keras.objectives import sparse_categorical_crossentropy, mean_squared_error
In [2]:
## Code taken from http://www.rigtorp.se/2011/01/01/rolling-statistics-numpy.html
def rolling_window(a, window):
    shape = a.shape[:-1] + (a.shape[-1] - window + 1, window)
    strides = a.strides + (a.strides[-1],)
    return np.lib.stride_tricks.as_strided(a, shape=shape, strides=strides)
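As a quick sanity check, rolling_window builds overlapping views of the array without copying it; a minimal sketch (expected output shown in comments):
In [ ]:
a = np.arange(6.0)
rolling_window(a, 3)
# array([[0., 1., 2.],
#        [1., 2., 3.],
#        [2., 3., 4.],
#        [3., 4., 5.]])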
In [3]:
X = np.arange(-10,10,0.1)
X = np.cos(np.mean(rolling_window(X, 5), -1))
#X = X[:-5+1]
print(X.shape)
plt.plot(X)
Out[3]:
Replicating data generation from http://machinelearningmastery.com/time-series-prediction-lstm-recurrent-neural-networks-python-keras/
In [4]:
def create_dataset(dataset, look_back=1):
    dataX, dataY = [], []
    for i in range(len(dataset)-look_back-1):
        a = dataset[i:(i+look_back)]
        dataX.append(a)
        dataY.append(dataset[i + look_back])
    return np.array(dataX), np.array(dataY)
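For intuition, a minimal sketch of what create_dataset produces: each X row is a look_back-long window and each y is the value that follows it (note the loop's -1 drops the final usable pair):
In [ ]:
toy = np.arange(6)
create_dataset(toy, look_back=2)
# (array([[0, 1], [1, 2], [2, 3]]), array([2, 3, 4]))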
In [5]:
train_size = int(X.shape[0]*0.67)
test_size = X.shape[0] - train_size
train = X[:train_size]
test = X[train_size:]
print(train_size, train.shape, test_size, test.shape)
In [6]:
look_back = 1
trainX, trainY = create_dataset(train, look_back)
testX, testY = create_dataset(test, look_back)
print(trainX.shape, trainY.shape, testX.shape, testY.shape)
In [7]:
print(trainX[:10])
print(trainY[:10])
In [8]:
np.reshape(trainX[:10], (trainX[:10].shape[0], 1, trainX[:10].shape[1])).shape
Out[8]:
In [9]:
# reshape input to be [samples, time steps, features]
trainX = np.reshape(trainX, (trainX.shape[0], 1, trainX.shape[1]))
testX = np.reshape(testX, (testX.shape[0], 1, testX.shape[1]))
In [10]:
model = Sequential()
model.add(LSTM(4, input_dim=look_back))
model.add(Dense(1))
model.compile(loss='mean_squared_error', optimizer='adam')
In [11]:
model.fit(trainX, trainY, nb_epoch=10, batch_size=50)
Out[11]:
In [12]:
pred_train = model.predict(trainX)
pred_test = model.predict(testX)
pred_train.shape, pred_test.shape
Out[12]:
In [13]:
plt.plot(X)
plt.plot(np.arange(pred_train.shape[0])+1, pred_train, color='r')
plt.plot(np.arange(pred_test.shape[0])+train_size, pred_test, color='g')
Out[13]:
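To put a number on the fit beyond the plot, a small sketch computing train/test RMSE with numpy (the predictions have shape (n, 1), so ravel before comparing against the 1-D targets):
In [ ]:
train_rmse = np.sqrt(np.mean((pred_train.ravel() - trainY) ** 2))
test_rmse = np.sqrt(np.mean((pred_test.ravel() - testY) ** 2))
train_rmse, test_rmse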
In [14]:
model = Sequential()
model.add(SimpleRNN(4, input_dim=look_back))
model.add(Dense(1))
model.compile(loss='mean_squared_error', optimizer='adam')
model.fit(trainX, trainY, nb_epoch=10, batch_size=5)
Out[14]:
In [15]:
pred_train = model.predict(trainX)
pred_test = model.predict(testX)
pred_train.shape, pred_test.shape
Out[15]:
In [16]:
plt.plot(X)
plt.plot(np.arange(pred_train.shape[0])+1, pred_train, color='r')
plt.plot(np.arange(pred_test.shape[0])+train_size, pred_test, color='g')
Out[16]:
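The same RMSE check makes the SimpleRNN vs. LSTM comparison quantitative (sketch):
In [ ]:
np.sqrt(np.mean((pred_test.ravel() - testY) ** 2))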
In [17]:
trainX.shape
Out[17]:
In [18]:
X = np.random.randn(10,3)
W1 = np.random.randn(3,1)
W2 = np.random.randn(2,1)
y1 = np.dot(X, W1)
y2 = np.dot(X[:, :2], W2)
X.shape, y1.shape, y2.shape
Out[18]:
In [19]:
inp1 = Input(name="inp1", shape=(3,))
inp2 = Input(name="inp2", shape=(2,))
In [20]:
dense1 = Dense(10)(inp1)
merge_dense1_inp2 = merge([dense1, inp2], mode='concat')
In [21]:
dense2 = Dense(10)(merge_dense1_inp2)
In [22]:
out1 = Dense(1, name="out1")(dense1)
out2 = Dense(1, name="out2")(dense2)
In [23]:
model1 = Model(input=[inp1], output=[out1])
In [24]:
model1.get_output_at(0) == out1
Out[24]:
In [25]:
model2 = Model(input=[inp1, inp2], output=[out2])
In [26]:
model1.compile(optimizer='sgd', loss='mean_squared_error')
model2.compile(optimizer='sgd', loss='mean_squared_error')
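Both models are compiled but never fit here, so the predictions below come from the random initial weights. A minimal training call for model2 against the y2 targets generated above would look like this (a sketch; epoch count and verbosity are arbitrary):
In [ ]:
model2.fit([X, X[:, :2]], y2, nb_epoch=5, verbose=0)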
In [27]:
model2.predict([X, X[:, :2]])
Out[27]:
In [28]:
model1.predict([X])
Out[28]:
In [29]:
model2.output_names
Out[29]:
In [30]:
model3 = Model(input=[inp1, inp2], output=[out1, out2])
model3.compile(optimizer='sgd', loss='mean_squared_error')
In [31]:
model3.predict([X, X[:, :2]])
Out[31]:
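With multiple outputs, fit takes a list of targets in the same order as the outputs; a sketch for model3 using the y1/y2 arrays from above (hyperparameters arbitrary):
In [ ]:
model3.fit([X, X[:, :2]], [y1, y2], nb_epoch=5, verbose=0)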
In [32]:
data = [[1, 2, 3, 4, 5],
        [2, 3, 2, 2, 2]]
label_pos = [[1,2,2,2,1], None]
label_ner = [None, [1,1,1,2,1]]
In [33]:
input_seq = Input(shape=(5,), name="input_seq")
shared_embedding = Embedding(6, 10)(input_seq)
In [34]:
shared_embedding.get_shape()
Out[34]:
In [35]:
shared_layer = LSTM(10, return_sequences=True)(shared_embedding)
pos_layer = TimeDistributed(Dense(3, activation="softmax"), name="pos_labels")(shared_layer)
ner_layer = TimeDistributed(Dense(3, activation="softmax"), name="ner_labels")(shared_layer)
In [36]:
model = Model(input=input_seq, output=[pos_layer, ner_layer])
In [40]:
model.compile(loss='sparse_categorical_crossentropy', optimizer='sgd', sample_weight_mode="temporal")
In [48]:
max_len = 5
X = np.array(data)
y_pos = np.expand_dims(np.array([np.zeros(max_len) if y is None else y for y in label_pos], dtype='int'), -1)
y_ner = np.expand_dims(np.array([np.zeros(max_len) if y is None else y for y in label_ner], dtype='int'), -1)
y_pos.shape, y_ner.shape
Out[48]:
In [49]:
pos_mask = np.array([np.ones(max_len)*(y is not None) for y in label_pos])
ner_mask = np.array([np.ones(max_len)*(y is not None) for y in label_ner])
pos_mask, ner_mask
Out[49]:
In [50]:
model.fit(X, [y_pos, y_ner], sample_weight=[pos_mask, ner_mask])
Out[50]:
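To confirm the masks do their job, evaluate accepts the same sample_weight argument as fit and reports per-output losses; a sequence whose mask row is all zeros contributes nothing to that task's loss (sketch):
In [ ]:
model.evaluate(X, [y_pos, y_ner], sample_weight=[pos_mask, ner_mask], verbose=0)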
In [34]:
t = mean_squared_error(np.random.randn(10,2), np.random.randn(10,2))
t
Out[34]:
In [35]:
sess = tf.Session()
with sess.as_default():
    print(t.eval())
In [36]:
t = mean_squared_error(np.random.randn(10,2,3), np.random.randn(10,2,3))
t
Out[36]:
In [37]:
sess = tf.Session()
with sess.as_default():
    print(t.eval())
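Note the output shapes above: Keras's mean_squared_error reduces only the last axis (K.mean(K.square(y_pred - y_true), axis=-1)), so (10, 2) inputs yield a length-10 loss vector and (10, 2, 3) inputs yield a (10, 2) per-timestep loss. That per-timestep shape is what allows an elementwise mask to be multiplied in, as tried next.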
In [38]:
def custom_mse_loss(y_true, y_pred):
    loss_masks = y_true["masks"]
    y_true_vals = y_true["vals"]
    mse_loss = mean_squared_error(y_true_vals, y_pred)
    mse_loss_masked = loss_masks * mse_loss
    return mse_loss_masked
In [39]:
y_pred = np.random.randn(10,5,3)
y_true_vals = np.random.randn(10,5,3)
y_true_masks = np.random.randint(2, size=(10,1))
y_true = {"masks": y_true_masks, "vals": y_true_vals}
In [40]:
t = custom_mse_loss(y_true, y_pred)
t
Out[40]:
In [41]:
y_true_masks
Out[41]:
In [42]:
sess = tf.Session()
with sess.as_default():
    print(t.eval())
In [57]:
pos_layer.get_shape()
Out[57]:
In [96]:
def task_based_loss(y_true, y_pred):
    print("Using layer: ", y_pred.get_shape(), y_pred.name)
    pos_labels = K.cast(y_true[:, :, 0], 'int64')
    ner_labels = K.cast(y_true[:, :, 1], 'int64')
    #pos_labels = K.cast(y_true[0][:, :, 0], 'int64')
    #ner_labels = K.cast(y_true[1][:, :, 0], 'int64')
    pos_preds = y_pred[0]
    ner_preds = y_pred[1]
    pos_loss = sparse_categorical_crossentropy(pos_labels, pos_preds)
    ner_loss = sparse_categorical_crossentropy(ner_labels, ner_preds)
    #pos_mask = y_true[0][:, :, 1]  # POS mask channel
    #ner_mask = y_true[1][:, :, 1]  # NER mask channel
    #pos_loss = pos_loss * pos_mask
    #ner_loss = ner_loss * ner_mask
    return (pos_loss + ner_loss) / 2
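A caveat on the function above: Keras calls the loss once per output, passing single y_true/y_pred tensors, so y_pred[0] and y_pred[1] actually slice batch rows rather than selecting the two outputs. A per-output masked loss closer to what the commented-out lines are reaching for might look like this (a sketch, assuming labels and mask are stacked as channels of y_true):
In [ ]:
def masked_sparse_ce(y_true, y_pred):
    labels = K.cast(y_true[:, :, 0], 'int64')   # label channel
    mask = K.cast(y_true[:, :, 1], K.floatx())  # mask channel (1 = labeled, 0 = ignore)
    return sparse_categorical_crossentropy(labels, y_pred) * mask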
In [97]:
model.compile(loss=task_based_loss, optimizer='sgd')
In [100]:
X.shape
Out[100]:
In [101]:
y_pos.shape, pos_mask.shape, y_ner.shape, ner_mask.shape
Out[101]:
In [78]:
y_true = np.dstack([y_pos, y_ner, pos_mask, ner_mask])
In [80]:
y_true.shape
Out[80]:
In [79]:
model.fit(X, y_true)
In [52]:
y_pred = model.predict(X)
y_pred[0].shape, y_pred[1].shape
Out[52]:
In [55]:
y_pred[0].argmax(axis=-1), y_pred[1].argmax(axis=-1)
Out[55]: