@sunilmallya
@jrhunt
MXNet cheat sheet: https://s3.amazonaws.com/aws-bigdata-blog/artifacts/apache_mxnet/apache-mxnet-cheat.pdf
h_t = activation(X_t W_x + h_{t-1} W_h)
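As a rough sketch (not from the original notebook; the toy sizes and the tanh activation are assumptions), the recurrence above is just a loop that mixes the current input with the previous hidden state:
import numpy as np
n_in, n_hidden, n_steps = 1, 5, 4          # toy sizes: 1 input feature, 5 hidden units, 4 time steps
Wx = np.random.randn(n_in, n_hidden)       # input-to-hidden weights
Wh = np.random.randn(n_hidden, n_hidden)   # hidden-to-hidden weights
X = np.random.randn(n_steps, n_in)         # one input sequence
h = np.zeros(n_hidden)                     # h_0
for t in range(n_steps):
    h = np.tanh(X[t].dot(Wx) + h.dot(Wh))  # h_t = activation(X_t Wx + h_{t-1} Wh)
print(h)                                   # final hidden state, shape (5,)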
LSTMs -- Long Short Term Memory
In [ ]:
pip install mxnet
pip install numpy
pip install matplotlib
pip install pandas
pip install scikit-learn
In [5]:
# download the p2 spot price history (us-east-1b) used in this notebook
!wget https://raw.githubusercontent.com/sunilmallya/mxnet-notebooks/master/python/tutorials/data/p2-east-1b.csv
In [11]:
import pandas
import numpy as np
import mxnet as mx
import matplotlib.pyplot as plt
%matplotlib inline
df = pandas.read_csv('p2-east-1b.csv', usecols=[0,4], names=['date', 'cost'])
df.head()
Out[11]:
In [8]:
import dateutil.parser
values = df.values[::-1]                                  # reverse the row order so the series runs in time order
ticks = list(map(dateutil.parser.parse, values[:, 0]))    # parse the timestamp column for plotting
dataset = values[:, 1]                                    # the spot price values
dataset.shape
Out[8]:
In [9]:
plt.plot(ticks, dataset)
Out[9]:
In [13]:
# predict T_x from T_{x-1}, T_{x-2}; window size = 2
from sklearn.preprocessing import MinMaxScaler
dataset = np.reshape(dataset, (len(dataset), 1))   # shape (N, 1): one feature per time step
scaler = MinMaxScaler(feature_range=(0, 1))        # scale prices into [0, 1] for training
scaled_dataset = scaler.fit_transform(dataset)
dataset[:5], scaled_dataset[:5]
Out[13]:
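As a quick sanity check (a sketch, not part of the original notebook), min-max scaling is just (x - min) / (max - min), so the scaled values and the round trip back to prices can be reproduced by hand:
lo, hi = dataset.min(), dataset.max()
manual = (dataset[:5] - lo) / (hi - lo)                       # same formula MinMaxScaler applies
print(manual.ravel())                                         # should match scaled_dataset[:5]
print(scaler.inverse_transform(scaled_dataset[:5]).ravel())   # recovers the original prices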
In [16]:
# align the data
seq_len = 2
x = scaled_dataset
y = scaled_dataset[:, [-1]]   # keep the 2-D (N, 1) shape for the labels
x[:5], y[:5]
dataX = []; dataY = []
for i in range(0, len(y)-seq_len):
_x = x[i: i+seq_len]
_y = y[i+seq_len]
dataX.append(_x)
dataY.append(_y)
dataX[0], dataY[0]
# Tx0, Tx1 => Tx2
# Tx1, Tx2 => Tx3
Out[16]:
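For reference (a quick check, not in the original notebook), the windowing above should leave the arrays shaped like this:
print(np.array(dataX).shape)   # (len(scaled_dataset) - seq_len, seq_len, 1) -- one window per sample
print(np.array(dataY).shape)   # (len(scaled_dataset) - seq_len, 1) -- the value that follows each window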
In [18]:
dataX[1], dataY[1]   # the window [T_1, T_2] and its target T_3
Out[18]:
In [47]:
# Define iterators
# split the data 70/30 into train and test
train_size = int(len(dataY) * 0.7)
test_size = len(dataY) - train_size
batch_size = 32
trainX, testX = np.array(dataX[:train_size]), np.array(dataX[train_size:])
trainY, testY = np.array(dataY[:train_size]), np.array(dataY[train_size:])
train_iter = mx.io.NDArrayIter(data=trainX, label=trainY,
batch_size=batch_size, shuffle=True)
val_iter = mx.io.NDArrayIter(data=testX, label=testY,
batch_size=batch_size, shuffle=False)
trainX.shape
Out[47]:
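To confirm what the iterator feeds the network, you can peek at one batch (a small sketch; the shapes in the comments follow from batch_size=32 and seq_len=2):
batch = next(iter(train_iter))
print(batch.data[0].shape)     # (32, 2, 1) -- N, T, C as stored in the iterator
print(batch.label[0].shape)    # (32, 1)
train_iter.reset()             # rewind before training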
In [48]:
# Let's build the network
data = mx.sym.var("data")
data = mx.sym.transpose(data, axes=(1, 0, 2))
# T N C -- T: time steps / seq len; N: batch size; C: input feature dimension
'''
Long-Short Term Memory (LSTM) network cell.
Parameters:
num_hidden (int) – Number of units in output symbol.
prefix (str, default 'lstm_') – Prefix for name of layers (and name of weight if params is None).
params (RNNParams, default None) – Container for weight sharing between cells. Created if None.
forget_bias (bias added to forget gate, default 1.0.) – Jozefowicz et al. 2015 recommends setting this to 1.0
'''
lstm1 = mx.rnn.LSTMCell(num_hidden=5, prefix='lstm1')
lstm2 = mx.rnn.LSTMCell(num_hidden=10, prefix='lstm2')
L1, states = lstm1.unroll(length=seq_len, inputs=data, merge_outputs=True, layout="TNC")
L2, L2_states = lstm2.unroll(length=seq_len, inputs=L1, merge_outputs=True, layout="TNC")
L2_reshape = mx.sym.reshape(L2_states[0], shape=(-1, 0), reverse=True)  # final hidden state of lstm2: (N, 10)
fc = mx.sym.FullyConnected(L2_reshape, num_hidden=1, name='fc')
net = mx.sym.LinearRegressionOutput(data=fc, name="softmax")  # named "softmax" so the label binds to NDArrayIter's default 'softmax_label'
#mx.viz.plot_network(net) #, shape=(1,2,2))
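One way to sanity-check the wiring is to let MXNet infer shapes from a sample input shape (a sketch; batch_size=32 and seq_len=2 come from the earlier cells):
_, out_shapes, _ = fc.infer_shape(data=(batch_size, seq_len, 1))
print(out_shapes)              # [(32, 1)] -- one prediction per sample in the batch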
In [57]:
# Training
import logging
logging.getLogger().setLevel(logging.DEBUG)
num_epochs = 2
model = mx.mod.Module(symbol=net, context=mx.cpu(0))
model.fit(train_data=train_iter, eval_data=val_iter,
optimizer="adam",
optimizer_params={'learning_rate': 1E-3},
eval_metric="mse",
num_epoch=num_epochs
)
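If you want to keep the trained weights around, the Module API can write a checkpoint (a sketch; 'spot-lstm' is just a placeholder prefix):
model.save_checkpoint('spot-lstm', num_epochs)   # writes spot-lstm-symbol.json and spot-lstm-0002.params
sym, arg_params, aux_params = mx.model.load_checkpoint('spot-lstm', num_epochs)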
In [58]:
test_pred = model.predict(val_iter).asnumpy()
#type(test_pred)
print(np.mean((test_pred - testY)**2))   # MSE on the scaled values
test_plot = scaler.inverse_transform(test_pred)
test_plot[:5], testY[:5]
Out[58]:
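Note that test_plot is back in dollars while testY is still scaled, so the pair printed above mixes units; to compare like with like, invert the labels too (a small sketch using the same scaler):
testY_plot = scaler.inverse_transform(testY)
print(np.mean((test_plot - testY_plot)**2))      # MSE in original price units
test_plot[:5], testY_plot[:5]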
In [59]:
plt.plot(ticks[train_size+seq_len:], test_plot)
Out[59]:
In [60]:
t_plot = np.empty_like(dataset)
t_plot[:] = np.nan                          # nan-fill so only the test span is drawn
t_plot[len(trainY): -seq_len] = test_plot   # align the predictions with their dates
plt.plot(ticks, dataset, label="real data")
plt.plot(ticks, t_plot, label="pred")
plt.legend()
Out[60]: