In [16]:
# Data: time-series data (e.g., smartwatch, financial, or bike-sharing data)
# %matplotlib inline # for plt.show()
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
# Data reading
# The historical/time-series data to visualize and model
# data_path = 'data/smartwatch_data/experimental_data_analysis/Basis_Watch_Data.csv'
# data_path = 'data/financial_data/USD_INR.csv'
data_path = 'data/bike_data/hour.csv'
data = pd.read_csv(data_path)
# Data cleaning
# Replace NaN values with 0.0
data = data.fillna(value=0.0)
# Show the first 10 rows of the CSV (comma-separated values) file
data[:10]
Out[16]:
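In [ ]:
# A quick sanity check (a minimal sketch using only the `data` DataFrame loaded
# above): confirm that no NaN values remain after fillna and inspect the column dtypes
data.isnull().values.any(), data.dtypes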
In [17]:
# Plotting the data before scaling/standardization
# data[:10000]['Price'].plot()
data[:10].plot()
plt.legend()
plt.show()
In [18]:
data_array = np.array(data)
data_array.shape, data_array.dtype
# Keep only the last two columns as float features (for the bike data: 'registered' and 'cnt')
data_main = np.array(data_array[:, -2:], dtype=float)
data_main.shape, data_main.dtype
plt.plot(data_main[:100])
plt.show()
In [19]:
# Standardize each feature (column-wise): zero mean, unit variance
mean = np.mean(data_main, axis=0)
std = np.std(data_main, axis=0)
std.shape, mean.shape, std.dtype, mean.dtype
data_norm = (data_main - mean) / std
plt.plot(data_norm[:100])
plt.show()
data_norm.mean(), data_norm.std(), data_norm.var(), data_norm.shape, data_norm.dtype
Out[19]:
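In [ ]:
# A quick per-feature check of the standardization (uses only data_norm from above):
# each column of data_norm should now have mean ~0 and std ~1
data_norm.mean(axis=0), data_norm.std(axis=0)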
In [20]:
train_data = data_norm[:16000]  # first 16000 time steps for training
test_data = data_norm[16000:]   # remaining time steps for validation
train_data.shape, test_data.shape
X_train = train_data[0:15999]
Y_train = train_data[1:16000]
X_train.shape, Y_train.shape
plt.plot(X_train[:100])
plt.plot(Y_train[:100])
plt.show()
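In [ ]:
# Sanity check of the next-step targets (uses X_train/Y_train from above):
# Y_train is X_train shifted by one time step, so X_train[1:] and Y_train[:-1]
# should be identical
np.allclose(X_train[1:], Y_train[:-1])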
In [21]:
X_valid = test_data[0:1378]
Y_valid = test_data[1:1379]
X_valid.shape, Y_valid.shape
plt.plot(X_valid[:100])
plt.plot(Y_valid[:100])
plt.show()
In [22]:
# Model or Network
import impl.layer as l
from impl.loss import *
class GRU:
    def __init__(self, D, H, p_dropout):  # , lam
        self.D = D
        self.H = H
        self.p_dropout = p_dropout
        # self.lam = lam
        self.losses = {'train': [], 'smooth train': [], 'valid': []}
        # Model params
        Z = H + D  # size of the concatenated [h, x] input
        m = dict(
            Wz=np.random.randn(Z, H) / np.sqrt(Z / 2.),
            Wr=np.random.randn(Z, H) / np.sqrt(Z / 2.),
            Wh=np.random.randn(Z, H) / np.sqrt(Z / 2.),
            Wy=np.random.randn(H, D) / np.sqrt(H / 2.),
            bz=np.zeros((1, H)),
            br=np.zeros((1, H)),
            bh=np.zeros((1, H)),
            by=np.zeros((1, D))
        )
        self.model = m

    def initial_state(self):
        return np.zeros((1, self.H))

    def forward(self, X, h, m):
        Wz, Wr, Wh, Wy = m['Wz'], m['Wr'], m['Wh'], m['Wy']
        bz, br, bh, by = m['bz'], m['br'], m['bh'], m['by']
        X_in = X.copy()
        h_in = h.copy()
        X = np.column_stack((h_in, X_in))
        hz, hz_cache = l.fc_forward(X, Wz, bz)
        hz, hz_sigm_cache = l.sigmoid_forward(hz)
        hr, hr_cache = l.fc_forward(X, Wr, br)
        hr, hr_sigm_cache = l.sigmoid_forward(hr)
        X = np.column_stack((hr * h_in, X_in))
        hh, hh_cache = l.fc_forward(X, Wh, bh)
        hh, hh_tanh_cache = l.tanh_forward(hh)
        # h = (1. - hz) * h_in + hz * hh
        # or, equivalently:
        h = ((1. - hz) * h_in) + (hz * hh)
        # or: h = h_in + hz * (hh - h_in)
        y, y_cache = l.fc_forward(h, Wy, by)
        cache = (h_in, hz, hz_cache, hz_sigm_cache, hr, hr_cache, hr_sigm_cache, hh, hh_cache, hh_tanh_cache,
                 y_cache)
        return y, h, cache

    def backward(self, dy, dh, cache):
        h_in, hz, hz_cache, hz_sigm_cache, hr, hr_cache, hr_sigm_cache, hh, hh_cache, hh_tanh_cache, y_cache = cache
        dh_out = dh.copy()
        dh, dWy, dby = l.fc_backward(dy, y_cache)
        dh += dh_out
        dh_in1 = (1. - hz) * dh
        dhh = hz * dh
        dhz = (hh * dh) - (h_in * dh)
        # or, equivalently: dhz = (hh - h_in) * dh
        dhh = l.tanh_backward(dhh, hh_tanh_cache)
        dXh, dWh, dbh = l.fc_backward(dhh, hh_cache)
        dh = dXh[:, :self.H]
        dX_in2 = dXh[:, self.H:]
        dh_in2 = hr * dh
        dhr = h_in * dh
        dhr = l.sigmoid_backward(dhr, hr_sigm_cache)
        dXr, dWr, dbr = l.fc_backward(dhr, hr_cache)
        dhz = l.sigmoid_backward(dhz, hz_sigm_cache)
        dXz, dWz, dbz = l.fc_backward(dhz, hz_cache)
        dX = dXr + dXz
        dh_in3 = dX[:, :self.H]
        dX_in1 = dX[:, self.H:]
        dh = dh_in1 + dh_in2 + dh_in3
        dX = dX_in1 + dX_in2
        grad = dict(Wz=dWz, Wr=dWr, Wh=dWh, Wy=dWy, bz=dbz, br=dbr, bh=dbh, by=dby)
        return dX, dh, grad

    def train_forward(self, X_train, h):
        ys, caches, do_caches = [], [], []
        for X in X_train:
            X = X.reshape(1, -1)  # X_1xn
            y, h, cache = self.forward(X, h, self.model)
            y, do_cache = l.dropout_forward(y, self.p_dropout)
            caches.append(cache)
            do_caches.append(do_cache)
            ys.append(y)
        ys = np.array(ys, dtype=float).reshape(len(ys), -1)  # ys_txn instead of ys_tx1xn
        caches_all = (caches, do_caches)
        return ys, caches_all

    def loss_function(self, y_pred, y_train):
        loss, dys = 0.0, []
        for y, Y in zip(y_pred, y_train):
            # loss += l2_regression_reg(model=self.model, y_pred=y, y_train=Y, lam=self.lam)
            loss += l2_regression(y_pred=y, y_train=Y)
            dy = dl2_regression(y_pred=y, y_train=Y)
            dys.append(dy)
        return loss, dys

    def train_backward(self, dys, caches_all):
        caches, do_caches = caches_all
        dh = np.zeros((1, self.H))
        grad = {key: np.zeros_like(val) for key, val in self.model.items()}
        grads = {key: np.zeros_like(val) for key, val in self.model.items()}
        for t in reversed(range(len(dys))):
            dy = dys[t].reshape(1, -1)  # dy_1xn
            dy = l.dropout_backward(dy, do_caches[t])
            dX, dh, grad = self.backward(dy, dh, caches[t])
            for key in grad.keys():
                grads[key] += grad[key]
        return dX, grads

    def test(self, X_seed, h, size):
        ys = []
        X = X_seed.reshape(1, -1)
        for _ in range(size):
            y, h, _ = self.forward(X, h, self.model)
            X = y.copy()  # feed the previous output back in as the next input
            ys.append(y)
        ys = np.array(ys, dtype=float).reshape(len(ys), -1)  # ys_txn instead of ys_tx1xn
        return ys
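In [ ]:
# A rough numerical gradient check for the GRU backward pass, as a minimal sketch.
# `check_gru_grad` and `check_net` are illustrative names introduced here, and the
# check assumes the impl.layer/impl.loss API used above. Because dropout makes
# train_forward stochastic, the check is run on a GRU built with p_dropout=1.0
# (keep everything), assuming l.dropout_forward treats p_dropout as a keep probability.
def check_gru_grad(nn, X, Y, key, idx, eps_fd=1e-5):
    h0 = nn.initial_state()
    # Analytic gradient from one forward/backward pass
    ys, caches = nn.train_forward(X, h0)
    _, dys = nn.loss_function(y_pred=ys, y_train=Y)
    _, grads = nn.train_backward(dys, caches)
    analytic = grads[key][idx]
    # Centered finite-difference estimate of the same entry
    w0 = nn.model[key][idx]
    nn.model[key][idx] = w0 + eps_fd
    loss_plus, _ = nn.loss_function(nn.train_forward(X, h0)[0], Y)
    nn.model[key][idx] = w0 - eps_fd
    loss_minus, _ = nn.loss_function(nn.train_forward(X, h0)[0], Y)
    nn.model[key][idx] = w0  # restore the original weight
    numeric = (loss_plus - loss_minus) / (2. * eps_fd)
    return analytic, numeric

check_net = GRU(D=X_train.shape[1], H=8, p_dropout=1.0)
check_gru_grad(check_net, X_train[:5], Y_train[:5], key='Wz', idx=(0, 0))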
In [24]:
def get_minibatch(X, y, minibatch_size, shuffle):
    # Note: `shuffle` is accepted for API symmetry but not used for this time series
    minibatches = []
    for i in range(0, X.shape[0], minibatch_size):
        # for i in range(0, X.shape[0] - minibatch_size + 1, 1):
        X_mini = X[i:i + minibatch_size]
        y_mini = y[i:i + minibatch_size]
        minibatches.append((X_mini, y_mini))
    return minibatches

def adam_rnn(nn, XY_train, XY_valid, alpha, mb_size, n_iter, print_after):
    X_train, y_train = XY_train
    X_valid, y_valid = XY_valid
    # First (M) and second (R) moment estimates for Adam
    M = {key: np.zeros_like(val) for key, val in nn.model.items()}
    R = {key: np.zeros_like(val) for key, val in nn.model.items()}
    # Exponential decay rates for the moments, as suggested in Justin Johnson's Stanford lectures
    beta1 = .9
    beta2 = .99
    state = nn.initial_state()
    smooth_loss = 1.
    minibatches = get_minibatch(X_train, y_train, mb_size, shuffle=False)
    # Epochs: iterating through the whole data
    for iter in range(1, n_iter + 1):
        # Minibatches
        for idx in range(len(minibatches)):
            # Train the model
            X_mini, y_mini = minibatches[idx]
            ys, caches = nn.train_forward(X_mini, state)
            loss, dys = nn.loss_function(y_pred=ys, y_train=y_mini)
            _, grads = nn.train_backward(dys, caches)
            nn.losses['train'].append(loss)
            smooth_loss = (0.999 * smooth_loss) + (0.001 * loss)
            nn.losses['smooth train'].append(smooth_loss)
            # Update the model (Adam with bias-corrected moments)
            for key in grads.keys():
                M[key] = l.exp_running_avg(M[key], grads[key], beta1)
                R[key] = l.exp_running_avg(R[key], grads[key]**2, beta2)
                m_k_hat = M[key] / (1. - (beta1 ** iter))
                r_k_hat = R[key] / (1. - (beta2 ** iter))
                nn.model[key] -= alpha * m_k_hat / (np.sqrt(r_k_hat) + l.eps)
        # Validate the model (by free-running prediction on the validation set)
        ys = nn.test(X_seed=X_valid[0], h=state, size=X_valid.shape[0])  # ys_txn
        valid_loss, _ = nn.loss_function(y_pred=ys, y_train=y_valid)
        nn.losses['valid'].append(valid_loss)
        # Print the model loss/error
        if iter % print_after == 0:
            print('Iter-{}, train loss: {:.8f}, valid loss: {:.8f}'.format(iter, loss, valid_loss))
    return nn
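In [ ]:
# A quick look at what get_minibatch produces (uses X_train/Y_train from above):
# a list of (X_mini, y_mini) pairs of shape (minibatch_size, D), with a possibly
# shorter final batch
mbs = get_minibatch(X_train, Y_train, minibatch_size=10, shuffle=False)
len(mbs), mbs[0][0].shape, mbs[0][1].shape, mbs[-1][0].shape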
In [ ]:
# Hyper-parameters
time_step = 10 # minibatch/sequence length; common choices: 32, 64, 128, or 256
n_iter = 10 # epochs
alpha = 1e-4 # learning_rate: 1e-3, 5e-4, 1e-4 - default choices
print_after = 1 # print training loss, valid, and test
num_hidden_units = 64 # number of hidden units in the GRU layer
num_input_units = X_train.shape[1] # X_txn: use all available channels/features as inputs
keep_prob = 0.9 # p_dropout == keep_prob: probability of keeping a unit; 0.90-0.95 is a common default (cf. SELU/alpha-dropout)
# lam = 1e-4 # regularization: noise in loss feedback
# Build the network, then learn/optimize it with minibatch SGD (Adam)
# def adam_rnn(nn, X_train, y_train, alpha=0.001, mb_size=256, n_iter=2000, print_after=100):
net = GRU(D=num_input_units, H=num_hidden_units, p_dropout=keep_prob) # , lam=lam
# Start learning using backpropagation through time with the Adam update
adam_rnn(nn=net, XY_train=(X_train, Y_train), XY_valid=(X_valid, Y_valid), alpha=alpha, mb_size=time_step,
n_iter=n_iter, print_after=print_after)
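In [ ]:
# Optionally persist the learned parameters for later reuse; 'gru_model.npz' is
# just an illustrative file name. np.load('gru_model.npz') would recover the
# arrays under the same keys used in net.model.
np.savez('gru_model.npz', **net.model)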
In [ ]:
import matplotlib.pyplot as plt
plt.plot(net.losses['train'], label='Train loss')
plt.plot(net.losses['smooth train'], label='Train smooth loss')
plt.legend()
plt.show()
In [ ]:
import matplotlib.pyplot as plt
plt.plot(net.losses['valid'], label='Validation loss')
plt.legend()
plt.show()
In [ ]:
import matplotlib.pyplot as plt
y_pred = net.test(X_seed=X_valid[0], h=net.initial_state(), size=X_valid.shape[0]) # y_pred_txn
y_pred.shape, Y_valid.shape
plt.plot(y_pred[:100], label='y_pred')
plt.plot(Y_valid[:100], label='Y_valid')
# plt.plot(X_valid[:100], label='X_valid')
plt.legend()
plt.show()
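In [ ]:
# Map the predictions back to the original units with the mean/std computed earlier
# and report a simple per-feature RMSE ('rmse' is just an illustrative name)
y_pred_orig = (y_pred * std) + mean
Y_valid_orig = (Y_valid * std) + mean
rmse = np.sqrt(np.mean((y_pred_orig - Y_valid_orig) ** 2, axis=0))
rmse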