In [40]:
# Data: time-series data from a smartwatch
# %matplotlib inline # for plt.show()
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Data reading
# The smartwatch historical/time-series data to visualize
# data_path = 'data/smartwatch_data/experimental_data_analysis/Basis_Watch_Data.csv'
# data_path = 'data/financial_data/USD_INR.csv'
# data_path = 'data/bike_data/hour.csv'
data_path = 'data/smartwatch_data/experimental_data_analysis/Basis_Watch_Data.csv'
data = pd.read_csv(data_path)

# Data cleaning
# Replace NaN values with 0.0
data = data.fillna(value=0.0)

# Show the first 10 rows of the CSV (comma-separated values) file
data[:10]


Out[40]:
                date  calories  gsr  heart-rate  skin-temp  steps
0  2014-08-08 15:18Z       0.4  0.0         0.0        0.0    0.0
1  2014-08-08 15:19Z       1.4  0.0         0.0        0.0    0.0
2  2014-08-08 15:20Z       1.4  0.0         0.0        0.0    0.0
3  2014-08-08 15:21Z       1.3  0.0         0.0        0.0    0.0
4  2014-08-08 15:22Z       1.4  0.0         0.0        0.0    0.0
5  2014-08-08 15:23Z       1.4  0.0         0.0        0.0    0.0
6  2014-08-08 15:24Z       1.4  0.0         0.0        0.0    0.0
7  2014-08-08 15:25Z       1.4  0.0         0.0        0.0    0.0
8  2014-08-08 15:26Z       1.4  0.0         0.0        0.0    0.0
9  2014-08-08 15:27Z       1.4  0.0         0.0        0.0    0.0
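
The date column is read as plain strings and is dropped before modeling below; if a time-indexed view were needed, it could be parsed into a DatetimeIndex. A minimal sketch, not used in the rest of the notebook:

In [ ]:
# Hypothetical aside: parse the ISO-like timestamps (the trailing 'Z' marks UTC)
# and index the frame by time for calendar-aware plotting or resampling.
data_ts = data.copy()
data_ts['date'] = pd.to_datetime(data_ts['date'], utc=True)
data_ts = data_ts.set_index('date')
data_ts[:5]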

In [41]:
# Plot the first 300 rows of the raw data before normalization
# data[:10000]['Price'].plot()
data[:300].plot()
plt.legend()
plt.show()



In [42]:
# Convert the DataFrame to a NumPy array, drop the date column, and keep
# the numeric sensor readings as float64
data_array = np.array(data)
data_main = np.array(data_array[:, 1:], dtype=float)

plt.plot(data_main[:100])
plt.show()
data_main.shape, data_main.dtype


Out[42]:
((1011347, 5), dtype('float64'))

In [43]:
# Standardize each feature to zero mean and unit variance (per column)
mean = np.mean(data_main, axis=0)
std = np.std(data_main, axis=0)

data_norm = (data_main - mean) / std
plt.plot(data_norm[:1000])
plt.show()
data_norm.mean(), data_norm.std(), data_norm.var(), data_norm.shape, data_norm.dtype


Out[43]:
(8.7430704010522504e-17,
 0.99999999999999956,
 0.99999999999999922,
 (1011347, 5),
 dtype('float64'))
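
Because training and evaluation happen in standardized units, keeping `mean` and `std` around makes the transform invertible, so predictions can later be mapped back to the original sensor scale. A minimal sketch of the inverse transform (not part of the original pipeline):

In [ ]:
# Hypothetical helper: undo the standardization so values can be read in
# the original units (calories, gsr, heart-rate, skin-temp, steps).
def denormalize(x_norm, mean, std):
    return x_norm * std + mean

# Sanity check: the inverse transform recovers the raw readings
np.allclose(denormalize(data_norm, mean, std), data_main)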

In [44]:
train_data = data_norm[:10000]  # first 10,000 samples for training
test_data = data_norm[10000:]   # remaining 1,001,347 samples held out
train_data.shape, test_data.shape
X_train = train_data[0:9999]    # inputs: samples 0..9998
Y_train = train_data[1:10000]   # targets: the sample that follows each input
X_train.shape, Y_train.shape

plt.plot(X_train[:100])
plt.plot(Y_train[:100])
plt.show()
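
The cell above builds one-step-ahead pairs: each row of X_train is matched with the row that immediately follows it as its target. The same construction as a reusable helper (the helper is illustrative, not part of the codebase):

In [ ]:
# Hypothetical helper: pair every sample with its successor so the model
# learns to predict the next timestep from the current one.
def make_next_step_pairs(seq):
    return seq[:-1], seq[1:]

X_chk, Y_chk = make_next_step_pairs(train_data)
np.array_equal(X_chk, X_train), np.array_equal(Y_chk, Y_train)  # expected: (True, True)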



In [45]:
X_valid = test_data[0:999]   # inputs: the first 999 held-out samples
Y_valid = test_data[1:1000]  # targets: the sample that follows each input
X_valid.shape, Y_valid.shape
plt.plot(X_valid[:100])
plt.plot(Y_valid[:100])
plt.show()



In [46]:
# Model or Network
import impl.layer as l
from impl.loss import *

class GRU:
    def __init__(self, D, H, p_dropout, lam):
        self.D = D
        self.H = H
        self.p_dropout = p_dropout
        self.lam = lam
        self.losses = {'train':[], 'smooth train':[], 'valid': []}
        
        # Model params
        Z = H + D
        m = dict(
            Wz=np.random.randn(Z, H) / np.sqrt(Z / 2.),
            Wr=np.random.randn(Z, H) / np.sqrt(Z / 2.),
            Wh=np.random.randn(Z, H) / np.sqrt(Z / 2.),
            Wy=np.random.randn(H, D) / np.sqrt(H / 2.),
            bz=np.zeros((1, H)),
            br=np.zeros((1, H)),
            bh=np.zeros((1, H)),
            by=np.zeros((1, D))
        )
        self.model = m
        
    def initial_state(self):
        return np.zeros((1, self.H))

    def forward(self, X, h, m):
        Wz, Wr, Wh, Wy = m['Wz'], m['Wr'], m['Wh'], m['Wy']
        bz, br, bh, by = m['bz'], m['br'], m['bh'], m['by']

        X_in = X.copy()
        h_in = h.copy()

        # Gate input: concatenate the previous hidden state and the current input
        X = np.column_stack((h_in, X_in))

        # Update gate: hz = sigmoid([h, x] . Wz + bz)
        hz, hz_cache = l.fc_forward(X, Wz, bz)
        hz, hz_sigm_cache = l.sigmoid_forward(hz)

        # Reset gate: hr = sigmoid([h, x] . Wr + br)
        hr, hr_cache = l.fc_forward(X, Wr, br)
        hr, hr_sigm_cache = l.sigmoid_forward(hr)

        # Candidate state: hh = tanh([hr * h, x] . Wh + bh)
        X = np.column_stack((hr * h_in, X_in))
        hh, hh_cache = l.fc_forward(X, Wh, bh)
        hh, hh_tanh_cache = l.tanh_forward(hh)

        # New hidden state: interpolate between the old state and the candidate;
        # equivalently h = h_in + hz * (hh - h_in)
        h = ((1. - hz) * h_in) + (hz * hh)

        # Output layer
        y, y_cache = l.fc_forward(h, Wy, by)
        
        cache = (h_in, hz, hz_cache, hz_sigm_cache, hr, hr_cache, hr_sigm_cache, hh, hh_cache, hh_tanh_cache, 
                 y_cache)

        return y, h, cache

    def backward(self, dy, dh, cache):
        h_in, hz, hz_cache, hz_sigm_cache, hr, hr_cache, hr_sigm_cache, hh, hh_cache, hh_tanh_cache, y_cache = cache
        
        dh_out = dh.copy()

        dh, dWy, dby = l.fc_backward(dy, y_cache)
        dh += dh_out

        # Gradients through h = (1 - hz) * h_in + hz * hh
        dh_in1 = (1. - hz) * dh
        dhh = hz * dh
        dhz = (hh - h_in) * dh

        dhh = l.tanh_backward(dhh, hh_tanh_cache)
        dXh, dWh, dbh = l.fc_backward(dhh, hh_cache)

        dh = dXh[:, :self.H]
        dX_in2 = dXh[:, self.H:]
        dh_in2 = hr * dh

        dhr = h_in * dh
        dhr = l.sigmoid_backward(dhr, hr_sigm_cache)
        dXr, dWr, dbr = l.fc_backward(dhr, hr_cache)

        dhz = l.sigmoid_backward(dhz, hz_sigm_cache)
        dXz, dWz, dbz = l.fc_backward(dhz, hz_cache)

        dX = dXr + dXz
        dh_in3 = dX[:, :self.H]
        dX_in1 = dX[:, self.H:]

        dh = dh_in1 + dh_in2 + dh_in3
        dX = dX_in1 + dX_in2

        grad = dict(Wz=dWz, Wr=dWr, Wh=dWh, Wy=dWy, bz=dbz, br=dbr, bh=dbh, by=dby)
        
        return dX, dh, grad

    def train_forward(self, X_train, h):
        ys, caches, do_caches = [], [], []

        for X in X_train:
            X = X.reshape(1, -1) # X_1xn
            y, h, cache = self.forward(X, h, self.model)
            y, do_cache = l.dropout_forward(y, self.p_dropout)
            caches.append(cache)
            do_caches.append(do_cache)
            ys.append(y)
        
        ys = np.array(ys, dtype=float).reshape(len(ys), -1) # stack per-step outputs into shape (t, n)
        
        return ys, caches, do_caches
                                
    def loss_function(self, y_pred, y_train):
        # Sum the regularized L2 regression loss and its gradient over all timesteps
        loss, dys = 0.0, []

        for y, Y in zip(y_pred, y_train):
            loss += l2_regression_reg(model=self.model, y_pred=y, y_train=Y, lam=self.lam)
            dy = dl2_regression(y_pred=y, y_train=Y)
            dys.append(dy)
            
        return loss, dys
    
    def train_backward(self, dys, caches, do_caches):
        dh = np.zeros((1, self.H))
        grad = {key: np.zeros_like(val) for key, val in self.model.items()}
        grads = {key: np.zeros_like(val) for key, val in self.model.items()}

        for t in reversed(range(len(dys))):
            dy = dys[t].reshape(1, -1) # dy_1xn
            dy = l.dropout_backward(dy, do_caches[t])
            _, dh, grad = self.backward(dy, dh, caches[t])
            for key in grad.keys():
                grads[key] += grad[key]
                
        return grads
    
    def test(self, X_seed, h, size):
        ys = []
        X = X_seed.reshape(1, -1)
        for _ in range(size):
            y, h, _ = self.forward(X, h, self.model)
            X = y.copy() # feed the previous output back in as the next input
            ys.append(y)

        ys = np.array(ys, dtype=float).reshape(len(ys), -1) # stack per-step outputs into shape (t, n)
        return ys
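
A quick shape check of the class above can catch wiring mistakes before training on the full series. A throwaway sketch with arbitrary sizes, assuming the `impl` layers behave as they are used above:

In [ ]:
# Smoke test: unroll a tiny GRU over a few random timesteps and confirm
# the stacked outputs have shape (timesteps, D) and one cache per step.
tiny = GRU(D=5, H=8, p_dropout=0.9, lam=1e-4)
X_tiny = np.random.randn(4, 5)                   # 4 timesteps, 5 features
ys_tiny, caches_tiny, _ = tiny.train_forward(X_tiny, tiny.initial_state())
ys_tiny.shape, len(caches_tiny)                  # expected: ((4, 5), 4)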

In [47]:
def get_minibatch(X, y, minibatch_size, shuffle):
    minibatches = []

    for i in range(0, X.shape[0], minibatch_size):
    # for i in range(0, X.shape[0] - minibatch_size + 1, 1):
        X_mini = X[i:i + minibatch_size]
        y_mini = y[i:i + minibatch_size]
        minibatches.append((X_mini, y_mini))

    return minibatches

def adam_rnn(nn, XY_train, XY_valid, alpha, mb_size, n_iter, print_after):
    X_train, y_train = XY_train
    X_valid, y_valid = XY_valid

    M = {key: np.zeros_like(val) for key, val in nn.model.items()}
    R = {key: np.zeros_like(val) for key, val in nn.model.items()}

    # Adam exponential decay rates (values suggested by Justin Johnson at Stanford)
    beta1 = .9
    beta2 = .99
    state = nn.initial_state()
    smooth_loss = 1.
    minibatches = get_minibatch(X_train, y_train, mb_size, shuffle=False)
    
    for iter in range(1, n_iter + 1):
        for idx in range(len(minibatches)):
            
            # Train the model
            X_mini, y_mini = minibatches[idx]
            ys, caches, do_caches = nn.train_forward(X_mini, state)
            loss, dys = nn.loss_function(y_pred=ys, y_train=y_mini) #, alpha=alpha
            grads = nn.train_backward(dys, caches, do_caches)
            nn.losses['train'].append(loss)
            smooth_loss = (0.999 * smooth_loss) + (0.001 * loss)
            nn.losses['smooth train'].append(smooth_loss)
            
            # Update the model
            for key in grads.keys():
                M[key] = l.exp_running_avg(M[key], grads[key], beta1)
                R[key] = l.exp_running_avg(R[key], grads[key]**2, beta2)
                m_k_hat = M[key] / (1. - beta1 ** iter)
                r_k_hat = R[key] / (1. - beta2 ** iter)
                nn.model[key] -= alpha * m_k_hat / (np.sqrt(r_k_hat) + l.eps)

            # Validate the model on the held-out split
            ys = nn.test(X_seed=X_valid[0], h=state, size=X_valid.shape[0])
            valid_loss, _ = nn.loss_function(y_pred=ys, y_train=y_valid)
            nn.losses['valid'].append(valid_loss)

        # Print the training and validation loss
        if iter % print_after == 0:
            print('Iter-{}, train loss: {:.8f}, valid loss: {:.8f}'.format(iter, loss, valid_loss))

    return nn
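
Before launching training, it helps to see how `get_minibatch` slices the series: each minibatch is a contiguous window of `minibatch_size` timesteps, and the last one may be shorter. A quick illustrative check (the expected numbers follow from len(X_train) = 9999):

In [ ]:
# Illustration only: 9999 training steps in windows of 128 timesteps gives
# 79 minibatches, the last one holding the remaining 15 samples.
mbs = get_minibatch(X_train, Y_train, minibatch_size=128, shuffle=False)
len(mbs), mbs[0][0].shape, mbs[-1][0].shape   # expected: (79, (128, 5), (15, 5))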

In [ ]:
# Hyper-parameters
time_step = 128      # minibatch length (timesteps per update); common choices: 32, 64, 128, 256
n_iter = 150         # number of epochs
alpha = 1e-4         # learning rate; 1e-3, 5e-4, and 1e-4 are common defaults
print_after = 1      # print training and validation loss every N epochs
num_hidden_units = 64               # units in the hidden layer
num_input_units = X_train.shape[1]  # X is (t, n): shape[0] == timesteps, shape[1] == features
keep_prob = 0.9      # dropout keep probability (fraction of units kept); 0.9-0.95 typical (cf. SELU dropout)
lam = 1e-4           # L2 regularization strength

# Build the network
# def adam_rnn(nn, X_train, y_train, alpha=0.001, mb_size=256, n_iter=2000, print_after=100):
net = GRU(D=num_input_units, H=num_hidden_units, p_dropout=keep_prob, lam=lam)

# Train with backpropagation through time and Adam
adam_rnn(nn=net, XY_train=(X_train, Y_train), XY_valid=(X_valid, Y_valid), alpha=alpha, mb_size=time_step,
         n_iter=n_iter, print_after=print_after)


Iter-1, train loss: 13.08543271, valid loss: 1162.75434055
Iter-2, train loss: 12.16179848, valid loss: 1827.04538674
Iter-3, train loss: 11.81193708, valid loss: 2078.85347956
Iter-4, train loss: 11.38112413, valid loss: 2299.98243457
Iter-5, train loss: 10.92883935, valid loss: 2485.31235491
Iter-6, train loss: 10.09588518, valid loss: 2680.60155887
Iter-7, train loss: 10.63696776, valid loss: 2850.95786596
Iter-8, train loss: 9.72961641, valid loss: 3016.65774180
Iter-9, train loss: 9.78746102, valid loss: 3182.26144833
Iter-10, train loss: 8.97592483, valid loss: 3332.96858536
Iter-11, train loss: 8.41985591, valid loss: 3498.73739668
Iter-12, train loss: 7.92538970, valid loss: 3644.33209640
Iter-13, train loss: 7.61722210, valid loss: 3801.77370839
Iter-14, train loss: 7.45129425, valid loss: 3951.58332476
Iter-15, train loss: 8.24891298, valid loss: 4105.20704299
Iter-16, train loss: 7.03340127, valid loss: 4244.29676722
Iter-17, train loss: 6.75321554, valid loss: 4369.64883497
Iter-18, train loss: 6.68043958, valid loss: 4514.93695739
Iter-19, train loss: 5.84972854, valid loss: 4669.05541727
Iter-20, train loss: 7.11646929, valid loss: 4806.63133608
Iter-21, train loss: 5.61101128, valid loss: 4945.12373038
Iter-22, train loss: 6.92996890, valid loss: 5065.12731725
Iter-23, train loss: 5.94251684, valid loss: 5201.36569953
Iter-24, train loss: 6.43916040, valid loss: 5314.66671391
Iter-25, train loss: 5.15879906, valid loss: 5451.59082927
Iter-26, train loss: 5.64256724, valid loss: 5586.43075259
Iter-27, train loss: 5.41718591, valid loss: 5697.28209403
Iter-28, train loss: 5.92218701, valid loss: 5848.68608783
Iter-29, train loss: 6.94338864, valid loss: 5974.28414508
Iter-30, train loss: 5.15968081, valid loss: 6097.55021623
Iter-31, train loss: 4.31327408, valid loss: 6275.88218987
Iter-32, train loss: 4.34632384, valid loss: 6414.19246775
Iter-33, train loss: 4.30747419, valid loss: 6562.45204821
Iter-34, train loss: 4.77383861, valid loss: 6721.95846020
Iter-35, train loss: 4.05060943, valid loss: 6886.52909582
Iter-36, train loss: 4.97888776, valid loss: 7040.66350983
Iter-37, train loss: 4.64336965, valid loss: 7201.06461274
Iter-38, train loss: 4.25192331, valid loss: 7376.45179774
Iter-39, train loss: 3.81418718, valid loss: 7579.95548090
Iter-40, train loss: 4.90241888, valid loss: 7735.61521040
Iter-41, train loss: 4.24627118, valid loss: 7913.61245833
Iter-42, train loss: 5.59035495, valid loss: 8095.41511751
Iter-43, train loss: 3.86512487, valid loss: 8192.65063652
Iter-44, train loss: 5.87763679, valid loss: 8373.22071109
Iter-45, train loss: 3.76179964, valid loss: 8553.55087382
Iter-46, train loss: 3.98213073, valid loss: 8711.19284442
Iter-47, train loss: 4.05459706, valid loss: 8849.63492693
Iter-48, train loss: 5.02380406, valid loss: 8980.45095971
Iter-49, train loss: 4.27576483, valid loss: 9110.94860469
Iter-50, train loss: 4.36877154, valid loss: 9249.57003166
Iter-51, train loss: 3.80248266, valid loss: 9432.25258971
Iter-52, train loss: 3.59871516, valid loss: 9566.99088456
Iter-53, train loss: 3.26035052, valid loss: 9691.41832962
Iter-54, train loss: 4.65169823, valid loss: 9758.60362266
Iter-55, train loss: 4.98472207, valid loss: 9837.03140750
Iter-56, train loss: 4.03045521, valid loss: 9964.91850330
Iter-57, train loss: 4.76183762, valid loss: 10085.76603805
Iter-58, train loss: 3.20162048, valid loss: 10327.94684113
Iter-59, train loss: 3.74602390, valid loss: 10372.23732306
Iter-60, train loss: 3.16981721, valid loss: 10397.39297840
Iter-61, train loss: 5.36572711, valid loss: 10300.58494272
Iter-62, train loss: 4.26683906, valid loss: 10520.29881047
Iter-63, train loss: 3.66122145, valid loss: 10479.37166238
Iter-64, train loss: 4.22287224, valid loss: 10796.97888176
Iter-65, train loss: 4.42807567, valid loss: 10836.93904530
Iter-66, train loss: 3.76873542, valid loss: 10759.85999416
Iter-67, train loss: 3.94822255, valid loss: 10765.55079601
Iter-68, train loss: 4.12757849, valid loss: 10774.26533486
Iter-69, train loss: 3.64667450, valid loss: 10934.01000475

In [ ]:
# Display the learning curve: training loss and its smoothed version
# % matplotlib inline
# import matplotlib.pyplot as plt

plt.plot(net.losses['train'], label='Train loss')
plt.plot(net.losses['smooth train'], label='Smooth train loss')
plt.legend()
plt.show()

In [ ]:
import matplotlib.pyplot as plt

plt.plot(net.losses['valid'], label='Validation loss')
plt.legend()
plt.show()

In [ ]:
import matplotlib.pyplot as plt

y_pred = net.test(X_seed=X_valid[0], h=net.initial_state(), size=X_valid.shape[0]) # y_pred has shape (t, n)
y_pred.shape, Y_valid.shape

plt.plot(y_pred[:3000, 1], label='y_pred')   # feature index 1 = gsr
plt.plot(Y_valid[:3000, 1], label='Y_valid')
# plt.plot(X_valid[:100], label='X_valid')
plt.legend()
plt.show()
