In [29]:
# Data: time-serie data from smartwatch or smartwatch data
# %matplotlib inline # for plt.show()
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Data reading
# The smartwatch historical/time-seris data to visualize
# data_path = 'data/smartwatch_data/experimental_data_analysis/Basis_Watch_Data.csv'
# data_path = 'data/financial_data/USD_INR.csv'
data_path = 'data/bike_data/hour.csv'
data = pd.read_csv(data_path)

# Data: cleaning
# Getting rid of NaN
data = data.fillna(value=0.0)

# Showing the data file csv or comma separated value
data[:10]


Out[29]:
instant dteday season yr mnth hr holiday weekday workingday weathersit temp atemp hum windspeed casual registered cnt
0 1 2011-01-01 1 0 1 0 0 6 0 1 0.24 0.2879 0.81 0.0000 3 13 16
1 2 2011-01-01 1 0 1 1 0 6 0 1 0.22 0.2727 0.80 0.0000 8 32 40
2 3 2011-01-01 1 0 1 2 0 6 0 1 0.22 0.2727 0.80 0.0000 5 27 32
3 4 2011-01-01 1 0 1 3 0 6 0 1 0.24 0.2879 0.75 0.0000 3 10 13
4 5 2011-01-01 1 0 1 4 0 6 0 1 0.24 0.2879 0.75 0.0000 0 1 1
5 6 2011-01-01 1 0 1 5 0 6 0 2 0.24 0.2576 0.75 0.0896 0 1 1
6 7 2011-01-01 1 0 1 6 0 6 0 1 0.22 0.2727 0.80 0.0000 2 0 2
7 8 2011-01-01 1 0 1 7 0 6 0 1 0.20 0.2576 0.86 0.0000 1 2 3
8 9 2011-01-01 1 0 1 8 0 6 0 1 0.24 0.2879 0.75 0.0000 1 7 8
9 10 2011-01-01 1 0 1 9 0 6 0 1 0.32 0.3485 0.76 0.0000 8 6 14

In [30]:
# # Plotting the smartwatch data before scaling/batch normalization
# data[:10000]['Price'].plot()
data[: 10].plot()
plt.legend()
plt.show()



In [31]:
data_array = np.array(data)
data_array.shape, data_array.dtype
data_main = np.array(data_array[:, -1:], dtype=float)
data_main.shape, data_main.dtype

plt.plot(data_main[:100])
plt.show()



In [32]:
mean = np.mean(data_main, axis=0)
std = np.std(data_main, axis=0)
std.shape, mean.shape, std.dtype, mean.dtype

data_norm = (data_main - mean) / std
plt.plot(data_norm[:100])
plt.show()
data_norm.mean(), data_norm.std(), data_norm.var(), data_norm.shape, data_norm.dtype


Out[32]:
(-1.0548364452851478e-16, 1.0, 1.0, (17379, 1), dtype('float64'))

In [33]:
train_data = data_norm[:16000] # the last dim/variable/feature
test_data = data_norm[16000:] # the last dim/variable/feature
train_data.shape, test_data.shape
X_train = train_data[0:15999]
Y_train = train_data[1:16000]
X_train.shape, Y_train.shape

plt.plot(X_train[:100])
plt.plot(Y_train[:100])
plt.show()



In [34]:
X_valid = test_data[0:1378] 
Y_valid = test_data[1:1379]
X_valid.shape, Y_valid.shape
plt.plot(X_valid[:100])
plt.plot(Y_valid[:100])
plt.show()



In [41]:
# Model or Network
import impl.layer as l
from impl.loss import *

class GRU:
    def __init__(self, D, H, L):
        self.D = D
        self.H = H
        self.L = L
        self.losses = {'train':[], 'smooth train':[], 'valid': []}
        
        # Model params
        Z = H + D
        low, high = -1.0, 1.0
        self.model = []
        for _ in range(self.L):
            m = dict(
                # Wz=np.random.randn(Z, H) / np.sqrt(Z / 2.),
                # Wr=np.random.randn(Z, H) / np.sqrt(Z / 2.),
                # Wh=np.random.randn(Z, H) / np.sqrt(Z / 2.),
                # Wy=np.random.randn(H, D) / np.sqrt(H / 2.),
                Wz=np.random.uniform(size=(Z, H), low=low, high=high) / np.sqrt(Z / 2.),
                Wr=np.random.uniform(size=(Z, H), low=low, high=high) / np.sqrt(Z / 2.),
                Wh=np.random.uniform(size=(Z, H), low=low, high=high) / np.sqrt(Z / 2.),
                Wy=np.random.uniform(size=(H, D), low=low, high=high) / np.sqrt(H / 2.),
                bz=np.zeros((1, H)),
                br=np.zeros((1, H)),
                bh=np.zeros((1, H)),
                by=np.zeros((1, D))
            )
            self.model.append(m)
        
    def initial_state(self):
        return np.zeros((1, self.H))

    def forward(self, X, h, m):
        Wz, Wr, Wh, Wy = m['Wz'], m['Wr'], m['Wh'], m['Wy']
        bz, br, bh, by = m['bz'], m['br'], m['bh'], m['by']

        X_in = X.copy()
        h_in = h.copy()

        X = np.column_stack((h_in, X_in))

        hz, hz_cache = l.fc_forward(X, Wz, bz)
        hz, hz_sigm_cache = l.sigmoid_forward(hz)

        hr, hr_cache = l.fc_forward(X, Wr, br)
        hr, hr_sigm_cache = l.sigmoid_forward(hr)

        X = np.column_stack((hr * h_in, X_in))
        
        hh, hh_cache = l.fc_forward(X, Wh, bh)
        hh, hh_tanh_cache = l.tanh_forward(hh)

        # h = (1. - hz) * h_old + hz * hh
        # or
        h = ((1. - hz) * h_in) + (hz * hh)
        # or
        # h = h_in + hz (hh - h_in)

        y, y_cache = l.fc_forward(h, Wy, by)
        
        cache = (h_in, hz, hz_cache, hz_sigm_cache, hr, hr_cache, hr_sigm_cache, hh, hh_cache, hh_tanh_cache, 
                 y_cache)

        return y, h, cache

    def backward(self, dy, dh, cache):
        h_in, hz, hz_cache, hz_sigm_cache, hr, hr_cache, hr_sigm_cache, hh, hh_cache, hh_tanh_cache, y_cache = cache
        
        dh_out = dh.copy()

        dh, dWy, dby = l.fc_backward(dy, y_cache)
        dh += dh_out

        dh_in1 = (1. - hz) * dh
        dhh = hz * dh
        dhz = (hh * dh) - (h_in * dh)
        # or
        # dhz = (hh - h_in) * dh

        dhh = l.tanh_backward(dhh, hh_tanh_cache)
        dXh, dWh, dbh = l.fc_backward(dhh, hh_cache) # w_fixed/fb

        dh = dXh[:, :self.H]
        dX_in2 = dXh[:, self.H:]
        dh_in2 = hr * dh

        dhr = h_in * dh
        dhr = l.sigmoid_backward(dhr, hr_sigm_cache)
        dXr, dWr, dbr = l.fc_backward(dhr, hr_cache) # w_fixed_fb

        dhz = l.sigmoid_backward(dhz, hz_sigm_cache)
        dXz, dWz, dbz = l.fc_backward(dhz, hz_cache) # w_fixed_fb

        dX = dXr + dXz
        dh_in3 = dX[:, :self.H]
        dX_in1 = dX[:, self.H:]

        dh = dh_in1 + dh_in2 + dh_in3
        dX = dX_in1 + dX_in2

        grad = dict(Wz=dWz, Wr=dWr, Wh=dWh, Wy=dWy, bz=dbz, br=dbr, bh=dbh, by=dby)
        
        return dX, dh, grad

    def train_forward(self, X_train, h):
        ys, fc_caches, caches = [], [], []
        
        for X in X_train:
            X = X.reshape(1, -1) # X_1xn
            for layer in range(self.L):
                y, h, fc_cache = self.forward(X, h, self.model[layer])
                X = y.copy()
                fc_caches.append(fc_cache) # layers
            caches.append(fc_caches) # time
            ys.append(y)
        
        ys = np.array(ys, dtype=float).reshape(len(ys), -1) # ys_txn instead of ys_tx1xn
        
        return ys, caches
                                
    def loss_function(self, y_pred, y_train): # , alpha alpha: learning rate
        loss, dys = 0.0, []

        for y, Y in zip(y_pred, y_train):
            loss += l2_regression(y_pred=y, y_train=Y)
            dy = dl2_regression(y_pred=y, y_train=Y)
            dys.append(dy)
            
        return loss, dys
    
    def train_backward(self, dys, caches):        
        dh = np.zeros((1, self.H)) 
        grad = {key: np.zeros_like(val) for key, val in self.model[0].items()}
        grads = [] #{key: np.zeros_like(val) for key, val in self.model.items()}
        for _ in range(self.L):
            grads.append(grad)

        for t in reversed(range(len(dys))):
            dy = dys[t].reshape(1, -1) # dy_1xn
            fc_caches = caches[t]
            for layer in reversed(range(self.L)):
                dX, dh, grad = self.backward(dy, dh, fc_caches[layer])
                dy = dX.copy() # for the previous layer
                for key in grad.keys():
                    grads[layer][key] += grad[key]
                
        return dX, grads
    
    def test(self, X_seed, h, size):
        ys = []
        X = X_seed.reshape(1, -1)
        for _ in range(size):
            for layer in range(self.L):
                y, h, _ = self.forward(X, h, self.model[layer])
                X = y.copy() # previous out for the next input for prediction
            ys.append(y) # list array
        
        ys = np.array(ys, dtype=float).reshape(len(ys), -1) # ys_txn instead of ys_tx1xn
        return ys

In [42]:
def get_minibatch(X, y, minibatch_size, shuffle):
    minibatches = []

    for i in range(0, X.shape[0], minibatch_size):
        X_mini = X[i:i + minibatch_size]
        y_mini = y[i:i + minibatch_size]
        minibatches.append((X_mini, y_mini))

    return minibatches

def adam_rnn(nn, XY_train, XY_valid, alpha, mb_size, n_iter, print_after):
    X_train, y_train = XY_train
    X_valid, y_valid = XY_valid

    # Momentum
    M = [] # {key: np.zeros_like(val) for key, val in nn.model.items()}
    R = [] # {key: np.zeros_like(val) for key, val in nn.model.items()}
    for _ in range(nn.L):
        M.append({key: np.zeros_like(val) for key, val in nn.model[0].items()})
        R.append({key: np.zeros_like(val) for key, val in nn.model[0].items()})
    
    # Learning decay: suggested by Justin Jhonson in Standford
    beta1 = .9
    beta2 = .99
    state = nn.initial_state()
    smooth_loss = 1.
    minibatches = get_minibatch(X_train, y_train, mb_size, shuffle=False)
    
    # Epochs: iterating through the whole data
    for iter in range(1, n_iter + 1):
        
        # Minibatches
        for idx in range(len(minibatches)):
            
            # Train the model
            X_mini, y_mini = minibatches[idx]
            ys, caches = nn.train_forward(X_mini, state)
            loss, dys = nn.loss_function(y_pred=ys, y_train=y_mini) #, alpha=alpha
            _, grads = nn.train_backward(dys, caches)
            nn.losses['train'].append(loss)
            smooth_loss = (0.999 * smooth_loss) + (0.001 * loss)
            nn.losses['smooth train'].append(smooth_loss)
            
            # Update the model
            for layer in range(nn.L):
                for key in grads[0].keys(): # key, value, items
                    M[layer][key] = l.exp_running_avg(M[layer][key], grads[layer][key], beta1)
                    R[layer][key] = l.exp_running_avg(R[layer][key], grads[layer][key]**2, beta2)
                    m_k_hat = M[layer][key] / (1. - (beta1** iter))
                    r_k_hat = R[layer][key] / (1. - (beta2** iter))
                    nn.model[layer][key] -= alpha * m_k_hat / (np.sqrt(r_k_hat) + l.eps)

            # Validate the model (by testing)
            ys = nn.test(X_seed=X_valid[0], h=state, size=X_valid.shape[0]) # ys_tx1xn
            valid_loss, _ = nn.loss_function(y_pred=ys, y_train=Y_valid) #, alpha=alpha
            nn.losses['valid'].append(valid_loss)

        # Print the model loss/ error
        if iter % print_after == 0:
            print('Iter-{}, train loss: {:.8f}, valid loss: {:.8f}'.format(iter, loss, valid_loss))

    return nn

In [ ]:
# Hyper-parameters
time_step = 128 # minibatch size: 32, 64, 128, or 256 Cache
n_iter = 200 # epochs
alpha = 1e-4 # learning_rate: 1e-3, 5e-4, 1e-4 - default choices
print_after = 1 # print training loss, valid, and test
num_hidden_units = 64 # num_hidden_units in hidden layer
num_input_units = X_train.shape[1] # X_txn: noise given by using all possible channels/ features
num_hidden_layers = 2 # number of hidden layers

# Build the network and learning it or optimizing it using SGD
# def adam_rnn(nn, X_train, y_train, alpha=0.001, mb_size=256, n_iter=2000, print_after=100):
net = GRU(D=num_input_units, H=num_hidden_units, L=num_hidden_layers)

# Start learning using BP-SGD-ADAM
adam_rnn(nn=net, XY_train=(X_train, Y_train), XY_valid=(X_valid, Y_valid), alpha=alpha, mb_size=time_step,
         n_iter=n_iter, print_after=print_after)


Iter-1, train loss: 55.57628398, valid loss: 632.87992596
Iter-2, train loss: 52.66144236, valid loss: 632.83862461
Iter-3, train loss: 50.36786182, valid loss: 632.84819997
Iter-4, train loss: 48.37065027, valid loss: 632.90017556
Iter-5, train loss: 46.58416436, valid loss: 633.00094009
Iter-6, train loss: 44.99917810, valid loss: 633.16816775
Iter-7, train loss: 43.65534701, valid loss: 633.43231045
Iter-8, train loss: 42.62457816, valid loss: 633.84936575
Iter-9, train loss: 42.02455450, valid loss: 634.52371008
Iter-10, train loss: 42.01768193, valid loss: 635.69131488
Iter-11, train loss: 42.82325574, valid loss: 638.10952806
Iter-12, train loss: 44.54018609, valid loss: 646.11474073
Iter-13, train loss: 46.72635033, valid loss: 1287.96822674
Iter-14, train loss: 48.28318541, valid loss: 2142.97265899
Iter-15, train loss: 48.60472830, valid loss: 2324.94073774
Iter-16, train loss: 48.49430902, valid loss: 2330.80071272
Iter-17, train loss: 48.41404418, valid loss: 2334.33819074
Iter-18, train loss: 48.34450745, valid loss: 2338.25473258
Iter-19, train loss: 48.27651542, valid loss: 2342.25008190
Iter-20, train loss: 48.20808662, valid loss: 2346.26913509
Iter-21, train loss: 48.13870086, valid loss: 2350.25420149
Iter-22, train loss: 48.06827604, valid loss: 2354.19348089
Iter-23, train loss: 47.99681612, valid loss: 2358.08504874
Iter-24, train loss: 47.92434711, valid loss: 2361.93118500
Iter-25, train loss: 47.85089883, valid loss: 2365.73500086
Iter-26, train loss: 47.77650130, valid loss: 2369.49963204
Iter-27, train loss: 47.70118385, valid loss: 2373.22790025
Iter-28, train loss: 47.62497481, valid loss: 2376.92215084
Iter-29, train loss: 47.54790137, valid loss: 2380.58410949
Iter-30, train loss: 47.46998944, valid loss: 2384.21473948
Iter-31, train loss: 47.39126364, valid loss: 2387.81411335
Iter-32, train loss: 47.31174738, valid loss: 2391.38133312
Iter-33, train loss: 47.23146329, valid loss: 2394.91454077
Iter-34, train loss: 47.15043388, valid loss: 2398.41105628
Iter-35, train loss: 47.06868258, valid loss: 2401.86765708
Iter-36, train loss: 46.98623502, valid loss: 2405.28097099
Iter-37, train loss: 46.90312029, valid loss: 2408.64790505
Iter-38, train loss: 46.81937207, valid loss: 2411.96599589
Iter-39, train loss: 46.73502917, valid loss: 2415.23356806
Iter-40, train loss: 46.65013544, valid loss: 2418.44963224
Iter-41, train loss: 46.56473902, valid loss: 2421.61353192
Iter-42, train loss: 46.47889096, valid loss: 2424.72441663
Iter-43, train loss: 46.39264351, valid loss: 2427.78064436
Iter-44, train loss: 46.30604810, valid loss: 2430.77917279
Iter-45, train loss: 46.21915300, valid loss: 2433.71493470
Iter-46, train loss: 46.13200095, valid loss: 2436.58027072
Iter-47, train loss: 46.04462681, valid loss: 2439.36482511
Iter-48, train loss: 45.95705573, valid loss: 2442.05650021
Iter-49, train loss: 45.86930163, valid loss: 2444.64357315
Iter-50, train loss: 45.78136519, valid loss: 2447.11732571
Iter-51, train loss: 45.69323029, valid loss: 2449.47428720
Iter-52, train loss: 45.60485947, valid loss: 2451.71762584
Iter-53, train loss: 45.51618968, valid loss: 2453.84928385
Iter-54, train loss: 45.42713490, valid loss: 2455.85037192
Iter-55, train loss: 45.33762245, valid loss: 2457.69307471
Iter-56, train loss: 45.24761933, valid loss: 2459.35564345
Iter-57, train loss: 45.15714367, valid loss: 2460.83250599
Iter-58, train loss: 45.06626186, valid loss: 2462.13691935
Iter-59, train loss: 44.97507944, valid loss: 2463.29678940
Iter-60, train loss: 44.88373266, valid loss: 2464.34841905
Iter-61, train loss: 44.79238098, valid loss: 2465.33112171
Iter-62, train loss: 44.70119749, valid loss: 2466.28323118
Iter-63, train loss: 44.61035438, valid loss: 2467.23890931
Iter-64, train loss: 44.52000401, valid loss: 2468.22550326
Iter-65, train loss: 44.43026108, valid loss: 2469.26199235
Iter-66, train loss: 44.34119365, valid loss: 2470.35924310
Iter-67, train loss: 44.25282661, valid loss: 2471.52202052
Iter-68, train loss: 44.16515457, valid loss: 2472.75178223
Iter-69, train loss: 44.07815705, valid loss: 2474.04919653
Iter-70, train loss: 43.99181056, valid loss: 2475.41584347
Iter-71, train loss: 43.90609601, valid loss: 2476.85480748
Iter-72, train loss: 43.82100206, valid loss: 2478.37023034
Iter-73, train loss: 43.73652551, valid loss: 2479.96660434
Iter-74, train loss: 43.65266987, valid loss: 2481.64837385
Iter-75, train loss: 43.56944355, valid loss: 2483.41970394
Iter-76, train loss: 43.48685812, valid loss: 2485.28422429
Iter-77, train loss: 43.40492707, valid loss: 2487.24475686
Iter-78, train loss: 43.32366498, valid loss: 2489.30308803
Iter-79, train loss: 43.24308714, valid loss: 2491.45982148
Iter-80, train loss: 43.16320944, valid loss: 2493.71431944
Iter-81, train loss: 43.08404846, valid loss: 2496.06472350
Iter-82, train loss: 43.00562158, valid loss: 2498.50803790
Iter-83, train loss: 42.92794704, valid loss: 2501.04025585
Iter-84, train loss: 42.85104384, valid loss: 2503.65651025
Iter-85, train loss: 42.77493151, valid loss: 2506.35123240
Iter-86, train loss: 42.69962970, valid loss: 2509.11830539
Iter-87, train loss: 42.62515764, valid loss: 2511.95120196
Iter-88, train loss: 42.55153351, valid loss: 2514.84310094
Iter-89, train loss: 42.47877381, valid loss: 2517.78698141
Iter-90, train loss: 42.40689273, valid loss: 2520.77570094
Iter-91, train loss: 42.33590152, valid loss: 2523.80207420
Iter-92, train loss: 42.26580814, valid loss: 2526.85898093
Iter-93, train loss: 42.19661693, valid loss: 2529.93954501
Iter-94, train loss: 42.12832872, valid loss: 2533.03743276
Iter-95, train loss: 42.06094114, valid loss: 2536.14730532
Iter-96, train loss: 41.99444932, valid loss: 2539.26541209
Iter-97, train loss: 41.92884681, valid loss: 2542.39022629
Iter-98, train loss: 41.86412641, valid loss: 2545.52292750
Iter-99, train loss: 41.80028075, valid loss: 2548.66749586
Iter-100, train loss: 41.73730225, valid loss: 2551.83026550
Iter-101, train loss: 41.67518245, valid loss: 2555.01898380
Iter-102, train loss: 41.61391108, valid loss: 2558.24163117
Iter-103, train loss: 41.55347500, valid loss: 2561.50533915
Iter-104, train loss: 41.49385761, valid loss: 2564.81566413
Iter-105, train loss: 41.43503853, valid loss: 2568.17630359
Iter-106, train loss: 41.37699386, valid loss: 2571.58919183
Iter-107, train loss: 41.31969660, valid loss: 2575.05483835
Iter-108, train loss: 41.26311719, valid loss: 2578.57276816
Iter-109, train loss: 41.20722410, valid loss: 2582.14195467
Iter-110, train loss: 41.15198413, valid loss: 2585.76117107
Iter-111, train loss: 41.09736256, valid loss: 2589.42921003
Iter-112, train loss: 41.04332283, valid loss: 2593.14493014
Iter-113, train loss: 40.98982572, valid loss: 2596.90707806
Iter-114, train loss: 40.93682778, valid loss: 2600.71380935
Iter-115, train loss: 40.88427869, valid loss: 2604.56178151
Iter-116, train loss: 40.83211711, valid loss: 2608.44461204
Iter-117, train loss: 40.78026414, valid loss: 2612.35035305
Iter-118, train loss: 40.72861325, valid loss: 2616.25733883
Iter-119, train loss: 40.67701471, valid loss: 2620.12702096
Iter-120, train loss: 40.62525185, valid loss: 2623.89062482
Iter-121, train loss: 40.57300655, valid loss: 2627.42357348
Iter-122, train loss: 40.51981501, valid loss: 2630.50042742
Iter-123, train loss: 40.46502376, valid loss: 2632.72948325
Iter-124, train loss: 40.40777255, valid loss: 2633.48505889
Iter-125, train loss: 40.34708111, valid loss: 2631.89439098
Iter-126, train loss: 40.28229907, valid loss: 2627.04701114
Iter-127, train loss: 40.21451304, valid loss: 2618.86986372
Iter-128, train loss: 40.14840298, valid loss: 2609.71361210
Iter-129, train loss: 40.08918683, valid loss: 2603.29645131
Iter-130, train loss: 40.03655567, valid loss: 2600.53793797
Iter-131, train loss: 39.98767978, valid loss: 2600.24569704
Iter-132, train loss: 39.94079124, valid loss: 2601.41642106
Iter-133, train loss: 39.89505718, valid loss: 2603.50785491
Iter-134, train loss: 39.85008720, valid loss: 2606.24400327
Iter-135, train loss: 39.80568843, valid loss: 2609.48213477
Iter-136, train loss: 39.76176117, valid loss: 2613.14833010

In [ ]:
import matplotlib.pyplot as plt

plt.plot(net.losses['train'], label='Train loss')
plt.plot(net.losses['smooth train'], label='Train smooth loss')
plt.legend()
plt.show()

In [ ]:
import matplotlib.pyplot as plt

plt.plot(net.losses['valid'], label='Validation loss')
plt.legend()
plt.show()

In [ ]:
import matplotlib.pyplot as plt

y_pred = net.test(X_seed=X_valid[0], h=net.initial_state(), size=X_valid.shape[0]) # ys_tx1xn
y_pred.shape, Y_valid.shape

plt.plot(y_pred[:1000], label='y_pred')
plt.plot(Y_valid[:1000], label='Y_valid')
# plt.plot(X_valid[:100], label='X_valid')
plt.legend()
plt.show()

In [ ]:


In [ ]: