In [54]:
# Data: time-serie data from smartwatch or smartwatch data
# %matplotlib inline # for plt.show()
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Data reading
# The smartwatch historical/time-seris data to visualize
# data_path = 'data/smartwatch_data/experimental_data_analysis/Basis_Watch_Data.csv'
# data_path = 'data/financial_data/USD_INR.csv'
data_path = 'data/bike_data/hour.csv'
data = pd.read_csv(data_path)

# Showing the data file csv or comma separated value
data[:10]


Out[54]:
instant dteday season yr mnth hr holiday weekday workingday weathersit temp atemp hum windspeed casual registered cnt
0 1 2011-01-01 1 0 1 0 0 6 0 1 0.24 0.2879 0.81 0.0000 3 13 16
1 2 2011-01-01 1 0 1 1 0 6 0 1 0.22 0.2727 0.80 0.0000 8 32 40
2 3 2011-01-01 1 0 1 2 0 6 0 1 0.22 0.2727 0.80 0.0000 5 27 32
3 4 2011-01-01 1 0 1 3 0 6 0 1 0.24 0.2879 0.75 0.0000 3 10 13
4 5 2011-01-01 1 0 1 4 0 6 0 1 0.24 0.2879 0.75 0.0000 0 1 1
5 6 2011-01-01 1 0 1 5 0 6 0 2 0.24 0.2576 0.75 0.0896 0 1 1
6 7 2011-01-01 1 0 1 6 0 6 0 1 0.22 0.2727 0.80 0.0000 2 0 2
7 8 2011-01-01 1 0 1 7 0 6 0 1 0.20 0.2576 0.86 0.0000 1 2 3
8 9 2011-01-01 1 0 1 8 0 6 0 1 0.24 0.2879 0.75 0.0000 1 7 8
9 10 2011-01-01 1 0 1 9 0 6 0 1 0.32 0.3485 0.76 0.0000 8 6 14

In [55]:
# Data: cleaning
# Getting rid of NaN
data = data.fillna(value=0.0)

# # Plotting the smartwatch data before scaling/batch normalization
# data[:10000]['Price'].plot()
data[: 10].plot()
plt.legend()
plt.show()



In [56]:
data_array = np.array(data)
data_array.shape, data_array.dtype
data_main = np.array(data_array[:, 2:], dtype=float)
data_main.shape, data_main.dtype

plt.plot(data_main[:100])
plt.show()



In [44]:
mean = np.mean(data_main, axis=0)
std = np.std(data_main, axis=0)
std.shape, mean.shape, std.dtype, mean.dtype

data_norm = (data_main - mean) / std
plt.plot(data_norm[:100])
plt.show()
data_norm.mean(), data_norm.std(), data_norm.var(), data_norm.shape, data_norm.dtype


Out[44]:
(-2.0933188371550222e-17,
 0.99999999999999978,
 0.99999999999999967,
 (17379, 15),
 dtype('float64'))

In [45]:
train_data = data_norm[:17000, -1] # the last dim/variable/feature
test_data = data_norm[17000:, -1] # the last dim/variable/feature
train_data.shape, test_data.shape
X_train = train_data[0:16999]
Y_train = train_data[1:17000]
X_train.shape, Y_train.shape

plt.plot(X_train[:100])
plt.plot(Y_train[:100])
plt.show()



In [46]:
X_valid = test_data[0:378] 
Y_valid = test_data[1:379]
X_valid.shape, Y_valid.shape
plt.plot(X_valid[:100])
plt.plot(Y_valid[:100])
plt.show()



In [47]:
# Model or Network
import impl.layer as l
from impl.loss import *

class GRU:
    def __init__(self, D, H, p_dropout, lam):
        self.D = D
        self.H = H
        self.p_dropout = p_dropout
        self.lam = lam
        self.losses = {'train':[], 'smooth train':[], 'valid': []}
        
        # Model params
        Z = H + D
        m = dict(
            Wz=np.random.randn(Z, H) / np.sqrt(Z / 2.),
            Wr=np.random.randn(Z, H) / np.sqrt(Z / 2.),
            Wh=np.random.randn(Z, H) / np.sqrt(Z / 2.),
            Wy=np.random.randn(H, D) / np.sqrt(H / 2.),
            bz=np.zeros((1, H)),
            br=np.zeros((1, H)),
            bh=np.zeros((1, H)),
            by=np.zeros((1, D))
        )
        self.model = [] # from left to right
        for _ in range(2):
            self.model.append(m) # bidirectional: from right to left
        
    def initial_state(self):
        return np.zeros((1, self.H))

    def forward(self, X, h, m):
        Wz, Wr, Wh, Wy = m['Wz'], m['Wr'], m['Wh'], m['Wy']
        bz, br, bh, by = m['bz'], m['br'], m['bh'], m['by']

        X_in = X.copy()
        h_in = h.copy()

        X = np.column_stack((h_in, X_in))

        hz, hz_cache = l.fc_forward(X, Wz, bz)
        hz, hz_sigm_cache = l.sigmoid_forward(hz)

        hr, hr_cache = l.fc_forward(X, Wr, br)
        hr, hr_sigm_cache = l.sigmoid_forward(hr)

        X = np.column_stack((hr * h_in, X_in))
        
        hh, hh_cache = l.fc_forward(X, Wh, bh)
        hh, hh_tanh_cache = l.tanh_forward(hh)

        # h = (1. - hz) * h_old + hz * hh
        # or
        h = ((1. - hz) * h_in) + (hz * hh)
        # or
        # h = h_in + hz (hh - h_in)

        y, y_cache = l.fc_forward(h, Wy, by)
        
        cache = (h_in, hz, hz_cache, hz_sigm_cache, hr, hr_cache, hr_sigm_cache, hh, hh_cache, hh_tanh_cache, 
                 y_cache)

        return y, h, cache

    def backward(self, dy, dh, cache):
        h_in, hz, hz_cache, hz_sigm_cache, hr, hr_cache, hr_sigm_cache, hh, hh_cache, hh_tanh_cache, y_cache = cache
        
        dh_out = dh.copy()

        dh, dWy, dby = l.fc_backward(dy, y_cache)
        dh += dh_out

        dh_in1 = (1. - hz) * dh
        dhh = hz * dh
        dhz = (hh * dh) - (h_in * dh)
        # or
        # dhz = (hh - h_in) * dh

        dhh = l.tanh_backward(dhh, hh_tanh_cache)
        dXh, dWh, dbh = l.fc_backward(dhh, hh_cache)

        dh = dXh[:, :self.H]
        dX_in2 = dXh[:, self.H:]
        dh_in2 = hr * dh

        dhr = h_in * dh
        dhr = l.sigmoid_backward(dhr, hr_sigm_cache)
        dXr, dWr, dbr = l.fc_backward(dhr, hr_cache)

        dhz = l.sigmoid_backward(dhz, hz_sigm_cache)
        dXz, dWz, dbz = l.fc_backward(dhz, hz_cache)

        dX = dXr + dXz
        dh_in3 = dX[:, :self.H]
        dX_in1 = dX[:, self.H:]

        dh = dh_in1 + dh_in2 + dh_in3
        dX = dX_in1 + dX_in2

        grad = dict(Wz=dWz, Wr=dWr, Wh=dWh, Wy=dWy, bz=dbz, br=dbr, bh=dbh, by=dby)
        
        return dX, dh, grad

    def train_forward(self, X_train, h):
        ys, caches, do_caches = [], [], []
        for _ in range(2):
            ys.append([])
            caches.append([])
            do_caches.append([])

        # Left to right
        for X in X_train:
            X = X.reshape(1, -1) # X_1xn
            y, h, cache = self.forward(X, h, self.model[0])
            y, do_cache = l.dropout_forward(y, self.p_dropout)
            caches[0].append(cache)
            do_caches[0].append(do_cache)
            ys[0].append(y)
        
        # Bidirectional: right to left
        for X in reversed(X_train):
            X = X.reshape(1, -1) # X_1xn
            y, h, cache = self.forward(X, h, self.model[1])
            y, do_cache = l.dropout_forward(y, self.p_dropout)
            caches[1].append(cache)
            do_caches[1].append(do_cache)
            ys[1].append(y)
            
        # ys_2xtx1xn
        ys = (np.array(ys[0], dtype=float) + np.array(ys[1], dtype=float))/ 2 # ys_1xtx1xn
        ys = ys.reshape(len(ys), -1) # ys_txn instead of ys_tx1xn
        
        return ys, caches, do_caches
                                
    def loss_function(self, y_pred, y_train): # , alpha alpha: learning rate
        loss, dys = 0.0, []

        for y, Y in zip(y_pred, y_train): # both self.model[0] & [1] have equal structure or key elements!
            loss += l2_regression_reg(model=self.model[0], y_pred=y, y_train=Y, lam=self.lam)
            dy = dl2_regression(y_pred=y, y_train=Y)
            dys.append(dy)
            
        return loss, dys # this would be fed into both RNNs directions: l2r & r2l
    
    def train_backward(self, dys, caches, do_caches):
        dh = np.zeros((1, self.H)) 
        grad = {key: np.zeros_like(val) for key, val in self.model[0].items()} # for both is the same
        grads = []
        
        # Gradient for bi-directional RNN or the both-direction of RNN
        for _ in range(2):
            grads.append({key: np.zeros_like(val) for key, val in self.model[0].items()})

        # Left to right: backward
        for t in reversed(range(len(dys))):
            dy = dys[t].reshape(1, -1) # dy_1xn
            dy = l.dropout_backward(dy, do_caches[0][t])
            _, dh, grad = self.backward(dy, dh, caches[0][t])
            for key in grad.keys():
                grads[0][key] += grad[key]
                
        # Right to left: backward
        for t in range(len(dys)):
            dy = dys[t].reshape(1, -1) # dy_1xn
            dy = l.dropout_backward(dy, do_caches[1][t])
            _, dh, grad = self.backward(dy, dh, caches[1][t])
            for key in grad.keys():
                grads[1][key] += grad[key]
                
        return grads
    
    def test(self, X_seed, h, size):
        ys = []
        for _ in range(2):
            ys.append([])
            
        X = X_seed.reshape(1, -1)
        
        # Left to right
        for _ in range(size):
            y, h, _ = self.forward(X, h, self.model[0])
            X = y.copy() # previous out for the next input for prediction
            ys[0].append(y) # list array
        
        # Right to left
        for _ in reversed(range(size)):
            y, h, _ = self.forward(X, h, self.model[1])
            X = y.copy() # previous out for the next input for prediction
            ys[1].append(y) # list array
            
        # ys_2xtx1xn
        ys = (np.array(ys[0], dtype=float) + np.array(ys[1], dtype=float))/ 2 # ys_1xtx1xn
        # print('ys.shape', ys.shape)
        ys = ys.reshape(len(ys), -1) # ys_txn instead of ys_tx1xn
        return ys

In [48]:
def get_minibatch(X, y, minibatch_size, shuffle):
    minibatches = []

    for i in range(0, X.shape[0], minibatch_size):
    # for i in range(0, X.shape[0] - minibatch_size + 1, 1):
        X_mini = X[i:i + minibatch_size]
        y_mini = y[i:i + minibatch_size]
        minibatches.append((X_mini, y_mini))

    return minibatches

def adam_rnn(nn, XY_train, XY_valid, alpha, mb_size, n_iter, print_after):
    X_train, y_train = XY_train
    X_valid, y_valid = XY_valid

    M, R = [], []
    for _ in range(2):
        M.append({key: np.zeros_like(val) for key, val in nn.model[0].items()})
        R.append({key: np.zeros_like(val) for key, val in nn.model[0].items()})
        
    beta1 = .99
    beta2 = .999
    state = nn.initial_state()
    smooth_loss = 1.
    minibatches = get_minibatch(X_train, y_train, mb_size, shuffle=False)
    
    for iter in range(1, n_iter + 1):
        for idx in range(len(minibatches)):
            # Train the model
            X_mini, y_mini = minibatches[idx]
            ys, caches, do_caches = nn.train_forward(X_mini, state)
            loss, dys = nn.loss_function(y_pred=ys, y_train=y_mini) #, alpha=alpha
            grads = nn.train_backward(dys, caches, do_caches)
            nn.losses['train'].append(loss)
            smooth_loss = (0.999 * smooth_loss) + (0.001 * loss)
            nn.losses['smooth train'].append(smooth_loss)
            
            # Update the model
            for num in range(2):
                for key in grads[num].keys(): #key, value: items
                    M[num][key] = l.exp_running_avg(M[num][key], grads[num][key], beta1)
                    R[num][key] = l.exp_running_avg(R[num][key], grads[num][key]**2, beta2)
                    m_k_hat = M[num][key] / (1. - (beta1 ** iter))
                    r_k_hat = R[num][key] / (1. - (beta2 ** iter))
                    nn.model[num][key] -= alpha * m_k_hat / (np.sqrt(r_k_hat) + l.eps)

            # Validate the model (by testing)
            ys = nn.test(X_seed=X_valid[0], h=state, size=X_valid.shape[0]) # ys_tx1xn
            valid_loss, _ = nn.loss_function(y_pred=ys, y_train=Y_valid) #, alpha=alpha
            nn.losses['valid'].append(valid_loss)

        # Print the model loss/ error
        if iter % print_after == 0:
            print('Iter-{}, train loss: {:.8f}, valid loss: {:.8f}'.format(iter, loss, valid_loss))

    return nn

In [49]:
# Hyper-parameters
time_step = 100 # minibatch size
n_iter = 200 # epochs
alpha = 1e-4 # learning_rate
print_after = 1 # print training loss, valid, and test
num_hidden_units = 64 # num_hidden_units in hidden layer
num_input_units = 1 # X_train.shape[1] # X_txn
p_dropout = 0.95 # p_dropout == keep_prob: probability of keeping neurons/units
lam = 1e-4 # regularization

# Build the network and learning it or optimizing it using SGD
# def adam_rnn(nn, X_train, y_train, alpha=0.001, mb_size=256, n_iter=2000, print_after=100):
net = GRU(D=num_input_units, H=num_hidden_units, p_dropout=p_dropout, lam=lam) #, L=num_layers, p_dropout=p_dropout

# Start learning using BP-SGD-ADAM
adam_rnn(nn=net, XY_train=(X_train, Y_train), XY_valid=(X_valid, Y_valid), alpha=alpha, mb_size=time_step,
         n_iter=n_iter, print_after=print_after)


Iter-1, train loss: 33.99246544, valid loss: 343.57881641
Iter-2, train loss: 24.93250646, valid loss: 162.15177421
Iter-3, train loss: 20.96521016, valid loss: 151.60562921
Iter-4, train loss: 22.46883312, valid loss: 272.60795039
Iter-5, train loss: 24.92502377, valid loss: 502.56513895
Iter-6, train loss: 18.70467447, valid loss: 276.46202123
Iter-7, train loss: 18.61615278, valid loss: 340.56802906
Iter-8, train loss: 22.87142505, valid loss: 494.79212649
Iter-9, train loss: 20.28228198, valid loss: 319.64235504
Iter-10, train loss: 21.14364360, valid loss: 411.96446963
Iter-11, train loss: 20.69782646, valid loss: 481.05872486
Iter-12, train loss: 16.48876657, valid loss: 435.05972683
Iter-13, train loss: 17.58755808, valid loss: 358.57065055
Iter-14, train loss: 16.73245514, valid loss: 423.06551270
Iter-15, train loss: 19.37921512, valid loss: 496.29103166
Iter-16, train loss: 19.39028928, valid loss: 384.39553849
Iter-17, train loss: 18.37520346, valid loss: 457.83349693
Iter-18, train loss: 20.53324051, valid loss: 450.81331567
Iter-19, train loss: 17.98369919, valid loss: 497.43905592
Iter-20, train loss: 20.17900253, valid loss: 525.91659895
Iter-21, train loss: 21.09053991, valid loss: 619.20761459
Iter-22, train loss: 21.66098547, valid loss: 610.52310939
Iter-23, train loss: 22.45466883, valid loss: 615.94117688
Iter-24, train loss: 21.13186782, valid loss: 1103.56915479
Iter-25, train loss: 21.44433103, valid loss: 1077.58084262
Iter-26, train loss: 20.39510876, valid loss: 1158.67204873
Iter-27, train loss: 21.06316704, valid loss: 1138.81478485
Iter-28, train loss: 20.28404654, valid loss: 1064.14823472
Iter-29, train loss: 20.07088682, valid loss: 1091.22484579
Iter-30, train loss: 22.66516922, valid loss: 1041.17879462
Iter-31, train loss: 19.27542362, valid loss: 964.85985058
Iter-32, train loss: 22.56799994, valid loss: 977.69966115
Iter-33, train loss: 22.07414145, valid loss: 871.26195933
Iter-34, train loss: 21.87079639, valid loss: 847.73060544
Iter-35, train loss: 20.32552499, valid loss: 760.71271968
Iter-36, train loss: 20.99938467, valid loss: 635.04243276
Iter-37, train loss: 22.63197761, valid loss: 565.64513887
Iter-38, train loss: 21.39185155, valid loss: 514.07817361
Iter-39, train loss: 22.96343140, valid loss: 451.18436911
Iter-40, train loss: 20.23527147, valid loss: 456.01043372
Iter-41, train loss: 20.97791611, valid loss: 396.83794898
Iter-42, train loss: 21.52813316, valid loss: 378.52096346
Iter-43, train loss: 22.22300856, valid loss: 367.45539531
Iter-44, train loss: 25.58585265, valid loss: 357.23350577
Iter-45, train loss: 21.74436954, valid loss: 347.72203541
Iter-46, train loss: 20.00598072, valid loss: 360.66505668
Iter-47, train loss: 22.86109146, valid loss: 388.97527577
Iter-48, train loss: 18.55372771, valid loss: 373.62702748
Iter-49, train loss: 19.20198306, valid loss: 384.36155523
Iter-50, train loss: 21.92628206, valid loss: 386.57422248
Iter-51, train loss: 20.26481479, valid loss: 400.61458814
Iter-52, train loss: 18.54863064, valid loss: 418.07521738
Iter-53, train loss: 19.98798448, valid loss: 397.43696041
Iter-54, train loss: 19.86830990, valid loss: 415.91756078
Iter-55, train loss: 20.52527648, valid loss: 443.33497155
Iter-56, train loss: 20.31582102, valid loss: 421.83264705
Iter-57, train loss: 22.04248235, valid loss: 429.53494143
Iter-58, train loss: 19.27655582, valid loss: 444.43093058
Iter-59, train loss: 21.44390267, valid loss: 445.52771741
Iter-60, train loss: 21.86884236, valid loss: 446.44818141
Iter-61, train loss: 19.40393495, valid loss: 445.94610349
Iter-62, train loss: 19.56642144, valid loss: 446.38704029
Iter-63, train loss: 22.09550283, valid loss: 436.45025041
Iter-64, train loss: 21.84621505, valid loss: 427.62302543
Iter-65, train loss: 20.57571196, valid loss: 427.69051721
Iter-66, train loss: 22.20334177, valid loss: 468.85952366
Iter-67, train loss: 21.84795712, valid loss: 435.94599720
Iter-68, train loss: 19.45875993, valid loss: 440.34055048
Iter-69, train loss: 21.29321416, valid loss: 432.31891207
Iter-70, train loss: 21.13236517, valid loss: 426.32393702
Iter-71, train loss: 23.65718632, valid loss: 439.27777250
Iter-72, train loss: 19.32302623, valid loss: 438.17916739
Iter-73, train loss: 20.84948759, valid loss: 433.39230083
Iter-74, train loss: 18.05228018, valid loss: 439.04244332
Iter-75, train loss: 18.29602485, valid loss: 451.17855631
Iter-76, train loss: 18.67018065, valid loss: 448.10142500
Iter-77, train loss: 21.12343802, valid loss: 444.87826144
Iter-78, train loss: 19.39322322, valid loss: 438.36881964
Iter-79, train loss: 20.15478037, valid loss: 452.61833687
Iter-80, train loss: 22.70071015, valid loss: 440.17235713
Iter-81, train loss: 20.17773645, valid loss: 444.51442393
Iter-82, train loss: 19.41780748, valid loss: 450.74917698
Iter-83, train loss: 20.83789296, valid loss: 444.77696442
Iter-84, train loss: 20.09971365, valid loss: 442.53022988
Iter-85, train loss: 17.47565007, valid loss: 437.09521980
Iter-86, train loss: 18.56978053, valid loss: 430.82252357
Iter-87, train loss: 19.28727833, valid loss: 444.15745529
Iter-88, train loss: 17.67323655, valid loss: 437.62015535
Iter-89, train loss: 21.70153902, valid loss: 439.14370218
Iter-90, train loss: 20.91214825, valid loss: 439.05579672
Iter-91, train loss: 20.17019404, valid loss: 452.93187495
Iter-92, train loss: 20.16404849, valid loss: 479.86966180
Iter-93, train loss: 18.24917156, valid loss: 454.31882655
Iter-94, train loss: 18.10118680, valid loss: 438.18768807
Iter-95, train loss: 18.01448689, valid loss: 431.32549278
Iter-96, train loss: 19.62712962, valid loss: 422.58115523
Iter-97, train loss: 17.87538441, valid loss: 423.92277666
Iter-98, train loss: 16.56781773, valid loss: 408.70265813
Iter-99, train loss: 18.79969851, valid loss: 407.82560812
Iter-100, train loss: 21.02656265, valid loss: 388.75888749
Iter-101, train loss: 21.86493955, valid loss: 361.84769538
Iter-102, train loss: 21.00733083, valid loss: 337.39747723
Iter-103, train loss: 24.86744543, valid loss: 351.77737749
Iter-104, train loss: 19.84052572, valid loss: 401.21070656
Iter-105, train loss: 21.06063493, valid loss: 413.42431687
Iter-106, train loss: 19.41004497, valid loss: 406.72738270
Iter-107, train loss: 21.41527715, valid loss: 414.17833163
Iter-108, train loss: 20.59526641, valid loss: 385.18928638
Iter-109, train loss: 18.76112822, valid loss: 398.17282427
Iter-110, train loss: 22.36491499, valid loss: 395.37423509
Iter-111, train loss: 22.09784670, valid loss: 378.46874612
Iter-112, train loss: 20.77176535, valid loss: 381.65195564
Iter-113, train loss: 21.67162098, valid loss: 396.72303881
Iter-114, train loss: 22.52409609, valid loss: 395.08991909
Iter-115, train loss: 24.52445541, valid loss: 372.74829729
Iter-116, train loss: 24.28746673, valid loss: 374.51947550
Iter-117, train loss: 26.47835801, valid loss: 396.54656433
Iter-118, train loss: 21.70088199, valid loss: 396.05609123
Iter-119, train loss: 23.87005304, valid loss: 397.99251327
Iter-120, train loss: 23.70694385, valid loss: 407.85686370
Iter-121, train loss: 23.38391323, valid loss: 410.88458832
Iter-122, train loss: 22.21810259, valid loss: 415.41336899
Iter-123, train loss: 21.61472134, valid loss: 418.34443823
Iter-124, train loss: 27.56068301, valid loss: 420.66110721
Iter-125, train loss: 26.24004671, valid loss: 432.79395169
Iter-126, train loss: 24.10203826, valid loss: 432.59145989
Iter-127, train loss: 25.98148374, valid loss: 451.94686375
Iter-128, train loss: 20.88233494, valid loss: 469.59690562
Iter-129, train loss: 22.01373613, valid loss: 451.93858257
Iter-130, train loss: 23.70491223, valid loss: 585.84561020
Iter-131, train loss: 23.84373764, valid loss: 561.74304769
Iter-132, train loss: 23.90134445, valid loss: 524.71653503
Iter-133, train loss: 23.76436864, valid loss: 516.09890567
Iter-134, train loss: 23.46021512, valid loss: 511.49716051
Iter-135, train loss: 21.27011576, valid loss: 541.11250172
Iter-136, train loss: 23.61887665, valid loss: 550.22906406
Iter-137, train loss: 23.32886645, valid loss: 553.25030890
Iter-138, train loss: 23.25038958, valid loss: 565.27498680
Iter-139, train loss: 23.05867651, valid loss: 617.40457044
Iter-140, train loss: 22.80261350, valid loss: 603.07939387
Iter-141, train loss: 29.20012195, valid loss: 656.92120458
Iter-142, train loss: 25.03802758, valid loss: 679.09618205
Iter-143, train loss: 23.17203896, valid loss: 657.04692902
Iter-144, train loss: 24.53805286, valid loss: 648.58046564
Iter-145, train loss: 26.07643983, valid loss: 662.60489124
Iter-146, train loss: 23.05974005, valid loss: 681.47905353
Iter-147, train loss: 22.66997092, valid loss: 681.75249199
Iter-148, train loss: 23.49523294, valid loss: 683.82078559
Iter-149, train loss: 24.39352932, valid loss: 691.37359233
Iter-150, train loss: 24.40400012, valid loss: 693.50337465
Iter-151, train loss: 21.79522821, valid loss: 691.84165105
Iter-152, train loss: 24.43391910, valid loss: 691.08636507
Iter-153, train loss: 24.55648110, valid loss: 691.58196914
Iter-154, train loss: 22.92164345, valid loss: 694.87829307
Iter-155, train loss: 22.57249219, valid loss: 697.45938000
Iter-156, train loss: 25.82031236, valid loss: 696.82828648
Iter-157, train loss: 23.82056562, valid loss: 698.54660268
Iter-158, train loss: 25.81879717, valid loss: 692.75627348
Iter-159, train loss: 26.10517195, valid loss: 693.49994228
Iter-160, train loss: 23.53900510, valid loss: 663.20220788
Iter-161, train loss: 26.14923085, valid loss: 682.00301682
Iter-162, train loss: 25.85715133, valid loss: 674.89004975
Iter-163, train loss: 25.51320599, valid loss: 697.73123726
Iter-164, train loss: 22.05894983, valid loss: 692.08993749
Iter-165, train loss: 23.90395923, valid loss: 654.42687994
Iter-166, train loss: 23.22450773, valid loss: 645.18055656
Iter-167, train loss: 24.77702783, valid loss: 602.54202270
Iter-168, train loss: 24.78483621, valid loss: 610.23544074
Iter-169, train loss: 23.82316775, valid loss: 611.53913838
Iter-170, train loss: 26.23427613, valid loss: 617.60954134
Iter-171, train loss: 25.53025430, valid loss: 620.06474556
Iter-172, train loss: 24.39215459, valid loss: 624.32076654
Iter-173, train loss: 26.17162166, valid loss: 627.17722044
Iter-174, train loss: 25.09888735, valid loss: 633.17303200
Iter-175, train loss: 24.60725911, valid loss: 642.29508002
Iter-176, train loss: 24.67810499, valid loss: 643.35183465
Iter-177, train loss: 22.20784668, valid loss: 644.60059609
Iter-178, train loss: 26.05588282, valid loss: 648.25912809
Iter-179, train loss: 25.33745249, valid loss: 653.14754334
Iter-180, train loss: 24.43834844, valid loss: 648.83631881
Iter-181, train loss: 22.63973563, valid loss: 649.64328472
Iter-182, train loss: 23.83569072, valid loss: 705.66482394
Iter-183, train loss: 24.59998618, valid loss: 650.09090068
Iter-184, train loss: 21.82535625, valid loss: 656.29908524
Iter-185, train loss: 24.68546841, valid loss: 654.57607292
Iter-186, train loss: 22.25878482, valid loss: 653.47661030
Iter-187, train loss: 25.96454883, valid loss: 672.30616435
Iter-188, train loss: 21.83476588, valid loss: 719.07451178
Iter-189, train loss: 22.28671255, valid loss: 730.84234793
Iter-190, train loss: 24.97144741, valid loss: 741.99575467
Iter-191, train loss: 24.04666437, valid loss: 733.53295528
Iter-192, train loss: 23.03396029, valid loss: 729.51851428
Iter-193, train loss: 27.69277521, valid loss: 696.78457910
Iter-194, train loss: 23.03221238, valid loss: 681.25753653
Iter-195, train loss: 22.21077153, valid loss: 705.89353931
Iter-196, train loss: 19.98666351, valid loss: 703.73562322
Iter-197, train loss: 22.34258637, valid loss: 673.86576325
Iter-198, train loss: 20.55820430, valid loss: 659.07345322
Iter-199, train loss: 22.01257430, valid loss: 595.55950837
Iter-200, train loss: 20.39325318, valid loss: 608.25320937
Out[49]:
<__main__.GRU at 0x7fde2e4c3198>

In [50]:
import matplotlib.pyplot as plt

plt.plot(net.losses['train'], label='Train loss')
plt.plot(net.losses['smooth train'], label='Train smooth loss')
plt.legend()
plt.show()



In [51]:
import matplotlib.pyplot as plt

plt.plot(net.losses['valid'], label='Validation loss')
plt.legend()
plt.show()



In [53]:
import matplotlib.pyplot as plt

y_pred = net.test(X_seed=X_valid[0], h=net.initial_state(), size=X_valid.shape[0]) # ys_tx1xn
y_pred.shape, Y_valid.shape

plt.plot(y_pred[:], label='y_pred')
plt.plot(Y_valid[:], label='Y_valid')
plt.plot(X_valid[:], label='X_valid')
plt.legend()
plt.show()



In [ ]:


In [ ]: