In [1]:
# Data: time-serie data from smartwatch or smartwatch data
# %matplotlib inline # for plt.show()
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Data reading
# The smartwatch historical/time-seris data to visualize
# data_path = 'data/smartwatch_data/experimental_data_analysis/Basis_Watch_Data.csv'
# data_path = 'data/financial_data/USD_INR.csv'
data_path = 'data/bike_data/hour.csv'
data = pd.read_csv(data_path)

# Data: cleaning
# Getting rid of NaN
data = data.fillna(value=0.0)

# Showing the data file csv or comma separated value
data[:10]


Out[1]:
instant dteday season yr mnth hr holiday weekday workingday weathersit temp atemp hum windspeed casual registered cnt
0 1 2011-01-01 1 0 1 0 0 6 0 1 0.24 0.2879 0.81 0.0000 3 13 16
1 2 2011-01-01 1 0 1 1 0 6 0 1 0.22 0.2727 0.80 0.0000 8 32 40
2 3 2011-01-01 1 0 1 2 0 6 0 1 0.22 0.2727 0.80 0.0000 5 27 32
3 4 2011-01-01 1 0 1 3 0 6 0 1 0.24 0.2879 0.75 0.0000 3 10 13
4 5 2011-01-01 1 0 1 4 0 6 0 1 0.24 0.2879 0.75 0.0000 0 1 1
5 6 2011-01-01 1 0 1 5 0 6 0 2 0.24 0.2576 0.75 0.0896 0 1 1
6 7 2011-01-01 1 0 1 6 0 6 0 1 0.22 0.2727 0.80 0.0000 2 0 2
7 8 2011-01-01 1 0 1 7 0 6 0 1 0.20 0.2576 0.86 0.0000 1 2 3
8 9 2011-01-01 1 0 1 8 0 6 0 1 0.24 0.2879 0.75 0.0000 1 7 8
9 10 2011-01-01 1 0 1 9 0 6 0 1 0.32 0.3485 0.76 0.0000 8 6 14

In [2]:
# # Plotting the smartwatch data before scaling/batch normalization
# data[:10000]['Price'].plot()
data[: 10].plot()
plt.legend()
plt.show()



In [3]:
data_array = np.array(data)
data_array.shape, data_array.dtype
data_main = np.array(data_array[:, -1:], dtype=float)
data_main.shape, data_main.dtype

plt.plot(data_main[:100])
plt.show()



In [4]:
mean = np.mean(data_main, axis=0)
std = np.std(data_main, axis=0)
std.shape, mean.shape, std.dtype, mean.dtype

data_norm = (data_main - mean) / std
plt.plot(data_norm[:100])
plt.show()
data_norm.mean(), data_norm.std(), data_norm.var(), data_norm.shape, data_norm.dtype


Out[4]:
(-1.0548364452851478e-16, 1.0, 1.0, (17379, 1), dtype('float64'))

In [5]:
train_data = data_norm[:16000] # the last dim/variable/feature
test_data = data_norm[16000:] # the last dim/variable/feature
train_data.shape, test_data.shape
X_train = train_data[0:15999]
Y_train = train_data[1:16000]
X_train.shape, Y_train.shape

plt.plot(X_train[:100])
plt.plot(Y_train[:100])
plt.show()



In [6]:
X_valid = test_data[0:1378] 
Y_valid = test_data[1:1379]
X_valid.shape, Y_valid.shape
plt.plot(X_valid[:100])
plt.plot(Y_valid[:100])
plt.show()



In [7]:
# Model or Network
import impl.layer as l
from impl.loss import *

class GRU:
    def __init__(self, D, H, L, p_dropout):
        self.D = D
        self.H = H
        self.L = L
        self.p_dropout = p_dropout
        self.losses = {'train':[], 'smooth train':[], 'valid': []}
        
        # Model params
        Z = H + D
        m = dict(
            Wz=np.random.randn(Z, H) / np.sqrt(Z / 2.),
            Wr=np.random.randn(Z, H) / np.sqrt(Z / 2.),
            Wh=np.random.randn(Z, H) / np.sqrt(Z / 2.),
            Wy=np.random.randn(H, D) / np.sqrt(H / 2.),
            bz=np.zeros((1, H)),
            br=np.zeros((1, H)),
            bh=np.zeros((1, H)),
            by=np.zeros((1, D))
        )
        self.model = []
        for _ in range(self.L):
            self.model.append(m)
        
    def initial_state(self):
        return np.zeros((1, self.H))

    def forward(self, X, h, m):
        Wz, Wr, Wh, Wy = m['Wz'], m['Wr'], m['Wh'], m['Wy']
        bz, br, bh, by = m['bz'], m['br'], m['bh'], m['by']

        X_in = X.copy()
        h_in = h.copy()

        X = np.column_stack((h_in, X_in))

        hz, hz_cache = l.fc_forward(X, Wz, bz)
        hz, hz_sigm_cache = l.sigmoid_forward(hz)

        hr, hr_cache = l.fc_forward(X, Wr, br)
        hr, hr_sigm_cache = l.sigmoid_forward(hr)

        X = np.column_stack((hr * h_in, X_in))
        
        hh, hh_cache = l.fc_forward(X, Wh, bh)
        hh, hh_tanh_cache = l.tanh_forward(hh)

        # h = (1. - hz) * h_old + hz * hh
        # or
        h = ((1. - hz) * h_in) + (hz * hh)
        # or
        # h = h_in + hz (hh - h_in)

        y, y_cache = l.fc_forward(h, Wy, by)
        
        cache = (h_in, hz, hz_cache, hz_sigm_cache, hr, hr_cache, hr_sigm_cache, hh, hh_cache, hh_tanh_cache, 
                 y_cache)

        return y, h, cache

    def backward(self, dy, dh, cache):
        h_in, hz, hz_cache, hz_sigm_cache, hr, hr_cache, hr_sigm_cache, hh, hh_cache, hh_tanh_cache, y_cache = cache
        
        dh_out = dh.copy()

        dh, dWy, dby = l.fc_backward(dy, y_cache)
        dh += dh_out

        dh_in1 = (1. - hz) * dh
        dhh = hz * dh
        dhz = (hh * dh) - (h_in * dh)
        # or
        # dhz = (hh - h_in) * dh

        dhh = l.tanh_backward(dhh, hh_tanh_cache)
        dXh, dWh, dbh = l.fc_backward(dhh, hh_cache)

        dh = dXh[:, :self.H]
        dX_in2 = dXh[:, self.H:]
        dh_in2 = hr * dh

        dhr = h_in * dh
        dhr = l.sigmoid_backward(dhr, hr_sigm_cache)
        dXr, dWr, dbr = l.fc_backward(dhr, hr_cache)

        dhz = l.sigmoid_backward(dhz, hz_sigm_cache)
        dXz, dWz, dbz = l.fc_backward(dhz, hz_cache)

        dX = dXr + dXz
        dh_in3 = dX[:, :self.H]
        dX_in1 = dX[:, self.H:]

        dh = dh_in1 + dh_in2 + dh_in3
        dX = dX_in1 + dX_in2

        grad = dict(Wz=dWz, Wr=dWr, Wh=dWh, Wy=dWy, bz=dbz, br=dbr, bh=dbh, by=dby)
        
        return dX, dh, grad

    def train_forward(self, X_train, h):
        ys, fc_caches, do_caches = [], [], []

        for X in X_train:
            X = X.reshape(1, -1) # X_1xn
            for layer in range(self.L):
                y, h, fc_cache = self.forward(X, h, self.model[layer])
                y, do_cache = l.dropout_forward(y, self.p_dropout)
                X = y.copy()
            fc_caches.append(fc_cache)
            do_caches.append(do_cache)
            ys.append(y)
        
        ys = np.array(ys, dtype=float).reshape(len(ys), -1) # ys_txn instead of ys_tx1xn
        caches = (fc_caches, do_caches)
        
        return ys, caches
                                
    def loss_function(self, y_pred, y_train): # , alpha alpha: learning rate
        loss, dys = 0.0, []

        for y, Y in zip(y_pred, y_train):
            loss += l2_regression(y_pred=y, y_train=Y)
            dy = dl2_regression(y_pred=y, y_train=Y)
            dys.append(dy)
            
        return loss, dys
    
    def train_backward(self, dys, caches):
        fc_caches, do_caches = caches
        
        dh = np.zeros((1, self.H)) 
        grad = {key: np.zeros_like(val) for key, val in self.model[0].items()}
        grads = [] #{key: np.zeros_like(val) for key, val in self.model.items()}
        for _ in range(self.L):
            grads.append(grad)

        for t in reversed(range(len(dys))):
            dy = dys[t].reshape(1, -1) # dy_1xn
            for layer in reversed(range(self.L)):
                dy = l.dropout_backward(dy, do_caches[t])
                dX, dh, grad = self.backward(dy, dh, fc_caches[t])
                dy = dX.copy() # for the previous layer
                for key in grad.keys():
                    grads[layer][key] += grad[key]
                
        return dX, grads
    
    def test(self, X_seed, h, size):
        ys = []
        X = X_seed.reshape(1, -1)
        for _ in range(size):
            for layer in range(self.L):
                y, h, _ = self.forward(X, h, self.model[layer])
                X = y.copy() # previous out for the next input for prediction
            ys.append(y) # list array
        
        ys = np.array(ys, dtype=float).reshape(len(ys), -1) # ys_txn instead of ys_tx1xn
        return ys

In [8]:
def get_minibatch(X, y, minibatch_size, shuffle):
    minibatches = []

    for i in range(0, X.shape[0], minibatch_size):
        X_mini = X[i:i + minibatch_size]
        y_mini = y[i:i + minibatch_size]
        minibatches.append((X_mini, y_mini))

    return minibatches

def adam_rnn(nn, XY_train, XY_valid, alpha, mb_size, n_iter, print_after):
    X_train, y_train = XY_train
    X_valid, y_valid = XY_valid

    # Momentum
    M = [] # {key: np.zeros_like(val) for key, val in nn.model.items()}
    R = [] # {key: np.zeros_like(val) for key, val in nn.model.items()}
    for _ in range(nn.L):
        M.append({key: np.zeros_like(val) for key, val in nn.model[0].items()})
        R.append({key: np.zeros_like(val) for key, val in nn.model[0].items()})
    
    # Learning decay: suggested by Justin Jhonson in Standford
    beta1 = .9
    beta2 = .99
    state = nn.initial_state()
    smooth_loss = 1.
    minibatches = get_minibatch(X_train, y_train, mb_size, shuffle=False)
    
    # Epochs: iterating through the whole data
    for iter in range(1, n_iter + 1):
        
        # Minibatches
        for idx in range(len(minibatches)):
            
            # Train the model
            X_mini, y_mini = minibatches[idx]
            ys, caches = nn.train_forward(X_mini, state)
            loss, dys = nn.loss_function(y_pred=ys, y_train=y_mini) #, alpha=alpha
            _, grads = nn.train_backward(dys, caches)
            nn.losses['train'].append(loss)
            smooth_loss = (0.999 * smooth_loss) + (0.001 * loss)
            nn.losses['smooth train'].append(smooth_loss)
            
            # Update the model
            for layer in range(nn.L):
                for key in grads[0].keys(): # key, value, items
                    M[layer][key] = l.exp_running_avg(M[layer][key], grads[layer][key], beta1)
                    R[layer][key] = l.exp_running_avg(R[layer][key], grads[layer][key]**2, beta2)
                    m_k_hat = M[layer][key] / (1. - (beta1** iter))
                    r_k_hat = R[layer][key] / (1. - (beta2** iter))
                    nn.model[layer][key] -= alpha * m_k_hat / (np.sqrt(r_k_hat) + l.eps)

            # Validate the model (by testing)
            ys = nn.test(X_seed=X_valid[0], h=state, size=X_valid.shape[0]) # ys_tx1xn
            valid_loss, _ = nn.loss_function(y_pred=ys, y_train=Y_valid) #, alpha=alpha
            nn.losses['valid'].append(valid_loss)

        # Print the model loss/ error
        if iter % print_after == 0:
            print('Iter-{}, train loss: {:.8f}, valid loss: {:.8f}'.format(iter, loss, valid_loss))

    return nn

In [ ]:
# Hyper-parameters
time_step = 64 # minibatch size: 32, 64, 128, or 256 Cache
n_iter = 1000 # epochs
alpha = 1e-4 # learning_rate: 1e-3, 5e-4, 1e-4 - default choices
print_after = 1 # print training loss, valid, and test
num_hidden_units = 64 # num_hidden_units in hidden layer
num_input_units = X_train.shape[1] # X_txn: noise given by using all possible channels/ features
num_hidden_layers = 3 # number of hidden layers
keep_prob = 0.95 # p_dropout == keep_prob

# Build the network and learning it or optimizing it using SGD
# def adam_rnn(nn, X_train, y_train, alpha=0.001, mb_size=256, n_iter=2000, print_after=100):
net = GRU(D=num_input_units, H=num_hidden_units, p_dropout=keep_prob, L=num_hidden_layers) # , lam=lam, L=num_layers, p_dropout=p_dropout

# Start learning using BP-SGD-ADAM
adam_rnn(nn=net, XY_train=(X_train, Y_train), XY_valid=(X_valid, Y_valid), alpha=alpha, mb_size=time_step,
         n_iter=n_iter, print_after=print_after)


Iter-1, train loss: 29.95904319, valid loss: 15147.51299166
Iter-2, train loss: 26.76610040, valid loss: 18430.93149886
Iter-3, train loss: 29.89737301, valid loss: 18989.18575991
Iter-4, train loss: 30.63914478, valid loss: 19467.10633892
Iter-5, train loss: 28.12309078, valid loss: 20221.16355913
Iter-6, train loss: 27.93202578, valid loss: 19815.60087014
Iter-7, train loss: 30.33559378, valid loss: 19124.05583837
Iter-8, train loss: 30.70543730, valid loss: 18804.98586665
Iter-9, train loss: 30.79582132, valid loss: 17085.20390194
Iter-10, train loss: 29.96441224, valid loss: 16868.66676030
Iter-11, train loss: 30.78931562, valid loss: 16350.86497731
Iter-12, train loss: 29.55159749, valid loss: 15744.32595255
Iter-13, train loss: 28.29052964, valid loss: 12791.54206014
Iter-14, train loss: 30.38785852, valid loss: 13497.96740853
Iter-15, train loss: 30.77999285, valid loss: 12841.20004545
Iter-16, train loss: 30.73124126, valid loss: 11946.78097313
Iter-17, train loss: 30.78931562, valid loss: 11663.72518938
Iter-18, train loss: 30.19782744, valid loss: 13071.12860711
Iter-19, train loss: 30.68307593, valid loss: 12915.16993649
Iter-20, train loss: 30.45066394, valid loss: 14562.73716493
Iter-21, train loss: 30.77385560, valid loss: 12653.67763308
Iter-22, train loss: 29.94059661, valid loss: 12490.47720890
Iter-23, train loss: 29.21653209, valid loss: 11927.10049616
Iter-24, train loss: 30.28781967, valid loss: 12382.28260556
Iter-25, train loss: 31.57101614, valid loss: 10680.61798177
Iter-26, train loss: 28.94761504, valid loss: 8976.75395005
Iter-27, train loss: 28.20324017, valid loss: 5109.14373270
Iter-28, train loss: 28.75643137, valid loss: 5484.07766024
Iter-29, train loss: 30.33799196, valid loss: 4056.65818095
Iter-30, train loss: 29.90732979, valid loss: 2860.37108451
Iter-31, train loss: 31.98373379, valid loss: 2936.12507993
Iter-32, train loss: 29.69708915, valid loss: 3357.08857565
Iter-33, train loss: 28.46712429, valid loss: 2798.59455505
Iter-34, train loss: 29.41728973, valid loss: 1851.51573120
Iter-35, train loss: 29.50197102, valid loss: 1785.58936368
Iter-36, train loss: 31.33845515, valid loss: 1766.31359586
Iter-37, train loss: 30.78970830, valid loss: 1797.82281767
Iter-38, train loss: 30.42730808, valid loss: 1996.48615094
Iter-39, train loss: 29.24379904, valid loss: 1288.02057638
Iter-40, train loss: 31.47831107, valid loss: 1576.59124375
Iter-41, train loss: 29.19200957, valid loss: 1514.95938226
Iter-42, train loss: 29.80375885, valid loss: 1382.47104369
Iter-43, train loss: 30.55331008, valid loss: 1246.07753469
Iter-44, train loss: 30.15013337, valid loss: 1112.47459983
Iter-45, train loss: 29.43841586, valid loss: 1193.19728906
Iter-46, train loss: 32.72387678, valid loss: 1221.48004359
Iter-47, train loss: 31.89715096, valid loss: 1267.93856571
Iter-48, train loss: 30.65549368, valid loss: 905.44834567
Iter-49, train loss: 30.30015388, valid loss: 1131.72578893
Iter-50, train loss: 27.90295210, valid loss: 1187.67088453
Iter-51, train loss: 29.40532079, valid loss: 1180.66318009
Iter-52, train loss: 30.99642521, valid loss: 1073.73821190
Iter-53, train loss: 28.94362503, valid loss: 1054.13174133
Iter-54, train loss: 28.65785594, valid loss: 1044.12043122
Iter-55, train loss: 29.51194351, valid loss: 1021.60462340
Iter-56, train loss: 29.22514227, valid loss: 1256.09212980
Iter-57, train loss: 32.39530177, valid loss: 1171.62892843
Iter-58, train loss: 29.12184714, valid loss: 1137.99843677
Iter-59, train loss: 31.08985098, valid loss: 1212.13095583
Iter-60, train loss: 32.60020004, valid loss: 818.34653445
Iter-61, train loss: 30.34890363, valid loss: 1288.41476072
Iter-62, train loss: 31.73417003, valid loss: 1345.02912221
Iter-63, train loss: 30.43779293, valid loss: 2284.19398824
Iter-64, train loss: 28.65460843, valid loss: 723.67419237
Iter-65, train loss: 30.75977514, valid loss: 786.86259094
Iter-66, train loss: 30.29515716, valid loss: 745.80251272
Iter-67, train loss: 31.06458123, valid loss: 1117.69388532
Iter-68, train loss: 30.00605987, valid loss: 1114.73561232
Iter-69, train loss: 30.20045330, valid loss: 1160.26790064
Iter-70, train loss: 31.21066582, valid loss: 1201.70821715
Iter-71, train loss: 30.24758527, valid loss: 1022.18440822
Iter-72, train loss: 30.81582140, valid loss: 1332.30943153
Iter-73, train loss: 30.68540069, valid loss: 1325.44786104
Iter-74, train loss: 30.52506492, valid loss: 1329.31703549
Iter-75, train loss: 27.43281930, valid loss: 1265.44106492
Iter-76, train loss: 30.63674319, valid loss: 1354.83387302
Iter-77, train loss: 30.98109983, valid loss: 1271.81981846
Iter-78, train loss: 30.12562905, valid loss: 1474.90514927
Iter-79, train loss: 28.67039199, valid loss: 1452.63224176
Iter-80, train loss: 30.84055265, valid loss: 1216.38169558
Iter-81, train loss: 30.01777772, valid loss: 1397.98864378
Iter-82, train loss: 31.90291633, valid loss: 1495.96116512
Iter-83, train loss: 29.95235677, valid loss: 1363.63895682
Iter-84, train loss: 30.29052405, valid loss: 1488.78104939
Iter-85, train loss: 31.73977946, valid loss: 1343.21353191
Iter-86, train loss: 30.48897677, valid loss: 1195.38926894
Iter-87, train loss: 30.40464981, valid loss: 1374.18964807
Iter-88, train loss: 30.15164754, valid loss: 973.18452264
Iter-89, train loss: 29.48078812, valid loss: 1274.26374120
Iter-90, train loss: 32.11572848, valid loss: 1392.44642702
Iter-91, train loss: 29.87785982, valid loss: 911.90463336
Iter-92, train loss: 31.51032584, valid loss: 1291.49433774
Iter-93, train loss: 29.46140018, valid loss: 931.94156170
Iter-94, train loss: 29.59167480, valid loss: 1061.35736046
Iter-95, train loss: 29.60440697, valid loss: 760.44937223
Iter-96, train loss: 29.99005921, valid loss: 934.97760355
Iter-97, train loss: 27.78152650, valid loss: 1162.59730531
Iter-98, train loss: 29.81008620, valid loss: 1221.71133742
Iter-99, train loss: 30.78931562, valid loss: 1521.04430992
Iter-100, train loss: 29.08295289, valid loss: 1364.95056534
Iter-101, train loss: 29.92026626, valid loss: 1808.19079668
Iter-102, train loss: 30.15060196, valid loss: 2385.67058002
Iter-103, train loss: 31.04411764, valid loss: 2380.09979130
Iter-104, train loss: 31.09523228, valid loss: 1765.13368094
Iter-105, train loss: 30.86905525, valid loss: 2568.74542211
Iter-106, train loss: 30.32458808, valid loss: 2202.42826326
Iter-107, train loss: 30.41230551, valid loss: 2934.08131276
Iter-108, train loss: 31.13859721, valid loss: 13201.21700395
Iter-109, train loss: 28.68743506, valid loss: 6272.74226090
Iter-110, train loss: 30.78931562, valid loss: 4573.68397094
Iter-111, train loss: 30.44611936, valid loss: 2882.43358053
Iter-112, train loss: 32.12387472, valid loss: 5220.00674743
Iter-113, train loss: 30.90613875, valid loss: 4300.32554247
Iter-114, train loss: 30.57178310, valid loss: 4712.87185476
Iter-115, train loss: 31.26457019, valid loss: 5084.87140162
Iter-116, train loss: 29.10652681, valid loss: 5435.79749653
Iter-117, train loss: 30.58955123, valid loss: 3427.89902309
Iter-118, train loss: 29.81549417, valid loss: 747.82767116
Iter-119, train loss: 28.69109151, valid loss: 875.07037481
Iter-120, train loss: 30.15306488, valid loss: 3015.55274440
Iter-121, train loss: 29.88569388, valid loss: 3861.78145416
Iter-122, train loss: 31.17956525, valid loss: 7164.53342097
Iter-123, train loss: 28.14520491, valid loss: 7978.40047349
Iter-124, train loss: 28.19677350, valid loss: 701.21842210
Iter-125, train loss: 29.39360865, valid loss: 2056.20133330
Iter-126, train loss: 31.65293282, valid loss: 4192.31966543
Iter-127, train loss: 31.00683067, valid loss: 3194.79208003
Iter-128, train loss: 30.25764949, valid loss: 1355.57533151
Iter-129, train loss: 30.49035396, valid loss: 1755.31017142
Iter-130, train loss: 31.57930464, valid loss: 2879.78750226
Iter-131, train loss: 30.78931562, valid loss: 1413.62770528
Iter-132, train loss: 30.06032042, valid loss: 1389.29191387
Iter-133, train loss: 28.46204054, valid loss: 1284.37331308
Iter-134, train loss: 30.59250338, valid loss: 775.36829279
Iter-135, train loss: 31.73740216, valid loss: 1735.53617189
Iter-136, train loss: 29.06295054, valid loss: 1167.04356050
Iter-137, train loss: 31.18668065, valid loss: 1254.36275014
Iter-138, train loss: 29.73058356, valid loss: 1274.54252172
Iter-139, train loss: 31.83826740, valid loss: 1390.40078642
Iter-140, train loss: 28.56806969, valid loss: 1250.33666886
Iter-141, train loss: 30.07273140, valid loss: 1356.17926564
Iter-142, train loss: 30.58182552, valid loss: 1128.22333285
Iter-143, train loss: 29.87292443, valid loss: 844.26052943
Iter-144, train loss: 31.49801005, valid loss: 1037.01221911
Iter-145, train loss: 30.86778330, valid loss: 901.10292659
Iter-146, train loss: 31.11938045, valid loss: 1007.98861336
Iter-147, train loss: 30.60806047, valid loss: 1187.85462697
Iter-148, train loss: 31.68410535, valid loss: 1473.26862200
Iter-149, train loss: 30.00306894, valid loss: 1340.51573192
Iter-150, train loss: 31.72177194, valid loss: 1045.96965689
Iter-151, train loss: 30.78172212, valid loss: 982.46910573
Iter-152, train loss: 30.19439766, valid loss: 2052.49240884
Iter-153, train loss: 30.47496242, valid loss: 1464.26554386
Iter-154, train loss: 31.16199301, valid loss: 915.29474025
Iter-155, train loss: 29.74325310, valid loss: 2167.62821895
Iter-156, train loss: 30.75216707, valid loss: 806.34582073
Iter-157, train loss: 29.89581332, valid loss: 2477.76881112
Iter-158, train loss: 30.89004940, valid loss: 2137.33804747
Iter-159, train loss: 30.94812239, valid loss: 1509.08801356
Iter-160, train loss: 30.80645181, valid loss: 4526.86000719
Iter-161, train loss: 31.80345792, valid loss: 6497.52812271
Iter-162, train loss: 30.54669915, valid loss: 1082.84776461
Iter-163, train loss: 31.89237117, valid loss: 647.15944105
Iter-164, train loss: 31.15501845, valid loss: 793.92236985
Iter-165, train loss: 30.40317248, valid loss: 3006.80331345
Iter-166, train loss: 27.94622036, valid loss: 1207.56044372
Iter-167, train loss: 37.90120606, valid loss: 10115.19727143
Iter-168, train loss: 37.99065688, valid loss: 3986.08168106
Iter-169, train loss: 32.44014014, valid loss: 3555.77070195
Iter-170, train loss: 34.11517834, valid loss: 2812.11957367
Iter-171, train loss: 27.75333024, valid loss: 2118.62643529
Iter-172, train loss: 37.41151116, valid loss: 1896.66109351
Iter-173, train loss: 32.39968709, valid loss: 972.86175644
Iter-174, train loss: 32.22294600, valid loss: 689.88320541
Iter-175, train loss: 31.15582739, valid loss: 656.87072949
Iter-176, train loss: 31.43952468, valid loss: 1037.80428387
Iter-177, train loss: 33.53494837, valid loss: 1200.94258287
Iter-178, train loss: 30.33128707, valid loss: 1226.80809609
Iter-179, train loss: 31.60179124, valid loss: 1050.60092276
Iter-180, train loss: 30.66984193, valid loss: 630.25113225
Iter-181, train loss: 29.69195482, valid loss: 677.21250226
Iter-182, train loss: 31.55312742, valid loss: 904.92264482
Iter-183, train loss: 30.82717277, valid loss: 1896.09399165
Iter-184, train loss: 31.06204581, valid loss: 863.98434990
Iter-185, train loss: 29.92435087, valid loss: 1270.45320938
Iter-186, train loss: 27.26366049, valid loss: 2397.60707335
Iter-187, train loss: 32.54116173, valid loss: 1003.04565803
Iter-188, train loss: 31.58826796, valid loss: 841.76977206
Iter-189, train loss: 29.58546740, valid loss: 712.81193938
Iter-190, train loss: 30.34173235, valid loss: 762.49506004
Iter-191, train loss: 32.44771565, valid loss: 893.66895670
Iter-192, train loss: 30.78931562, valid loss: 780.57330250
Iter-193, train loss: 32.58743851, valid loss: 810.31807030
Iter-194, train loss: 32.77586642, valid loss: 844.71980430
Iter-195, train loss: 30.78931562, valid loss: 795.36012367
Iter-196, train loss: 29.31364947, valid loss: 667.59487828
Iter-197, train loss: 30.99458135, valid loss: 873.25520303

In [ ]:
import matplotlib.pyplot as plt

plt.plot(net.losses['train'], label='Train loss')
plt.plot(net.losses['smooth train'], label='Train smooth loss')
plt.legend()
plt.show()

In [ ]:
import matplotlib.pyplot as plt

plt.plot(net.losses['valid'], label='Validation loss')
plt.legend()
plt.show()

In [ ]:
import matplotlib.pyplot as plt

y_pred = net.test(X_seed=X_valid[0], h=net.initial_state(), size=X_valid.shape[0]) # ys_tx1xn
y_pred.shape, Y_valid.shape

plt.plot(y_pred[:100, 1], label='y_pred')
plt.plot(Y_valid[:100, 1], label='Y_valid')
# plt.plot(X_valid[:100], label='X_valid')
plt.legend()
plt.show()

In [ ]:


In [ ]: