In [9]:
# Data: hourly time-series data; here the bike-sharing dataset (smartwatch and financial CSVs are kept as alternatives below)
# %matplotlib inline # display plots inline in the notebook
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Data reading
# The historical/time-series data to visualize
# data_path = 'data/smartwatch_data/experimental_data_analysis/Basis_Watch_Data.csv'
# data_path = 'data/financial_data/USD_INR.csv'
data_path = 'data/bike_data/hour.csv'
data = pd.read_csv(data_path)

# Data: cleaning
# Replace NaN values with 0.0
data = data.fillna(value=0.0)

# Show the first ten rows of the CSV (comma-separated values) file
data[:10]


Out[9]:
instant dteday season yr mnth hr holiday weekday workingday weathersit temp atemp hum windspeed casual registered cnt
0 1 2011-01-01 1 0 1 0 0 6 0 1 0.24 0.2879 0.81 0.0000 3 13 16
1 2 2011-01-01 1 0 1 1 0 6 0 1 0.22 0.2727 0.80 0.0000 8 32 40
2 3 2011-01-01 1 0 1 2 0 6 0 1 0.22 0.2727 0.80 0.0000 5 27 32
3 4 2011-01-01 1 0 1 3 0 6 0 1 0.24 0.2879 0.75 0.0000 3 10 13
4 5 2011-01-01 1 0 1 4 0 6 0 1 0.24 0.2879 0.75 0.0000 0 1 1
5 6 2011-01-01 1 0 1 5 0 6 0 2 0.24 0.2576 0.75 0.0896 0 1 1
6 7 2011-01-01 1 0 1 6 0 6 0 1 0.22 0.2727 0.80 0.0000 2 0 2
7 8 2011-01-01 1 0 1 7 0 6 0 1 0.20 0.2576 0.86 0.0000 1 2 3
8 9 2011-01-01 1 0 1 8 0 6 0 1 0.24 0.2879 0.75 0.0000 1 7 8
9 10 2011-01-01 1 0 1 9 0 6 0 1 0.32 0.3485 0.76 0.0000 8 6 14

In [10]:
# Plotting the raw data before scaling/standardization
# data[:10000]['Price'].plot() # e.g. for the financial dataset
data[:10].plot()
plt.legend()
plt.show()



In [11]:
data_array = np.array(data)
data_array.shape, data_array.dtype
# Keep only the last column, 'cnt' (the total rental count), as the series to model
data_main = np.array(data_array[:, -1:], dtype=float)
data_main.shape, data_main.dtype

plt.plot(data_main[:100])
plt.show()
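
The positional slice [:, -1:] works because 'cnt' is the last column of the CSV. A minimal alternative, kept commented out like the other alternatives in this notebook, is to select the target column by name, which is robust to column reordering:

# Equivalent selection of the target column by name rather than by position
# data_main = data[['cnt']].to_numpy(dtype=float)  # shape (17379, 1)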



In [12]:
# Z-score standardization: zero mean, unit variance
mean = np.mean(data_main, axis=0)
std = np.std(data_main, axis=0)
std.shape, mean.shape, std.dtype, mean.dtype

data_norm = (data_main - mean) / std
plt.plot(data_norm[:100])
plt.show()
data_norm.mean(), data_norm.std(), data_norm.var(), data_norm.shape, data_norm.dtype


Out[12]:
(-1.0548364452851478e-16, 1.0, 1.0, (17379, 1), dtype('float64'))

In [13]:
train_data = data_norm[:16000] # first 16000 hourly steps
test_data = data_norm[16000:] # remaining 1379 hourly steps held out
train_data.shape, test_data.shape
# One-step-ahead targets: X[t] is the series at step t, Y[t] is the series at step t+1
X_train = train_data[0:15999]
Y_train = train_data[1:16000]
X_train.shape, Y_train.shape

plt.plot(X_train[:100])
plt.plot(Y_train[:100])
plt.show()



In [14]:
# Same shift-by-one pairing on the held-out data
X_valid = test_data[0:1378]
Y_valid = test_data[1:1379]
X_valid.shape, Y_valid.shape
plt.plot(X_valid[:100])
plt.plot(Y_valid[:100])
plt.show()
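
The manual index arithmetic above simply shifts the series by one step. A small helper makes that intent explicit and avoids hard-coded lengths; a minimal sketch (the name make_xy is ours, not part of the notebook's impl package):

def make_xy(series):
    # Build one-step-ahead (input, target) pairs: X[t] predicts series[t+1]
    return series[:-1], series[1:]

# X_train, Y_train = make_xy(train_data)
# X_valid, Y_valid = make_xy(test_data)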



In [15]:
# Model or Network
import impl.layer as l
from impl.loss import *

class GRU:
    def __init__(self, D, H):
        self.D = D
        self.H = H
        self.losses = {'train':[], 'smooth train':[], 'valid': []}
        
        # Model params
        Z = H + D
        low, high = (-1. / np.sqrt(Z / 2.)), (1. / np.sqrt(Z / 2.))
        m = dict(
#             Wz=np.random.randn(Z, H) / np.sqrt(Z / 2.),
#             Wr=np.random.randn(Z, H) / np.sqrt(Z / 2.),
#             Wh=np.random.randn(Z, H) / np.sqrt(Z / 2.),
#             Wy=np.random.randn(H, D) / np.sqrt(H / 2.),
            Wz=np.random.uniform(size=(Z, H), low=low, high=high),
            Wr=np.random.uniform(size=(Z, H), low=low, high=high),
            Wh=np.random.uniform(size=(Z, H), low=low, high=high),
            Wy=np.random.uniform(size=(H, D), low=low, high=high),
            bz=np.zeros((1, H)),
            br=np.zeros((1, H)),
            bh=np.zeros((1, H)),
            by=np.zeros((1, D))
        )
        self.model = m
        
    def initial_state(self):
        return np.zeros((1, self.H))

    def forward(self, X, h, m):
        Wz, Wr, Wh, Wy = m['Wz'], m['Wr'], m['Wh'], m['Wy']
        bz, br, bh, by = m['bz'], m['br'], m['bh'], m['by']

        X_in = X.copy()
        h_in = h.copy()

        X = np.column_stack((h_in, X_in))

        hz, hz_cache = l.fc_forward(X, Wz, bz)
        hz, hz_sigm_cache = l.sigmoid_forward(hz)

        hr, hr_cache = l.fc_forward(X, Wr, br)
        hr, hr_sigm_cache = l.sigmoid_forward(hr)

        X = np.column_stack((hr * h_in, X_in))
        
        hh, hh_cache = l.fc_forward(X, Wh, bh)
        hh, hh_tanh_cache = l.tanh_forward(hh)

        # GRU hidden-state update; two equivalent forms:
        #   h = (1. - hz) * h_old + hz * hh
        #   h = h_in + hz * (hh - h_in)
        h = ((1. - hz) * h_in) + (hz * hh)

        y, y_cache = l.fc_forward(h, Wy, by)
        
        cache = (h_in, hz, hz_cache, hz_sigm_cache, hr, hr_cache, hr_sigm_cache, hh, hh_cache, hh_tanh_cache, 
                 y_cache)

        return y, h, cache

    def backward(self, dy, dh, cache):
        h_in, hz, hz_cache, hz_sigm_cache, hr, hr_cache, hr_sigm_cache, hh, hh_cache, hh_tanh_cache, y_cache = cache
        
        dh_out = dh.copy()

        dh, dWy, dby = l.fc_backward(dy, y_cache)
        dh += dh_out

        dh_in1 = (1. - hz) * dh
        dhh = hz * dh
        dhz = (hh * dh) - (h_in * dh)
        # or
        # dhz = (hh - h_in) * dh

        dhh = l.tanh_backward(dhh, hh_tanh_cache)
        dXh, dWh, dbh = l.fc_backward(dhh, hh_cache)

        dh = dXh[:, :self.H]
        dX_in2 = dXh[:, self.H:]
        dh_in2 = hr * dh

        dhr = h_in * dh
        dhr = l.sigmoid_backward(dhr, hr_sigm_cache)
        dXr, dWr, dbr = l.fc_backward(dhr, hr_cache)

        dhz = l.sigmoid_backward(dhz, hz_sigm_cache)
        dXz, dWz, dbz = l.fc_backward(dhz, hz_cache)

        dX = dXr + dXz
        dh_in3 = dX[:, :self.H]
        dX_in1 = dX[:, self.H:]

        dh = dh_in1 + dh_in2 + dh_in3
        dX = dX_in1 + dX_in2

        grad = dict(Wz=dWz, Wr=dWr, Wh=dWh, Wy=dWy, bz=dbz, br=dbr, bh=dbh, by=dby)
        
        return dX, dh, grad

    def train_forward(self, X_train, h):
        ys, fc_caches = [], []

        for X in X_train:
            X = X.reshape(1, -1) # X_1xn
            y, h, fc_cache = self.forward(X, h, self.model)
            fc_caches.append(fc_cache)
            ys.append(y)
        
        ys = np.array(ys, dtype=float).reshape(len(ys), -1) # ys_txn instead of ys_tx1xn
        caches = fc_caches
        
        return ys, caches
                                
    def loss_function(self, y_pred, y_train):
        loss, dys = 0.0, []

        for y, Y in zip(y_pred, y_train):
            loss += l2_regression(y_pred=y, y_train=Y)
            dy = dl2_regression(y_pred=y, y_train=Y)
            dys.append(dy)
            
        return loss, dys
    
    def train_backward(self, dys, caches):
        fc_caches = caches
        
        dh = np.zeros((1, self.H))
        grad = {key: np.zeros_like(val) for key, val in self.model.items()}
        grads = {key: np.zeros_like(val) for key, val in self.model.items()}

        for t in reversed(range(len(dys))):
            dy = dys[t].reshape(1, -1) # dy_1xn
            dX, dh, grad = self.backward(dy, dh, fc_caches[t])
            for key in grad.keys():
                grads[key] += grad[key]
                
        return dX, grads # dX is not used by the caller, but it is returned as a reminder that it exists
    
    def test(self, X_seed, h, size):
        ys = []
        X = X_seed.reshape(1, -1)
        for _ in range(size):
            y, h, _ = self.forward(X, h, self.model)
            X = y.copy() # previous out for the next input for prediction
            ys.append(y) # list array
        
        ys = np.array(ys, dtype=float).reshape(len(ys), -1) # ys_txn instead of ys_tx1xn
        return ys
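
The GRU cell above is built from a few primitives imported from impl.layer (aliased l) and impl.loss, which are not shown in this notebook. The sketch below spells out the interfaces the class assumes; it is an approximation, and the actual implementations in impl may differ (for example in the exact loss scaling):

# Assumed interfaces of the impl.layer / impl.loss helpers used above (sketch only)
import numpy as np

def fc_forward(X, W, b):
    out = X @ W + b
    return out, (X, W)

def fc_backward(dout, cache):
    X, W = cache
    dX = dout @ W.T
    dW = X.T @ dout
    db = np.sum(dout, axis=0, keepdims=True)
    return dX, dW, db

def sigmoid_forward(x):
    out = 1. / (1. + np.exp(-x))
    return out, out                      # cache the activation for the backward pass

def sigmoid_backward(dout, cache):
    return dout * cache * (1. - cache)   # d/dx sigmoid(x) = s * (1 - s)

def tanh_forward(x):
    out = np.tanh(x)
    return out, out

def tanh_backward(dout, cache):
    return dout * (1. - cache ** 2)      # d/dx tanh(x) = 1 - tanh(x)^2

def l2_regression(y_pred, y_train):
    return 0.5 * np.sum((y_pred - y_train) ** 2)   # squared-error loss

def dl2_regression(y_pred, y_train):
    return y_pred - y_train                        # gradient of the loss w.r.t. y_pred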

In [16]:
def get_minibatch(X, y, minibatch_size, shuffle):
    # Note: 'shuffle' is accepted but ignored; for time-series data the temporal order is preserved
    minibatches = []

    # for i in range(0, X.shape[0] - minibatch_size + 1, 1):
    for i in range(0, X.shape[0], minibatch_size):
        X_mini = X[i:(i + minibatch_size)]
        y_mini = y[i:(i + minibatch_size)]
        minibatches.append((X_mini, y_mini))

    return minibatches

def adam_rnn(nn, XY_train, XY_valid, alpha, mb_size, n_iter, print_after):
    X_train, y_train = XY_train
    X_valid, y_valid = XY_valid

    # Adam's first- and second-moment estimates of the gradients
    M = {key: np.zeros_like(val) for key, val in nn.model.items()}
    R = {key: np.zeros_like(val) for key, val in nn.model.items()}

    # Exponential decay rates for the moment estimates (defaults suggested by Justin Johnson, Stanford)
    beta1 = .9
    beta2 = .99
    state = nn.initial_state()
    smooth_loss = 1.
    minibatches = get_minibatch(X_train, y_train, mb_size, shuffle=False)
    
    # Epochs: iterating through the whole data
    for iter in range(1, n_iter + 1):
        
        # Minibatches
        for idx in range(len(minibatches)):
            
            # Train the model
            X_mini, y_mini = minibatches[idx]
            ys, caches = nn.train_forward(X_mini, state)
            loss, dys = nn.loss_function(y_pred=ys, y_train=y_mini) #, alpha=alpha
            _, grads = nn.train_backward(dys, caches)
            nn.losses['train'].append(loss)
            smooth_loss = (0.999 * smooth_loss) + (0.001 * loss)
            nn.losses['smooth train'].append(smooth_loss)
            
            # Update the model parameters with Adam (bias-corrected moment estimates)
            for key in grads.keys():
                M[key] = l.exp_running_avg(M[key], grads[key], beta1)
                R[key] = l.exp_running_avg(R[key], grads[key]**2, beta2)
                m_k_hat = M[key] / (1. - (beta1** iter))
                r_k_hat = R[key] / (1. - (beta2** iter))
                nn.model[key] -= alpha * m_k_hat / (np.sqrt(r_k_hat) + l.eps)

            # Validate the model: free-running prediction seeded with the first validation input
            ys = nn.test(X_seed=X_valid[0], h=state, size=X_valid.shape[0]) # ys_txn
            valid_loss, _ = nn.loss_function(y_pred=ys, y_train=y_valid)
            nn.losses['valid'].append(valid_loss)

        # Print the training and validation loss
        if iter % print_after == 0:
            print('Iter-{}, train loss: {:.8f}, valid loss: {:.8f}'.format(iter, loss, valid_loss))

    return nn
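
The parameter update above also relies on l.exp_running_avg and l.eps from impl.layer. A minimal sketch of what they are assumed to do (standard Adam bookkeeping; the actual values in impl.layer may differ):

eps = 1e-8  # small constant assumed for numerical stability in the Adam denominator

def exp_running_avg(running, new, gamma):
    # Exponentially weighted moving average used for Adam's moment estimates
    return gamma * running + (1. - gamma) * new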

In [17]:
# Hyper-parameters
time_step = 128 # minibatch size (steps per update): 32, 64, 128, or 256
n_iter = 200 # epochs
alpha = 1e-4 # learning rate: 1e-3, 5e-4, 1e-4 are common default choices
print_after = 1 # print training and validation loss every epoch
num_hidden_units = 64 # number of units in the hidden layer
num_input_units = X_train.shape[1] # X_txn: number of input features (here 1, the normalized count)

# Build the network and optimize it with minibatch gradient descent (Adam)
# def adam_rnn(nn, X_train, y_train, alpha=0.001, mb_size=256, n_iter=2000, print_after=100):
net = GRU(D=num_input_units, H=num_hidden_units)

# Start training with backpropagation through time and Adam
adam_rnn(nn=net, XY_train=(X_train, Y_train), XY_valid=(X_valid, Y_valid), alpha=alpha, mb_size=time_step,
         n_iter=n_iter, print_after=print_after)


Iter-1, train loss: 49.12238400, valid loss: 632.88334268
Iter-2, train loss: 44.73464075, valid loss: 632.73692080
Iter-3, train loss: 41.72123433, valid loss: 632.65475590
Iter-4, train loss: 39.16055403, valid loss: 632.67315929
Iter-5, train loss: 36.91749477, valid loss: 632.76823888
Iter-6, train loss: 35.01899626, valid loss: 632.93258421
Iter-7, train loss: 33.47260176, valid loss: 633.23042928
Iter-8, train loss: 32.18525552, valid loss: 633.75773432
Iter-9, train loss: 31.04521832, valid loss: 634.53655424
Iter-10, train loss: 29.99458687, valid loss: 635.51743283
Iter-11, train loss: 29.00795537, valid loss: 636.63826053
Iter-12, train loss: 28.06882097, valid loss: 637.86065546
Iter-13, train loss: 27.16188137, valid loss: 639.19594803
Iter-14, train loss: 26.27056742, valid loss: 640.73275606
Iter-15, train loss: 25.37503866, valid loss: 642.69033015
Iter-16, train loss: 24.44904562, valid loss: 645.57090041
Iter-17, train loss: 23.45374313, valid loss: 650.62210302
Iter-18, train loss: 22.32487328, valid loss: 661.81931091
Iter-19, train loss: 20.99872045, valid loss: 697.37654114
Iter-20, train loss: 19.74850374, valid loss: 800.94966419
Iter-21, train loss: 18.86225932, valid loss: 889.70705998
Iter-22, train loss: 18.12204504, valid loss: 923.40181109
Iter-23, train loss: 17.49575868, valid loss: 949.65253771
Iter-24, train loss: 16.95580388, valid loss: 974.85208721
Iter-25, train loss: 16.48206235, valid loss: 999.09495700
Iter-26, train loss: 16.06025927, valid loss: 1022.10974603
Iter-27, train loss: 15.67870128, valid loss: 1044.16029515
Iter-28, train loss: 15.32687186, valid loss: 1065.92562755
Iter-29, train loss: 14.99574709, valid loss: 1088.28724640
Iter-30, train loss: 14.67828827, valid loss: 1111.95438878
Iter-31, train loss: 14.36962318, valid loss: 1137.12394504
Iter-32, train loss: 14.06680869, valid loss: 1163.44074191
Iter-33, train loss: 13.76837672, valid loss: 1190.29382810
Iter-34, train loss: 13.47393571, valid loss: 1217.20051748
Iter-35, train loss: 13.18383398, valid loss: 1244.00301790
Iter-36, train loss: 12.89880415, valid loss: 1270.80741760
Iter-37, train loss: 12.61956911, valid loss: 1297.76505787
Iter-38, train loss: 12.34645531, valid loss: 1324.86444607
Iter-39, train loss: 12.07912693, valid loss: 1351.86465807
Iter-40, train loss: 11.81654801, valid loss: 1378.35688112
Iter-41, train loss: 11.55716970, valid loss: 1403.83558957
Iter-42, train loss: 11.29931467, valid loss: 1427.77754530
Iter-43, train loss: 11.04172318, valid loss: 1449.78655246
Iter-44, train loss: 10.78409669, valid loss: 1469.68635834
Iter-45, train loss: 10.52745094, valid loss: 1487.51677065
Iter-46, train loss: 10.27416088, valid loss: 1503.47828136
Iter-47, train loss: 10.02758413, valid loss: 1517.85534927
Iter-48, train loss: 9.79126720, valid loss: 1530.95536400
Iter-49, train loss: 9.56798628, valid loss: 1543.07672575
Iter-50, train loss: 9.35905694, valid loss: 1554.50138869
Iter-51, train loss: 9.16423316, valid loss: 1565.49669752
Iter-52, train loss: 8.98214385, valid loss: 1576.31112289
Iter-53, train loss: 8.81092386, valid loss: 1587.15956018
Iter-54, train loss: 8.64871288, valid loss: 1598.20449663
Iter-55, train loss: 8.49390276, valid loss: 1609.54031888
Iter-56, train loss: 8.34518260, valid loss: 1621.18262187
Iter-57, train loss: 8.20147924, valid loss: 1633.06000528
Iter-58, train loss: 8.06186427, valid loss: 1645.00501306
Iter-59, train loss: 7.92546171, valid loss: 1656.74220852
Iter-60, train loss: 7.79136834, valid loss: 1667.87323934
Iter-61, train loss: 7.65859207, valid loss: 1677.85967061
Iter-62, train loss: 7.52601430, valid loss: 1686.00240698
Iter-63, train loss: 7.39238412, valid loss: 1691.40684183
Iter-64, train loss: 7.25635469, valid loss: 1692.87894136
Iter-65, train loss: 7.11657368, valid loss: 1688.58368061
Iter-66, train loss: 6.97183983, valid loss: 1678.11005716
Iter-67, train loss: 6.82133360, valid loss: 1684.67682986
Iter-68, train loss: 6.66491315, valid loss: 1713.53486655
Iter-69, train loss: 6.50343541, valid loss: 1779.71249878
Iter-70, train loss: 6.33901965, valid loss: 1825.58285108
Iter-71, train loss: 6.17513260, valid loss: 1845.43629163
Iter-72, train loss: 6.01636804, valid loss: 1862.97390013
Iter-73, train loss: 5.86784190, valid loss: 1875.41611539
Iter-74, train loss: 5.73424800, valid loss: 1849.23190872
Iter-75, train loss: 5.61881231, valid loss: 1849.33194003
Iter-76, train loss: 5.52255207, valid loss: 1847.51857091
Iter-77, train loss: 5.44420788, valid loss: 1873.12432324
Iter-78, train loss: 5.38089072, valid loss: 1800.04479703
Iter-79, train loss: 5.32908813, valid loss: 1764.80053457
Iter-80, train loss: 5.28555165, valid loss: 1728.43000629
Iter-81, train loss: 5.24778811, valid loss: 1730.31463312
Iter-82, train loss: 5.21414545, valid loss: 1694.20374871
Iter-83, train loss: 5.18363916, valid loss: 1687.18147713
Iter-84, train loss: 5.15569738, valid loss: 1685.44374727
Iter-85, train loss: 5.12995209, valid loss: 1682.49514624
Iter-86, train loss: 5.10612186, valid loss: 1643.64682800
Iter-87, train loss: 5.08396911, valid loss: 1596.13915255
Iter-88, train loss: 5.06329497, valid loss: 1639.94631051
Iter-89, train loss: 5.04394361, valid loss: 1677.58700393
Iter-90, train loss: 5.02580410, valid loss: 1661.47801612
Iter-91, train loss: 5.00880769, valid loss: 1723.57971835
Iter-92, train loss: 4.99292238, valid loss: 1714.65757940
Iter-93, train loss: 4.97814664, valid loss: 1672.44007578
Iter-94, train loss: 4.96450290, valid loss: 1700.97644536
Iter-95, train loss: 4.95203078, valid loss: 1603.81011539
Iter-96, train loss: 4.94077976, valid loss: 1576.50271807
Iter-97, train loss: 4.93080120, valid loss: 1642.16923794
Iter-98, train loss: 4.92213995, valid loss: 1588.37482551
Iter-99, train loss: 4.91482630, valid loss: 1595.82456251
Iter-100, train loss: 4.90886880, valid loss: 1656.85459420
Iter-101, train loss: 4.90424908, valid loss: 1583.78060922
Iter-102, train loss: 4.90091902, valid loss: 1674.27726031
Iter-103, train loss: 4.89880055, valid loss: 1624.76204107
Iter-104, train loss: 4.89778812, valid loss: 1694.83462487
Iter-105, train loss: 4.89775321, valid loss: 1603.72703402
Iter-106, train loss: 4.89855037, valid loss: 1553.12775693
Iter-107, train loss: 4.90002393, valid loss: 1529.52317670
Iter-108, train loss: 4.90201469, valid loss: 1664.50424361
Iter-109, train loss: 4.90436594, valid loss: 1470.87788587
Iter-110, train loss: 4.90692839, valid loss: 1565.47436672
Iter-111, train loss: 4.90956385, valid loss: 1537.46483145
Iter-112, train loss: 4.91214751, valid loss: 1638.14495854
Iter-113, train loss: 4.91456909, valid loss: 1593.96333094
Iter-114, train loss: 4.91673290, valid loss: 1565.31504218
Iter-115, train loss: 4.91855729, valid loss: 1661.59564749
Iter-116, train loss: 4.91997345, valid loss: 1692.65649279
Iter-117, train loss: 4.92092411, valid loss: 1645.94196629
Iter-118, train loss: 4.92136211, valid loss: 1595.35210790
Iter-119, train loss: 4.92124915, valid loss: 1901.18725387
Iter-120, train loss: 4.92055465, valid loss: 2105.94122084
Iter-121, train loss: 4.91925501, valid loss: 2008.27517199
Iter-122, train loss: 4.91733301, valid loss: 1910.66071366
Iter-123, train loss: 4.91477747, valid loss: 2216.48283224
Iter-124, train loss: 4.91158306, valid loss: 2205.13601537
Iter-125, train loss: 4.90775019, valid loss: 2193.63962115
Iter-126, train loss: 4.90328490, valid loss: 2177.51447943
Iter-127, train loss: 4.89819872, valid loss: 2184.92999726
Iter-128, train loss: 4.89250844, valid loss: 2177.45787960
Iter-129, train loss: 4.88623579, valid loss: 1454.02583309
Iter-130, train loss: 4.87940705, valid loss: 2058.53402452
Iter-131, train loss: 4.87205254, valid loss: 2050.30114352
Iter-132, train loss: 4.86420602, valid loss: 1639.38335534
Iter-133, train loss: 4.85590407, valid loss: 1617.86948903
Iter-134, train loss: 4.84718544, valid loss: 1568.19337210
Iter-135, train loss: 4.83809034, valid loss: 1459.19216529
Iter-136, train loss: 4.82865979, valid loss: 1329.06101088
Iter-137, train loss: 4.81893502, valid loss: 1527.21365108
Iter-138, train loss: 4.80895687, valid loss: 1399.18710278
Iter-139, train loss: 4.79876529, valid loss: 1262.09846745
Iter-140, train loss: 4.78839894, valid loss: 1268.09416448
Iter-141, train loss: 4.77789480, valid loss: 1454.28435729
Iter-142, train loss: 4.76728794, valid loss: 1524.02440503
Iter-143, train loss: 4.75661130, valid loss: 1132.91259235
Iter-144, train loss: 4.74589556, valid loss: 1363.87162997
Iter-145, train loss: 4.73516909, valid loss: 1166.51698313
Iter-146, train loss: 4.72445786, valid loss: 1038.43085842
Iter-147, train loss: 4.71378552, valid loss: 1253.26774096
Iter-148, train loss: 4.70317340, valid loss: 1023.05038067
Iter-149, train loss: 4.69264054, valid loss: 1246.24072855
Iter-150, train loss: 4.68220381, valid loss: 1052.87615827
Iter-151, train loss: 4.67187799, valid loss: 1253.50815784
Iter-152, train loss: 4.66167580, valid loss: 1134.37377003
Iter-153, train loss: 4.65160809, valid loss: 1090.50553910
Iter-154, train loss: 4.64168384, valid loss: 889.40431921
Iter-155, train loss: 4.63191036, valid loss: 929.08202443
Iter-156, train loss: 4.62229334, valid loss: 923.50160520
Iter-157, train loss: 4.61283700, valid loss: 895.66222556
Iter-158, train loss: 4.60354418, valid loss: 895.11061707
Iter-159, train loss: 4.59441648, valid loss: 881.72111656
Iter-160, train loss: 4.58545436, valid loss: 851.26335053
Iter-161, train loss: 4.57665731, valid loss: 842.75963794
Iter-162, train loss: 4.56802390, valid loss: 835.54474342
Iter-163, train loss: 4.55955194, valid loss: 803.30961503
Iter-164, train loss: 4.55123862, valid loss: 776.74614966
Iter-165, train loss: 4.54308054, valid loss: 754.29283456
Iter-166, train loss: 4.53507390, valid loss: 736.62539552
Iter-167, train loss: 4.52721453, valid loss: 722.11300906
Iter-168, train loss: 4.51949801, valid loss: 688.97378520
Iter-169, train loss: 4.51191977, valid loss: 657.73339456
Iter-170, train loss: 4.50447509, valid loss: 648.92098233
Iter-171, train loss: 4.49715926, valid loss: 648.45099222
Iter-172, train loss: 4.48996756, valid loss: 648.82802920
Iter-173, train loss: 4.48289534, valid loss: 649.44667490
Iter-174, train loss: 4.47593807, valid loss: 649.61648430
Iter-175, train loss: 4.46909133, valid loss: 651.76580712
Iter-176, train loss: 4.46235090, valid loss: 658.00379558
Iter-177, train loss: 4.45571271, valid loss: 687.00287833
Iter-178, train loss: 4.44917291, valid loss: 704.38344217
Iter-179, train loss: 4.44272786, valid loss: 715.61555088
Iter-180, train loss: 4.43637413, valid loss: 718.91255095
Iter-181, train loss: 4.43010849, valid loss: 730.19969266
Iter-182, train loss: 4.42392796, valid loss: 740.91181982
Iter-183, train loss: 4.41782972, valid loss: 742.32325861
Iter-184, train loss: 4.41181120, valid loss: 782.50346171
Iter-185, train loss: 4.40586998, valid loss: 777.35522682
Iter-186, train loss: 4.40000385, valid loss: 785.73140625
Iter-187, train loss: 4.39421076, valid loss: 796.75265880
Iter-188, train loss: 4.38848881, valid loss: 809.84463169
Iter-189, train loss: 4.38283626, valid loss: 814.23931512
Iter-190, train loss: 4.37725149, valid loss: 794.84769445
Iter-191, train loss: 4.37173300, valid loss: 779.41804088
Iter-192, train loss: 4.36627938, valid loss: 781.47993439
Iter-193, train loss: 4.36088934, valid loss: 785.23031015
Iter-194, train loss: 4.35556164, valid loss: 789.29245762
Iter-195, train loss: 4.35029513, valid loss: 784.82693523
Iter-196, train loss: 4.34508867, valid loss: 783.22302915
Iter-197, train loss: 4.33994122, valid loss: 787.51120319
Iter-198, train loss: 4.33485174, valid loss: 803.80039163
Iter-199, train loss: 4.32981921, valid loss: 820.00379433
Iter-200, train loss: 4.32484265, valid loss: 839.09570180
Out[17]:
<__main__.GRU at 0x7f143f5a0b70>

In [18]:
import matplotlib.pyplot as plt

plt.plot(net.losses['train'], label='Train loss')
plt.plot(net.losses['smooth train'], label='Train smooth loss')
plt.legend()
plt.show()



In [19]:
import matplotlib.pyplot as plt

plt.plot(net.losses['valid'], label='Validation loss')
plt.legend()
plt.show()



In [28]:
import matplotlib.pyplot as plt

y_pred = net.test(X_seed=X_valid[0], h=net.initial_state(), size=X_valid.shape[0]) # free-running prediction, ys_txn
y_pred.shape, Y_valid.shape

plt.plot(y_pred[:], label='y_pred')
plt.plot(Y_valid[:], label='Y_valid')
# plt.plot(X_valid[:100], label='X_valid')
plt.legend()
plt.show()
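
The predictions above are on the standardized scale. To read them as ride counts, they can be mapped back through the inverse of the z-score transform using the mean and std computed earlier; a minimal sketch:

# Invert the standardization to get predictions in the original 'cnt' units
y_pred_counts = (y_pred * std) + mean
Y_valid_counts = (Y_valid * std) + mean

plt.plot(y_pred_counts, label='predicted count')
plt.plot(Y_valid_counts, label='actual count')
plt.legend()
plt.show()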


