In [1]:
# Data: time-series data (bike-sharing data by default; smartwatch and financial datasets are alternatives below)
# %matplotlib inline # for plt.show()
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Data reading
# The historical/time-series data to visualize; alternative data paths are commented out
# data_path = 'data/smartwatch_data/experimental_data_analysis/Basis_Watch_Data.csv'
# data_path = 'data/financial_data/USD_INR.csv'
data_path = 'data/bike_data/hour.csv'
data = pd.read_csv(data_path)

# Data cleaning
# Replace any NaN values with 0.0
data = data.fillna(value=0.0)

# Preview the first 10 rows of the CSV (comma-separated values) file
data[:10]


Out[1]:
instant dteday season yr mnth hr holiday weekday workingday weathersit temp atemp hum windspeed casual registered cnt
0 1 2011-01-01 1 0 1 0 0 6 0 1 0.24 0.2879 0.81 0.0000 3 13 16
1 2 2011-01-01 1 0 1 1 0 6 0 1 0.22 0.2727 0.80 0.0000 8 32 40
2 3 2011-01-01 1 0 1 2 0 6 0 1 0.22 0.2727 0.80 0.0000 5 27 32
3 4 2011-01-01 1 0 1 3 0 6 0 1 0.24 0.2879 0.75 0.0000 3 10 13
4 5 2011-01-01 1 0 1 4 0 6 0 1 0.24 0.2879 0.75 0.0000 0 1 1
5 6 2011-01-01 1 0 1 5 0 6 0 2 0.24 0.2576 0.75 0.0896 0 1 1
6 7 2011-01-01 1 0 1 6 0 6 0 1 0.22 0.2727 0.80 0.0000 2 0 2
7 8 2011-01-01 1 0 1 7 0 6 0 1 0.20 0.2576 0.86 0.0000 1 2 3
8 9 2011-01-01 1 0 1 8 0 6 0 1 0.24 0.2879 0.75 0.0000 1 7 8
9 10 2011-01-01 1 0 1 9 0 6 0 1 0.32 0.3485 0.76 0.0000 8 6 14

In [2]:
# Plotting the raw data before normalization
# data[:10000]['Price'].plot()  # e.g. for the financial dataset
data[:10].plot()
plt.legend()
plt.show()



In [3]:
data_array = np.array(data)
data_array.shape, data_array.dtype
# Keep only the last 3 columns (casual, registered, cnt) as the multivariate series to model
data_main = np.array(data_array[:, -3:], dtype=float)
data_main.shape, data_main.dtype

plt.plot(data_main[:100])
plt.show()



In [4]:
# Z-score normalization: zero mean and unit standard deviation per feature/column
mean = np.mean(data_main, axis=0)
std = np.std(data_main, axis=0)
std.shape, mean.shape, std.dtype, mean.dtype

data_norm = (data_main - mean) / std
plt.plot(data_norm[:100])
plt.show()
data_norm.mean(), data_norm.std(), data_norm.var(), data_norm.shape, data_norm.dtype


Out[4]:
(-1.7444323642958519e-17, 1.0, 1.0, (17379, 3), dtype('float64'))
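
The network will be trained on these normalized values, so its predictions come out in normalized units as well. A minimal sketch (added for illustration, using the mean and std computed above) of how to invert the transform and recover the original units:

In [ ]:
# Undo the z-score normalization; for the last column this recovers actual rental counts.
def denormalize(x_norm, mean=mean, std=std):
    return (x_norm * std) + mean

np.allclose(denormalize(data_norm), data_main)  # should be True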

In [5]:
train_data = data_norm[:16000] # first 16000 time steps for training
test_data = data_norm[16000:]  # remaining time steps for validation
train_data.shape, test_data.shape
# One-step-ahead targets: Y_train[t] is the time step that follows X_train[t]
X_train = train_data[0:15999]
Y_train = train_data[1:16000]
X_train.shape, Y_train.shape

plt.plot(X_train[:100])
plt.plot(Y_train[:100])
plt.show()
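
The split in the cell above builds one-step-ahead targets: Y_train[t] is simply the time step that follows X_train[t]. A quick check of that shift (a sketch added for illustration, not part of the original run):

In [ ]:
# Because X_train = train_data[0:15999] and Y_train = train_data[1:16000],
# every target row equals the next input row.
assert np.allclose(X_train[1:], Y_train[:-1])
X_train.shape, Y_train.shape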



In [6]:
# Same one-step shift for the validation split
X_valid = test_data[0:1378]
Y_valid = test_data[1:1379]
X_valid.shape, Y_valid.shape
plt.plot(X_valid[:100])
plt.plot(Y_valid[:100])
plt.show()



In [7]:
# Model or Network
import impl.layer as l
from impl.loss import *

class GRU:
    def __init__(self, D, H):
        self.D = D
        self.H = H
        self.losses = {'train':[], 'smooth train':[], 'valid': []}
        
        # Model params
        Z = H + D  # size of the concatenated [h, x] input
        low, high = -1.0, 1.0
        m = dict(
            # Wz=np.random.randn(Z, H) / np.sqrt(Z / 2.),
            # Wr=np.random.randn(Z, H) / np.sqrt(Z / 2.),
            # Wh=np.random.randn(Z, H) / np.sqrt(Z / 2.),
            # Wy=np.random.randn(H, D) / np.sqrt(H / 2.),
            Wz=np.random.uniform(size=(Z, H), low=low, high=high) / np.sqrt(Z / 2.),
            Wr=np.random.uniform(size=(Z, H), low=low, high=high) / np.sqrt(Z / 2.),
            Wh=np.random.uniform(size=(Z, H), low=low, high=high) / np.sqrt(Z / 2.),
            Wy=np.random.uniform(size=(H, D), low=low, high=high) / np.sqrt(H / 2.),
            bz=np.zeros((1, H)),
            br=np.zeros((1, H)),
            bh=np.zeros((1, H)),
            by=np.zeros((1, D))
        )
        self.model = m
        
    def initial_state(self):
        return np.zeros((1, self.H))

    def forward(self, X, h, m):
        Wz, Wr, Wh, Wy = m['Wz'], m['Wr'], m['Wh'], m['Wy']
        bz, br, bh, by = m['bz'], m['br'], m['bh'], m['by']

        X_in = X.copy()
        h_in = h.copy()

        X = np.column_stack((h_in, X_in))

        # Update gate: z = sigmoid([h, x] Wz + bz)
        hz, hz_cache = l.fc_forward(X, Wz, bz)
        hz, hz_sigm_cache = l.sigmoid_forward(hz)

        # Reset gate: r = sigmoid([h, x] Wr + br)
        hr, hr_cache = l.fc_forward(X, Wr, br)
        hr, hr_sigm_cache = l.sigmoid_forward(hr)

        # Candidate state: h_tilde = tanh([r * h, x] Wh + bh)
        X = np.column_stack((hr * h_in, X_in))

        hh, hh_cache = l.fc_forward(X, Wh, bh)
        hh, hh_tanh_cache = l.tanh_forward(hh)

        # GRU state update: h = (1 - z) * h_in + z * h_tilde
        h = ((1. - hz) * h_in) + (hz * hh)
        # equivalently: h = h_in + hz * (hh - h_in)

        # Linear readout
        y, y_cache = l.fc_forward(h, Wy, by)
        
        cache = (h_in, hz, hz_cache, hz_sigm_cache, hr, hr_cache, hr_sigm_cache, hh, hh_cache, hh_tanh_cache, 
                 y_cache)

        return y, h, cache

    def backward(self, dy, dh, cache):
        h_in, hz, hz_cache, hz_sigm_cache, hr, hr_cache, hr_sigm_cache, hh, hh_cache, hh_tanh_cache, y_cache = cache
        
        dh_out = dh.copy()

        dh, dWy, dby = l.fc_backward(dy, y_cache)
        dh += dh_out

        dh_in1 = (1. - hz) * dh
        dhh = hz * dh
        dhz = (hh * dh) - (h_in * dh)
        # or
        # dhz = (hh - h_in) * dh

        dhh = l.tanh_backward(dhh, hh_tanh_cache)
        dXh, dWh, dbh = l.fc_backward(dhh, hh_cache)

        dh = dXh[:, :self.H]
        dX_in2 = dXh[:, self.H:]
        dh_in2 = hr * dh

        dhr = h_in * dh
        dhr = l.sigmoid_backward(dhr, hr_sigm_cache)
        dXr, dWr, dbr = l.fc_backward(dhr, hr_cache)

        dhz = l.sigmoid_backward(dhz, hz_sigm_cache)
        dXz, dWz, dbz = l.fc_backward(dhz, hz_cache)

        dX = dXr + dXz
        dh_in3 = dX[:, :self.H]
        dX_in1 = dX[:, self.H:]

        dh = dh_in1 + dh_in2 + dh_in3
        dX = dX_in1 + dX_in2

        grad = dict(Wz=dWz, Wr=dWr, Wh=dWh, Wy=dWy, bz=dbz, br=dbr, bh=dbh, by=dby)
        
        return dX, dh, grad

    def train_forward(self, X_train, h):
        ys, fc_caches = [], []

        for X in X_train:
            X = X.reshape(1, -1) # X_1xn
            y, h, fc_cache = self.forward(X, h, self.model)
            fc_caches.append(fc_cache)
            ys.append(y)
        
        ys = np.array(ys, dtype=float).reshape(len(ys), -1) # ys_txn instead of ys_tx1xn
        caches = fc_caches
        
        return ys, caches
                                
    def loss_function(self, y_pred, y_train):
        loss, dys = 0.0, []

        for y, Y in zip(y_pred, y_train):
            loss += l2_regression(y_pred=y, y_train=Y)
            dy = dl2_regression(y_pred=y, y_train=Y)
            dys.append(dy)
            
        return loss, dys
    
    def train_backward(self, dys, caches):
        fc_caches = caches
        
        dh = np.zeros((1, self.H))
        grads = {key: np.zeros_like(val) for key, val in self.model.items()}

        for t in reversed(range(len(dys))):
            dy = dys[t].reshape(1, -1) # dy_1xn
            dX, dh, grad = self.backward(dy, dh, fc_caches[t])
            for key in grad.keys():
                grads[key] += grad[key]
                
        return dX, grads  # dX (gradient w.r.t. the first input) is unused by the caller but returned for completeness
    
    def test(self, X_seed, h, size):
        ys = []
        X = X_seed.reshape(1, -1)
        for _ in range(size):
            y, h, _ = self.forward(X, h, self.model)
            X = y.copy() # previous out for the next input for prediction
            ys.append(y) # list array
        
        ys = np.array(ys, dtype=float).reshape(len(ys), -1) # ys_txn instead of ys_tx1xn
        return ys
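
Both the forward and the backward pass are hand-written, so a numerical gradient check is a useful safeguard. The sketch below is added for illustration and assumes the impl.layer helpers and l2_regression behave exactly as used above: it perturbs a single weight entry and compares the analytic gradient from train_backward against a centered finite difference.

In [ ]:
def grad_check(nn, X_seq, Y_seq, key='Wy', i=0, j=0, eps=1e-5):
    # Analytic gradient from the hand-written backward pass
    h0 = nn.initial_state()
    ys, caches = nn.train_forward(X_seq, h0)
    _, dys = nn.loss_function(y_pred=ys, y_train=Y_seq)
    _, grads = nn.train_backward(dys, caches)

    # Centered finite difference on one weight entry
    W = nn.model[key]
    old = W[i, j]
    W[i, j] = old + eps
    loss_plus, _ = nn.loss_function(y_pred=nn.train_forward(X_seq, h0)[0], y_train=Y_seq)
    W[i, j] = old - eps
    loss_minus, _ = nn.loss_function(y_pred=nn.train_forward(X_seq, h0)[0], y_train=Y_seq)
    W[i, j] = old  # restore the original weight

    num_grad = (loss_plus - loss_minus) / (2. * eps)
    print('{}[{},{}] analytic: {:.6e}, numeric: {:.6e}'.format(key, i, j, grads[key][i, j], num_grad))

# Example call on a tiny sequence (sizes chosen arbitrarily):
# grad_check(GRU(D=3, H=8), X_train[:16], Y_train[:16])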

In [8]:
def get_minibatch(X, y, minibatch_size, shuffle):
    # Note: shuffle is accepted for API symmetry but intentionally unused here;
    # the time order must be preserved for next-step prediction.
    minibatches = []

    # for i in range(0, X.shape[0] - minibatch_size + 1, 1):
    for i in range(0, X.shape[0], minibatch_size):
        X_mini = X[i:(i + minibatch_size)]
        y_mini = y[i:(i + minibatch_size)]
        minibatches.append((X_mini, y_mini))

    return minibatches

def adam_rnn(nn, XY_train, XY_valid, alpha, mb_size, n_iter, print_after):
    X_train, y_train = XY_train
    X_valid, y_valid = XY_valid

    # Adam moment estimates (first and second moments of the gradients)
    M = {key: np.zeros_like(val) for key, val in nn.model.items()}
    R = {key: np.zeros_like(val) for key, val in nn.model.items()}

    # Exponential decay rates for the moment estimates (values suggested by Justin Johnson, Stanford)
    beta1 = .9
    beta2 = .99
    state = nn.initial_state()
    smooth_loss = 1.
    minibatches = get_minibatch(X_train, y_train, mb_size, shuffle=False)
    
    # Epochs: iterating through the whole data
    for iter in range(1, n_iter + 1):
        
        # Minibatches
        for idx in range(len(minibatches)):
            
            # Train the model
            X_mini, y_mini = minibatches[idx]
            ys, caches = nn.train_forward(X_mini, state)
            loss, dys = nn.loss_function(y_pred=ys, y_train=y_mini) #, alpha=alpha
            _, grads = nn.train_backward(dys, caches)
            nn.losses['train'].append(loss)
            smooth_loss = (0.999 * smooth_loss) + (0.001 * loss)
            nn.losses['smooth train'].append(smooth_loss)
            
            # Update the model
            for key in grads.keys(): #key, value: items
                M[key] = l.exp_running_avg(M[key], grads[key], beta1)
                R[key] = l.exp_running_avg(R[key], grads[key]**2, beta2)
                m_k_hat = M[key] / (1. - (beta1** iter))
                r_k_hat = R[key] / (1. - (beta2** iter))
                nn.model[key] -= alpha * m_k_hat / (np.sqrt(r_k_hat) + l.eps)

            # Validate the model by free-running prediction over the validation span
            ys = nn.test(X_seed=X_valid[0], h=state, size=X_valid.shape[0]) # ys_txn
            valid_loss, _ = nn.loss_function(y_pred=ys, y_train=y_valid) #, alpha=alpha
            nn.losses['valid'].append(valid_loss)

        # Print the training and validation loss
        if iter % print_after == 0:
            print('Iter-{}, train loss: {:.8f}, valid loss: {:.8f}'.format(iter, loss, valid_loss))

    return nn
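
get_minibatch slices the training sequence into contiguous, non-overlapping windows of mb_size consecutive time steps (the last window may be shorter), so each "minibatch" is itself a short sub-sequence used for truncated backpropagation through time. A quick illustration (a sketch, not part of the original run):

In [ ]:
# Each element of mbs is an (X_window, Y_window) pair of consecutive time steps.
mbs = get_minibatch(X_train, Y_train, minibatch_size=128, shuffle=False)
len(mbs), mbs[0][0].shape, mbs[-1][0].shape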

In [9]:
# Hyper-parameters
time_step = 128 # minibatch size / truncated BPTT window length: 32, 64, 128, or 256
n_iter = 200 # epochs
alpha = 1e-4 # learning rate: 1e-3, 5e-4, 1e-4 are common default choices
print_after = 1 # print the training and validation loss every print_after epochs
num_hidden_units = 64 # number of units in the hidden layer
num_input_units = X_train.shape[1] # number of input features (all channels of the X_txn series)

# Build the network; it is optimized with Adam (a variant of SGD)
# def adam_rnn(nn, X_train, y_train, alpha=0.001, mb_size=256, n_iter=2000, print_after=100):
net = GRU(D=num_input_units, H=num_hidden_units) 

# Start training with backpropagation through time and Adam
adam_rnn(nn=net, XY_train=(X_train, Y_train), XY_valid=(X_valid, Y_valid), alpha=alpha, mb_size=time_step,
         n_iter=n_iter, print_after=print_after)


Iter-1, train loss: 40.17664305, valid loss: 595.86430121
Iter-2, train loss: 35.56985701, valid loss: 596.25142097
Iter-3, train loss: 32.42972038, valid loss: 598.63829772
Iter-4, train loss: 29.94224640, valid loss: 603.47599300
Iter-5, train loss: 27.95805331, valid loss: 610.59662444
Iter-6, train loss: 26.40043235, valid loss: 618.77277694
Iter-7, train loss: 25.15489396, valid loss: 626.41031852
Iter-8, train loss: 24.10799124, valid loss: 632.34503132
Iter-9, train loss: 23.18561732, valid loss: 635.95515148
Iter-10, train loss: 22.34675290, valid loss: 637.20494450
Iter-11, train loss: 21.56754013, valid loss: 636.48658489
Iter-12, train loss: 20.83178653, valid loss: 634.30150163
Iter-13, train loss: 20.12671819, valid loss: 631.05814321
Iter-14, train loss: 19.44135762, valid loss: 627.03825552
Iter-15, train loss: 18.76605375, valid loss: 622.41535258
Iter-16, train loss: 18.09299493, valid loss: 617.33023310
Iter-17, train loss: 17.41727763, valid loss: 611.98514078
Iter-18, train loss: 16.73809498, valid loss: 606.73181842
Iter-19, train loss: 16.05995419, valid loss: 602.21126633
Iter-20, train loss: 15.39277570, valid loss: 599.45787738
Iter-21, train loss: 14.74981561, valid loss: 599.78010425
Iter-22, train loss: 14.14452690, valid loss: 604.20913741
Iter-23, train loss: 13.58626695, valid loss: 612.65917108
Iter-24, train loss: 13.07628499, valid loss: 623.52136872
Iter-25, train loss: 12.60797436, valid loss: 634.03513125
Iter-26, train loss: 12.17196663, valid loss: 641.38284212
Iter-27, train loss: 11.76116305, valid loss: 643.93673407
Iter-28, train loss: 11.37210110, valid loss: 641.67928582
Iter-29, train loss: 11.00363955, valid loss: 635.76302290
Iter-30, train loss: 10.65523918, valid loss: 627.91382034
Iter-31, train loss: 10.32593297, valid loss: 620.09374900
Iter-32, train loss: 10.01403991, valid loss: 614.32866664
Iter-33, train loss: 9.71734112, valid loss: 612.19625000
Iter-34, train loss: 9.43343448, valid loss: 614.98750284
Iter-35, train loss: 9.16006927, valid loss: 651.05043076
Iter-36, train loss: 8.89536878, valid loss: 757.10639674
Iter-37, train loss: 8.63793051, valid loss: 863.70506914
Iter-38, train loss: 8.38683249, valid loss: 2560.84110036
Iter-39, train loss: 8.14158168, valid loss: 2595.14030456
Iter-40, train loss: 7.90203221, valid loss: 2598.84147169
Iter-41, train loss: 7.66829378, valid loss: 2579.52429498
Iter-42, train loss: 7.44064599, valid loss: 2539.42035136
Iter-43, train loss: 7.21947130, valid loss: 2479.27830218
Iter-44, train loss: 7.00521497, valid loss: 2399.05270166
Iter-45, train loss: 6.79837572, valid loss: 2299.83302557
Iter-46, train loss: 6.59952288, valid loss: 2188.60609951
Iter-47, train loss: 6.40932267, valid loss: 2058.61714375
Iter-48, train loss: 6.22854412, valid loss: 1886.46033495
Iter-49, train loss: 6.05801700, valid loss: 1641.78899293
Iter-50, train loss: 5.89853615, valid loss: 1317.74408813
Iter-51, train loss: 5.75073718, valid loss: 782.25832766
Iter-52, train loss: 5.61498624, valid loss: 1270.84058175
Iter-53, train loss: 5.49132014, valid loss: 1669.29979216
Iter-54, train loss: 5.37944925, valid loss: 1762.11267474
Iter-55, train loss: 5.27881083, valid loss: 1731.94756231
Iter-56, train loss: 5.18864882, valid loss: 1704.00644271
Iter-57, train loss: 5.10809676, valid loss: 1675.53715030
Iter-58, train loss: 5.03624850, valid loss: 1650.68514744
Iter-59, train loss: 4.97221031, valid loss: 1602.56468537
Iter-60, train loss: 4.91513448, valid loss: 1561.29274312
Iter-61, train loss: 4.86423761, valid loss: 1554.92441608
Iter-62, train loss: 4.81880800, valid loss: 1563.51079228
Iter-63, train loss: 4.77820593, valid loss: 1569.29119820
Iter-64, train loss: 4.74185989, valid loss: 1570.23702899
Iter-65, train loss: 4.70926071, valid loss: 1572.59171942
Iter-66, train loss: 4.67995511, valid loss: 1575.12584115
Iter-67, train loss: 4.65353930, valid loss: 1578.66420446
Iter-68, train loss: 4.62965303, valid loss: 1581.08309136
Iter-69, train loss: 4.60797444, valid loss: 1581.90922607
Iter-70, train loss: 4.58821562, valid loss: 1584.41279658
Iter-71, train loss: 4.57011888, valid loss: 1585.83176485
Iter-72, train loss: 4.55345363, valid loss: 1592.35888198
Iter-73, train loss: 4.53801380, valid loss: 1600.36680358
Iter-74, train loss: 4.52361568, valid loss: 1607.18972035
Iter-75, train loss: 4.51009594, valid loss: 1615.69066922
Iter-76, train loss: 4.49730993, valid loss: 1620.64512404
Iter-77, train loss: 4.48513001, valid loss: 1612.12607437
Iter-78, train loss: 4.47344389, valid loss: 1618.20103612
Iter-79, train loss: 4.46215301, valid loss: 1627.49511267
Iter-80, train loss: 4.45117088, valid loss: 1640.01465733
Iter-81, train loss: 4.44042141, valid loss: 1656.82800631
Iter-82, train loss: 4.42983734, valid loss: 1676.38530329
Iter-83, train loss: 4.41935876, valid loss: 1708.71235298
Iter-84, train loss: 4.40893173, valid loss: 1742.94019875
Iter-85, train loss: 4.39850727, valid loss: 1769.56787182
Iter-86, train loss: 4.38804043, valid loss: 1790.56166167
Iter-87, train loss: 4.37748987, valid loss: 1798.53819914
Iter-88, train loss: 4.36681755, valid loss: 1813.43644619
Iter-89, train loss: 4.35598883, valid loss: 1825.09640533
Iter-90, train loss: 4.34497273, valid loss: 1840.37475816
Iter-91, train loss: 4.33374239, valid loss: 1855.92650706
Iter-92, train loss: 4.32227559, valid loss: 1867.39674697
Iter-93, train loss: 4.31055532, valid loss: 1876.93466755
Iter-94, train loss: 4.29857024, valid loss: 1884.98882196
Iter-95, train loss: 4.28631499, valid loss: 1887.31382302
Iter-96, train loss: 4.27379037, valid loss: 1893.04082803
Iter-97, train loss: 4.26100324, valid loss: 1897.88248670
Iter-98, train loss: 4.24796628, valid loss: 1901.56934095
Iter-99, train loss: 4.23469748, valid loss: 1905.77374494
Iter-100, train loss: 4.22121951, valid loss: 1905.86588184
Iter-101, train loss: 4.20755894, valid loss: 1905.65759830
Iter-102, train loss: 4.19374538, valid loss: 1903.09512777
Iter-103, train loss: 4.17981064, valid loss: 1903.38160765
Iter-104, train loss: 4.16578784, valid loss: 1898.07106167
Iter-105, train loss: 4.15171067, valid loss: 1894.81453651
Iter-106, train loss: 4.13761265, valid loss: 1898.70172440
Iter-107, train loss: 4.12352654, valid loss: 1884.55201811
Iter-108, train loss: 4.10948389, valid loss: 1863.51785042
Iter-109, train loss: 4.09551467, valid loss: 1788.06600491
Iter-110, train loss: 4.08164702, valid loss: 1793.66124641
Iter-111, train loss: 4.06790713, valid loss: 1796.94844812
Iter-112, train loss: 4.05431917, valid loss: 1799.82169571
Iter-113, train loss: 4.04090532, valid loss: 1802.16164745
Iter-114, train loss: 4.02768580, valid loss: 1794.08426460
Iter-115, train loss: 4.01467906, valid loss: 1792.72343779
Iter-116, train loss: 4.00190182, valid loss: 1788.83486177
Iter-117, train loss: 3.98936924, valid loss: 1819.57047147
Iter-118, train loss: 3.97709510, valid loss: 1818.25296849
Iter-119, train loss: 3.96509184, valid loss: 1823.30561481
Iter-120, train loss: 3.95337077, valid loss: 1819.99187999
Iter-121, train loss: 3.94194207, valid loss: 1833.00314292
Iter-122, train loss: 3.93081493, valid loss: 1823.45828204
Iter-123, train loss: 3.91999759, valid loss: 1841.82382887
Iter-124, train loss: 3.90949735, valid loss: 1832.99309487
Iter-125, train loss: 3.89932062, valid loss: 1853.99199355
Iter-126, train loss: 3.88947291, valid loss: 1837.82036697
Iter-127, train loss: 3.87995885, valid loss: 1811.69726971
Iter-128, train loss: 3.87078218, valid loss: 1804.21031700
Iter-129, train loss: 3.86194573, valid loss: 1873.10996802
Iter-130, train loss: 3.85345145, valid loss: 1845.75462787
Iter-131, train loss: 3.84530038, valid loss: 1810.40273546
Iter-132, train loss: 3.83749268, valid loss: 1819.52445189
Iter-133, train loss: 3.83002764, valid loss: 1820.80446943
Iter-134, train loss: 3.82290368, valid loss: 1811.23672282
Iter-135, train loss: 3.81611840, valid loss: 1819.74483173
Iter-136, train loss: 3.80966860, valid loss: 1817.01307933
Iter-137, train loss: 3.80355036, valid loss: 1818.17583243
Iter-138, train loss: 3.79775903, valid loss: 1823.36150815
Iter-139, train loss: 3.79228929, valid loss: 1823.47853286
Iter-140, train loss: 3.78713527, valid loss: 1824.81344737
Iter-141, train loss: 3.78229051, valid loss: 1824.44843008
Iter-142, train loss: 3.77774810, valid loss: 1824.69866768
Iter-143, train loss: 3.77350067, valid loss: 1817.03959519
Iter-144, train loss: 3.76954049, valid loss: 1821.70889914
Iter-145, train loss: 3.76585954, valid loss: 1792.33482464
Iter-146, train loss: 3.76244952, valid loss: 1797.00294900
Iter-147, train loss: 3.75930192, valid loss: 1795.30541429
Iter-148, train loss: 3.75640808, valid loss: 1803.59366127
Iter-149, train loss: 3.75375925, valid loss: 1796.59742260
Iter-150, train loss: 3.75134659, valid loss: 1803.23673550
Iter-151, train loss: 3.74916125, valid loss: 1800.34972881
Iter-152, train loss: 3.74719441, valid loss: 1805.91552369
Iter-153, train loss: 3.74543727, valid loss: 1806.31092073
Iter-154, train loss: 3.74388114, valid loss: 1805.31539929
Iter-155, train loss: 3.74251743, valid loss: 1812.46429153
Iter-156, train loss: 3.74133769, valid loss: 1801.15011243
Iter-157, train loss: 3.74033363, valid loss: 1801.68106275
Iter-158, train loss: 3.73949713, valid loss: 1811.46977710
Iter-159, train loss: 3.73882027, valid loss: 1814.58620119
Iter-160, train loss: 3.73829535, valid loss: 1817.30310799
Iter-161, train loss: 3.73791489, valid loss: 1810.97167103
Iter-162, train loss: 3.73767162, valid loss: 1796.16672991
Iter-163, train loss: 3.73755855, valid loss: 1799.43768864
Iter-164, train loss: 3.73756891, valid loss: 1791.53602600
Iter-165, train loss: 3.73769617, valid loss: 1812.79622566
Iter-166, train loss: 3.73793410, valid loss: 1752.89972164
Iter-167, train loss: 3.73827667, valid loss: 1752.55456166
Iter-168, train loss: 3.73871814, valid loss: 1741.04875663
Iter-169, train loss: 3.73925303, valid loss: 1756.01425208
Iter-170, train loss: 3.73987608, valid loss: 1736.83060761
Iter-171, train loss: 3.74058232, valid loss: 1793.28354293
Iter-172, train loss: 3.74136699, valid loss: 1728.74724746
Iter-173, train loss: 3.74222560, valid loss: 1733.93080132
Iter-174, train loss: 3.74315388, valid loss: 1719.60225560
Iter-175, train loss: 3.74414779, valid loss: 1770.48121848
Iter-176, train loss: 3.74520353, valid loss: 1792.84347862
Iter-177, train loss: 3.74631751, valid loss: 1780.10939941
Iter-178, train loss: 3.74748636, valid loss: 1734.13909575
Iter-179, train loss: 3.74870688, valid loss: 1730.88774004
Iter-180, train loss: 3.74997612, valid loss: 1759.59781000
Iter-181, train loss: 3.75129129, valid loss: 1713.90961395
Iter-182, train loss: 3.75264978, valid loss: 1764.64871674
Iter-183, train loss: 3.75404915, valid loss: 1717.25812301
Iter-184, train loss: 3.75548715, valid loss: 1764.91921246
Iter-185, train loss: 3.75696166, valid loss: 1702.60677368
Iter-186, train loss: 3.75847073, valid loss: 1745.57039429
Iter-187, train loss: 3.76001255, valid loss: 1722.83482034
Iter-188, train loss: 3.76158542, valid loss: 1737.58967706
Iter-189, train loss: 3.76318780, valid loss: 1697.26922930
Iter-190, train loss: 3.76481823, valid loss: 1671.85178135
Iter-191, train loss: 3.76647540, valid loss: 1688.37612972
Iter-192, train loss: 3.76815805, valid loss: 1695.96293622
Iter-193, train loss: 3.76986507, valid loss: 1674.50707875
Iter-194, train loss: 3.77159539, valid loss: 1705.69782244
Iter-195, train loss: 3.77334805, valid loss: 1682.86476918
Iter-196, train loss: 3.77512213, valid loss: 1726.79877612
Iter-197, train loss: 3.77691680, valid loss: 1717.18578052
Iter-198, train loss: 3.77873127, valid loss: 1691.45171880
Iter-199, train loss: 3.78056480, valid loss: 1635.70965860
Iter-200, train loss: 3.78241671, valid loss: 1640.76918901
Out[9]:
<__main__.GRU at 0x7f56d7433320>

In [10]:
import matplotlib.pyplot as plt

plt.plot(net.losses['train'], label='Train loss')
plt.plot(net.losses['smooth train'], label='Train smooth loss')
plt.legend()
plt.show()



In [ ]:
import matplotlib.pyplot as plt

plt.plot(net.losses['valid'], label='Validation loss')
plt.legend()
plt.show()



In [ ]:
import matplotlib.pyplot as plt

y_pred = net.test(X_seed=X_valid[0], h=net.initial_state(), size=X_valid.shape[0]) # y_pred_txn
y_pred.shape, Y_valid.shape

plt.plot(y_pred[0:100, 2], label='y_pred')   # column 2 = cnt (total rental count)
plt.plot(Y_valid[0:100, 2], label='Y_valid')
# plt.plot(X_valid[:100], label='X_valid')
plt.legend()
plt.show()
