In [1]:
# Data: time-series data (bike-sharing hourly counts; smartwatch and financial series can be swapped in below)
# %matplotlib inline # for plt.show()
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Data reading
# The historical/time-series data to load and visualize
# data_path = 'data/smartwatch_data/experimental_data_analysis/Basis_Watch_Data.csv'
# data_path = 'data/financial_data/USD_INR.csv'
data_path = 'data/bike_data/hour.csv'
data = pd.read_csv(data_path)

# Show the first rows of the CSV (comma-separated values) file
data[:10]


Out[1]:
instant dteday season yr mnth hr holiday weekday workingday weathersit temp atemp hum windspeed casual registered cnt
0 1 2011-01-01 1 0 1 0 0 6 0 1 0.24 0.2879 0.81 0.0000 3 13 16
1 2 2011-01-01 1 0 1 1 0 6 0 1 0.22 0.2727 0.80 0.0000 8 32 40
2 3 2011-01-01 1 0 1 2 0 6 0 1 0.22 0.2727 0.80 0.0000 5 27 32
3 4 2011-01-01 1 0 1 3 0 6 0 1 0.24 0.2879 0.75 0.0000 3 10 13
4 5 2011-01-01 1 0 1 4 0 6 0 1 0.24 0.2879 0.75 0.0000 0 1 1
5 6 2011-01-01 1 0 1 5 0 6 0 2 0.24 0.2576 0.75 0.0896 0 1 1
6 7 2011-01-01 1 0 1 6 0 6 0 1 0.22 0.2727 0.80 0.0000 2 0 2
7 8 2011-01-01 1 0 1 7 0 6 0 1 0.20 0.2576 0.86 0.0000 1 2 3
8 9 2011-01-01 1 0 1 8 0 6 0 1 0.24 0.2879 0.75 0.0000 1 7 8
9 10 2011-01-01 1 0 1 9 0 6 0 1 0.32 0.3485 0.76 0.0000 8 6 14

In [2]:
# Data cleaning
# Replace NaN values with 0.0
data = data.fillna(value=0.0)

# # Plotting the smartwatch data before scaling/normalization
# data[:10000]['Price'].plot()
data[:10].plot()
plt.legend()
plt.show()
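A quick sanity check (a minimal sketch, not part of the original notebook): confirm that no NaN values remain after the fill and inspect the column types.

# Sanity check after fillna: per-column NaN counts should all be zero
print(data.isna().sum())
print(data.shape)
print(data.dtypes)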



In [3]:
data_array = np.array(data)
data_array.shape, data_array.dtype
# Drop the first two columns (instant index and dteday string); keep the numeric features as float
data_main = np.array(data_array[:, 2:], dtype=float)
data_main.shape, data_main.dtype

plt.plot(data_main[:100])
plt.show()



In [4]:
# Per-feature z-score normalization: zero mean, unit variance
mean = np.mean(data_main, axis=0)
std = np.std(data_main, axis=0)
std.shape, mean.shape, std.dtype, mean.dtype

data_norm = (data_main - mean) / std
plt.plot(data_norm[:100])
plt.show()
data_norm.mean(), data_norm.std(), data_norm.var(), data_norm.shape, data_norm.dtype


Out[4]:
(-2.0933188371550222e-17,
 0.99999999999999978,
 0.99999999999999967,
 (17379, 15),
 dtype('float64'))

In [5]:
train_data = data_norm[:17000, -1] # the last feature (the normalized total rental count, cnt)
test_data = data_norm[17000:, -1] # the last feature (the normalized total rental count, cnt)
train_data.shape, test_data.shape
X_train = train_data[0:16999] # inputs: values at time t
Y_train = train_data[1:17000] # targets: values at time t + 1 (one-step-ahead prediction)
X_train.shape, Y_train.shape

plt.plot(X_train[:100])
plt.plot(Y_train[:100])
plt.show()



In [6]:
X_valid = test_data[0:378] # validation inputs: values at time t
Y_valid = test_data[1:379] # validation targets: values at time t + 1
X_valid.shape, Y_valid.shape
plt.plot(X_valid[:100])
plt.plot(Y_valid[:100])
plt.show()



In [54]:
# Model or Network
import impl.layer as l
from impl.loss import *

class GRU:
    def __init__(self, D, H, p_dropout, lam):
        self.D = D
        self.H = H
        self.p_dropout = p_dropout
        self.lam = lam
        self.losses = {'train':[], 'smooth train':[], 'valid': []}
        
        # Model params
        Z = H + D
        m = dict(
            Wz=np.random.randn(Z, H) / np.sqrt(Z / 2.),
            Wr=np.random.randn(Z, H) / np.sqrt(Z / 2.),
            Wh=np.random.randn(Z, H) / np.sqrt(Z / 2.),
            Wy=np.random.randn(H, D) / np.sqrt(H / 2.),
            bz=np.zeros((1, H)),
            br=np.zeros((1, H)),
            bh=np.zeros((1, H)),
            by=np.zeros((1, D))
        )
        # Two entries: index 0 for the left-to-right pass, index 1 for the right-to-left pass.
        # NOTE: both entries reference the same dict m, so the two directions share their parameters.
        self.model = []
        for _ in range(2):
            self.model.append(m)
        
    def initial_state(self):
        return np.zeros((1, self.H))

    def forward(self, X, h, m):
        Wz, Wr, Wh, Wy = m['Wz'], m['Wr'], m['Wh'], m['Wy']
        bz, br, bh, by = m['bz'], m['br'], m['bh'], m['by']

        X_in = X.copy()
        h_in = h.copy()

        X = np.column_stack((h_in, X_in))

        hz, hz_cache = l.fc_forward(X, Wz, bz)
        hz, hz_sigm_cache = l.sigmoid_forward(hz)

        hr, hr_cache = l.fc_forward(X, Wr, br)
        hr, hr_sigm_cache = l.sigmoid_forward(hr)

        X = np.column_stack((hr * h_in, X_in))
        
        hh, hh_cache = l.fc_forward(X, Wh, bh)
        hh, hh_tanh_cache = l.tanh_forward(hh)

        # New state: interpolate between the old state and the candidate, gated by hz.
        # Equivalently: h = h_in + hz * (hh - h_in)
        h = ((1. - hz) * h_in) + (hz * hh)

        y, y_cache = l.fc_forward(h, Wy, by)
        
        cache = (h_in, hz, hz_cache, hz_sigm_cache, hr, hr_cache, hr_sigm_cache, hh, hh_cache, hh_tanh_cache, 
                 y_cache)

        return y, h, cache

    def backward(self, dy, dh, cache):
        h_in, hz, hz_cache, hz_sigm_cache, hr, hr_cache, hr_sigm_cache, hh, hh_cache, hh_tanh_cache, y_cache = cache
        
        dh_out = dh.copy()

        dh, dWy, dby = l.fc_backward(dy, y_cache)
        dh += dh_out

        dh_in1 = (1. - hz) * dh
        dhh = hz * dh
        dhz = (hh * dh) - (h_in * dh)
        # or
        # dhz = (hh - h_in) * dh

        dhh = l.tanh_backward(dhh, hh_tanh_cache)
        dXh, dWh, dbh = l.fc_backward(dhh, hh_cache)

        dh = dXh[:, :self.H]
        dX_in2 = dXh[:, self.H:]
        dh_in2 = hr * dh

        dhr = h_in * dh
        dhr = l.sigmoid_backward(dhr, hr_sigm_cache)
        dXr, dWr, dbr = l.fc_backward(dhr, hr_cache)

        dhz = l.sigmoid_backward(dhz, hz_sigm_cache)
        dXz, dWz, dbz = l.fc_backward(dhz, hz_cache)

        dX = dXr + dXz
        dh_in3 = dX[:, :self.H]
        dX_in1 = dX[:, self.H:]

        dh = dh_in1 + dh_in2 + dh_in3
        dX = dX_in1 + dX_in2

        grad = dict(Wz=dWz, Wr=dWr, Wh=dWh, Wy=dWy, bz=dbz, br=dbr, bh=dbh, by=dby)
        
        return dX, dh, grad

    def train_forward(self, X_train, h):
        ys, caches, do_caches = [], [], []
        for _ in range(2):
            ys.append([])
            caches.append([])
            do_caches.append([])

        # Left to right
        for X in X_train:
            X = X.reshape(1, -1) # X_1xn
            y, h, cache = self.forward(X, h, self.model[0])
            y, do_cache = l.dropout_forward(y, self.p_dropout)
            caches[0].append(cache)
            do_caches[0].append(do_cache)
            ys[0].append(y) # collect the left-to-right outputs
        
        # Bidirectional: right to left
        ys[1] = ys[0] # NOTE: no .copy(), so ys[1] aliases ys[0]; the right-to-left outputs below overwrite it in place
        for t in reversed(range(len(X_train))):
            X = X_train[t].reshape(1, -1) # X_1xn
            y, h, cache = self.forward(X, h, self.model[1])
            y, do_cache = l.dropout_forward(y, self.p_dropout)
            caches[1].append(cache)
            do_caches[1].append(do_cache)
            ys[1][t] = y.copy()
            
        # ys is 2 x t x 1 x n (one list of outputs per direction)
        ys = (np.array(ys[0], dtype=float) + np.array(ys[1], dtype=float)) / 2 # average of both directions: t x 1 x n
        ys = ys.reshape(len(ys), -1) # t x n instead of t x 1 x n
        
        return ys, caches, do_caches
                                
    def loss_function(self, y_pred, y_train):
        loss, dys = 0.0, []

        for y, Y in zip(y_pred, y_train): # regularize against self.model[0]; both directions share the same parameter structure
            loss += l2_regression_reg(model=self.model[0], y_pred=y, y_train=Y, lam=self.lam)
            dy = dl2_regression(y_pred=y, y_train=Y)
            dys.append(dy)

        return loss, dys # the per-step gradients dys feed both RNN directions (left-to-right and right-to-left)
    
    def train_backward(self, dys, caches, do_caches):
        dh = np.zeros((1, self.H))
        grad = {key: np.zeros_like(val) for key, val in self.model[0].items()} # same shapes for both directions
        grads = []

        # Accumulated gradients for both directions of the bidirectional RNN
        for _ in range(2):
            grads.append({key: np.zeros_like(val) for key, val in self.model[0].items()})

        # Left to right: backward
        for t in reversed(range(len(dys))):
            dy = dys[t].reshape(1, -1) # dy_1xn
            dy = l.dropout_backward(dy, do_caches[0][t])
            _, dh, grad = self.backward(dy, dh, caches[0][t])
            for key in grad.keys():
                grads[0][key] += grad[key]
                
        # Right to left: backward
        for t in range(len(dys)):
            dy = dys[t].reshape(1, -1) # dy_1xn
            dy = l.dropout_backward(dy, do_caches[1][t])
            _, dh, grad = self.backward(dy, dh, caches[1][t])
            for key in grad.keys():
                grads[1][key] += grad[key]
                
        return grads

    # Caveat for sequence prediction with a bidirectional RNN: the future input X_t does not exist at test time,
    # so both passes run free, feeding each generated output back in as the next input.
    def test(self, X_seed, h, size):
        ys = []
        for _ in range(2):
            ys.append([])
            
        X = X_seed.reshape(1, -1)
        
        # Left to right
        for _ in range(size):
            y, h, _ = self.forward(X, h, self.model[0])
            X = y.copy() # feed the previous output back in as the next input
            ys[0].append(y) # collect the left-to-right predictions
        
        # Right to left
        ys[1] = ys[0] # NOTE: no .copy(), so ys[1] aliases ys[0] and the values below overwrite it in place
        for t in reversed(range(size)):
            y, h, _ = self.forward(X, h, self.model[1])
            X = y.copy() # feed the previous output back in as the next input
            ys[1][t] = y.copy()
            
        # ys is 2 x t x 1 x n (one list of outputs per direction)
        ys = (np.array(ys[0], dtype=float) + np.array(ys[1], dtype=float)) / 2 # average of both directions: t x 1 x n
        # print('ys.shape', ys.shape)
        ys = ys.reshape(len(ys), -1) # t x n instead of t x 1 x n
        return ys
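For reference, the gating logic inside forward() is the standard GRU update. Below is a minimal, self-contained NumPy sketch of a single step under the same [h, x] column-stacking layout; the names sigmoid and gru_step are illustrative and stand in for the fc_*/sigmoid_*/tanh_* helpers from impl.layer.

import numpy as np

def sigmoid(a):
    return 1. / (1. + np.exp(-a))

def gru_step(x, h, Wz, Wr, Wh, Wy, bz, br, bh, by):
    """One GRU step for a 1xD input x and a 1xH state h, mirroring GRU.forward above."""
    X = np.column_stack((h, x))           # concatenate state and input: 1 x (H + D)
    z = sigmoid(X @ Wz + bz)              # update gate
    r = sigmoid(X @ Wr + br)              # reset gate
    Xh = np.column_stack((r * h, x))      # reset-gated state, re-concatenated with the input
    h_tilde = np.tanh(Xh @ Wh + bh)       # candidate state
    h_new = (1. - z) * h + z * h_tilde    # interpolate old state and candidate
    y = h_new @ Wy + by                   # linear readout
    return y, h_new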

In [56]:
def get_minibatch(X, y, minibatch_size, shuffle):
    # Split the sequence into consecutive minibatches; `shuffle` is accepted but not used here
    # (the temporal order of the sequence is kept).
    minibatches = []

    for i in range(0, X.shape[0], minibatch_size):
    # for i in range(0, X.shape[0] - minibatch_size + 1, 1):
        X_mini = X[i:i + minibatch_size]
        y_mini = y[i:i + minibatch_size]
        minibatches.append((X_mini, y_mini))

    return minibatches

def adam_rnn(nn, XY_train, XY_valid, alpha, mb_size, n_iter, print_after):
    X_train, y_train = XY_train
    X_valid, y_valid = XY_valid

    M, R = [], []
    for _ in range(2):
        M.append({key: np.zeros_like(val) for key, val in nn.model[0].items()})
        R.append({key: np.zeros_like(val) for key, val in nn.model[0].items()})
        
    beta1 = .99
    beta2 = .999
    state = nn.initial_state()
    smooth_loss = 1.
    minibatches = get_minibatch(X_train, y_train, mb_size, shuffle=False)
    
    for iter in range(1, n_iter + 1):
        for idx in range(len(minibatches)):
            # Train the model
            X_mini, y_mini = minibatches[idx]
            ys, caches, do_caches = nn.train_forward(X_mini, state)
            loss, dys = nn.loss_function(y_pred=ys, y_train=y_mini)
            grads = nn.train_backward(dys, caches, do_caches)
            nn.losses['train'].append(loss)
            smooth_loss = (0.999 * smooth_loss) + (0.001 * loss)
            nn.losses['smooth train'].append(smooth_loss)
            
            # Update the model
            for num in range(2):
                for key in grads[num].keys():
                    M[num][key] = l.exp_running_avg(M[num][key], grads[num][key], beta1)
                    R[num][key] = l.exp_running_avg(R[num][key], grads[num][key]**2, beta2)
                    m_k_hat = M[num][key] / (1. - (beta1 ** iter))
                    r_k_hat = R[num][key] / (1. - (beta2 ** iter))
                    nn.model[num][key] -= alpha * m_k_hat / (np.sqrt(r_k_hat) + l.eps)

            # Validate the model (by testing)
            ys = nn.test(X_seed=X_valid[0], h=state, size=X_valid.shape[0]) # ys_txn
            valid_loss, _ = nn.loss_function(y_pred=ys, y_train=y_valid)
            nn.losses['valid'].append(valid_loss)

        # Print the model loss/ error
        if iter % print_after == 0:
            print('Iter-{}, train loss: {:.8f}, valid loss: {:.8f}'.format(iter, loss, valid_loss))

    return nn
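The parameter update inside adam_rnn is the standard Adam rule with bias correction. Below is a minimal sketch of the same update for a single parameter array; it assumes l.exp_running_avg(avg, x, beta) computes beta * avg + (1 - beta) * x and that l.eps is a small constant (e.g. 1e-8), both assumptions about impl.layer, and the name adam_update is illustrative.

import numpy as np

def adam_update(param, grad, M, R, t, alpha=1e-4, beta1=0.99, beta2=0.999, eps=1e-8):
    """One Adam step for a single parameter array, as applied per key in adam_rnn."""
    M = beta1 * M + (1. - beta1) * grad        # first-moment (mean) running average
    R = beta2 * R + (1. - beta2) * grad ** 2   # second-moment (uncentered variance) running average
    m_hat = M / (1. - beta1 ** t)              # bias correction for the early steps
    r_hat = R / (1. - beta2 ** t)
    param = param - alpha * m_hat / (np.sqrt(r_hat) + eps)
    return param, M, R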

In [57]:
# Hyper-parameters
time_step = 100 # minibatch size (sequence length per update)
n_iter = 300 # number of epochs
alpha = 1e-4 # learning rate
print_after = 1 # print the training and validation loss every N epochs
num_hidden_units = 64 # number of units in the hidden layer
num_input_units = 1 # X_train.shape[1] # X_txn
p_dropout = 0.95 # keep_prob: probability of keeping neurons/units
lam = 1e-4 # regularization strength

# Build the network and optimize it with SGD (Adam)
# def adam_rnn(nn, X_train, y_train, alpha=0.001, mb_size=256, n_iter=2000, print_after=100):
net = GRU(D=num_input_units, H=num_hidden_units, p_dropout=p_dropout, lam=lam)

# Start learning using BP-SGD-ADAM
adam_rnn(nn=net, XY_train=(X_train, Y_train), XY_valid=(X_valid, Y_valid), alpha=alpha, mb_size=time_step,
         n_iter=n_iter, print_after=print_after)


Iter-1, train loss: 10.15126819, valid loss: 962.78406016
Iter-2, train loss: 5.42623298, valid loss: 985.17834041
Iter-3, train loss: 7.56674855, valid loss: 550.86218903
Iter-4, train loss: 7.98945766, valid loss: 131.09563459
Iter-5, train loss: 4.26485178, valid loss: 217.53752597
Iter-6, train loss: 3.66184637, valid loss: 183.61946810
Iter-7, train loss: 3.50256709, valid loss: 103.09928214
Iter-8, train loss: 4.10012908, valid loss: 183.21901176
Iter-9, train loss: 3.28272931, valid loss: 137.31214014
Iter-10, train loss: 4.00964416, valid loss: 177.12759306
Iter-11, train loss: 3.13583796, valid loss: 106.13564216
Iter-12, train loss: 3.14709992, valid loss: 174.89518397
Iter-13, train loss: 3.42035824, valid loss: 154.39315973
Iter-14, train loss: 4.81015134, valid loss: 104.63995597
Iter-15, train loss: 6.83940123, valid loss: 130.33911827
Iter-16, train loss: 6.32473817, valid loss: 112.73925045
Iter-17, train loss: 3.21272954, valid loss: 102.85207011
Iter-18, train loss: 2.78022651, valid loss: 103.09198658
Iter-19, train loss: 4.72873264, valid loss: 112.66876545
Iter-20, train loss: 4.16340300, valid loss: 107.67254751
Iter-21, train loss: 3.25628455, valid loss: 106.48868066
Iter-22, train loss: 3.14203522, valid loss: 113.33509673
Iter-23, train loss: 4.25975561, valid loss: 118.72113933
Iter-24, train loss: 8.62474558, valid loss: 118.00301457
Iter-25, train loss: 5.10793153, valid loss: 1645.83411754
Iter-26, train loss: 3.38791347, valid loss: 113.44995371
Iter-27, train loss: 4.85344484, valid loss: 1432.71574610
Iter-28, train loss: 6.02539418, valid loss: 2133.61698511
Iter-29, train loss: 3.76711087, valid loss: 4349.52993457
Iter-30, train loss: 3.78004276, valid loss: 6694.99726667
Iter-31, train loss: 2.93314708, valid loss: 5254.03700286
Iter-32, train loss: 3.73531410, valid loss: 7123.48541089
Iter-33, train loss: 3.91024080, valid loss: 127.06912766
Iter-34, train loss: 6.57814265, valid loss: 147.08312299
Iter-35, train loss: 4.41181640, valid loss: 2181.13215823
Iter-36, train loss: 3.37408618, valid loss: 117.77435151
Iter-37, train loss: 3.91632026, valid loss: 99.02192023
Iter-38, train loss: 7.02520484, valid loss: 115.26543404
Iter-39, train loss: 6.58035431, valid loss: 4289.79835154
Iter-40, train loss: 2.94503994, valid loss: 110.04890889
Iter-41, train loss: 3.41961227, valid loss: 3726.26806610
Iter-42, train loss: 4.58148208, valid loss: 4586.35283852
Iter-43, train loss: 5.34829384, valid loss: 6283.64797055
Iter-44, train loss: 4.41524410, valid loss: 7449.12177504
Iter-45, train loss: 4.96023305, valid loss: 106.00321326
Iter-46, train loss: 3.85936498, valid loss: 188.76746247
Iter-47, train loss: 4.54266099, valid loss: 102.75789364
Iter-48, train loss: 3.75564354, valid loss: 104.26799783
Iter-49, train loss: 4.50228477, valid loss: 104.85157805
Iter-50, train loss: 4.89772388, valid loss: 143.04062647
Iter-51, train loss: 4.53158958, valid loss: 8638.94514355
Iter-52, train loss: 7.94690861, valid loss: 109.25282104
Iter-53, train loss: 3.92716715, valid loss: 154.87935913
Iter-54, train loss: 2.98923426, valid loss: 245.26920665
Iter-55, train loss: 4.93346153, valid loss: 120.93883343
Iter-56, train loss: 5.82318378, valid loss: 114.48788411
Iter-57, train loss: 6.33687391, valid loss: 102.99172257
Iter-58, train loss: 7.54447505, valid loss: 142.34809575
Iter-59, train loss: 4.18583216, valid loss: 160.72467880
Iter-60, train loss: 4.00690867, valid loss: 159.43650999
Iter-61, train loss: 4.62928296, valid loss: 214.52889738
Iter-62, train loss: 5.92458019, valid loss: 170.36910192
Iter-63, train loss: 3.44767563, valid loss: 129.82122433
Iter-64, train loss: 9.70788204, valid loss: 115.35559396
Iter-65, train loss: 4.19956718, valid loss: 109.53678300
Iter-66, train loss: 3.34341600, valid loss: 116.07668349
Iter-67, train loss: 3.87849490, valid loss: 116.85531665
Iter-68, train loss: 4.45675256, valid loss: 118.44303202
Iter-69, train loss: 4.76842355, valid loss: 124.37208175
Iter-70, train loss: 8.53571669, valid loss: 121.19664985
Iter-71, train loss: 7.37343490, valid loss: 122.87961559
Iter-72, train loss: 3.41935124, valid loss: 107.91678479
Iter-73, train loss: 3.36297363, valid loss: 710.39218521
Iter-74, train loss: 4.16012851, valid loss: 6246.76341029
Iter-75, train loss: 4.01160080, valid loss: 8892.78788999
Iter-76, train loss: 4.33181213, valid loss: 6932.58340032
Iter-77, train loss: 6.50283719, valid loss: 201.85977065
Iter-78, train loss: 7.19608174, valid loss: 183.79549832
Iter-79, train loss: 5.77338794, valid loss: 179.60862164
Iter-80, train loss: 3.05596653, valid loss: 239.57898973
Iter-81, train loss: 5.64970433, valid loss: 235.03382648
Iter-82, train loss: 5.80193598, valid loss: 227.57026283
Iter-83, train loss: 4.47496337, valid loss: 207.00374047
Iter-84, train loss: 3.60047100, valid loss: 119.35620594
Iter-85, train loss: 8.81195842, valid loss: 110.42251323
Iter-86, train loss: 4.81042112, valid loss: 107.32973478
Iter-87, train loss: 7.65968277, valid loss: 3795.48402331
Iter-88, train loss: 4.20240497, valid loss: 864.85983565
Iter-89, train loss: 3.21723778, valid loss: 613.45439025
Iter-90, train loss: 6.86276259, valid loss: 289.11551652
Iter-91, train loss: 3.90033648, valid loss: 356.85664538
Iter-92, train loss: 8.63028561, valid loss: 308.79567420
Iter-93, train loss: 3.93502333, valid loss: 369.56315451
Iter-94, train loss: 7.16493186, valid loss: 399.70250697
Iter-95, train loss: 4.73854592, valid loss: 418.79001481
Iter-96, train loss: 3.38388799, valid loss: 467.09553113
Iter-97, train loss: 4.92047032, valid loss: 478.58831098
Iter-98, train loss: 5.63881885, valid loss: 497.98279939
Iter-99, train loss: 3.51030171, valid loss: 505.78632153
Iter-100, train loss: 5.19716856, valid loss: 499.58043829
Iter-101, train loss: 3.91317173, valid loss: 489.97731304
Iter-102, train loss: 4.31695796, valid loss: 488.16182123
Iter-103, train loss: 5.15945826, valid loss: 492.39916144
Iter-104, train loss: 3.29796611, valid loss: 492.90458490
Iter-105, train loss: 3.55153296, valid loss: 494.69145898
Iter-106, train loss: 7.14946168, valid loss: 500.57027794
Iter-107, train loss: 3.70368364, valid loss: 493.74912583
Iter-108, train loss: 4.07076117, valid loss: 473.85613108
Iter-109, train loss: 6.79793670, valid loss: 414.11101088
Iter-110, train loss: 6.49379066, valid loss: 1535.88891504
Iter-111, train loss: 3.94146791, valid loss: 1234.18501516
Iter-112, train loss: 3.42908596, valid loss: 4443.84111453
Iter-113, train loss: 5.90149844, valid loss: 159.39599930
Iter-114, train loss: 4.71234718, valid loss: 581.96175168
Iter-115, train loss: 7.91690442, valid loss: 527.86141524
Iter-116, train loss: 3.89142864, valid loss: 584.90544928
Iter-117, train loss: 3.90993990, valid loss: 649.67866600
Iter-118, train loss: 5.13343762, valid loss: 664.26601793
Iter-119, train loss: 5.65110620, valid loss: 673.42303618
Iter-120, train loss: 3.14774531, valid loss: 701.47080363
Iter-121, train loss: 3.58025618, valid loss: 723.12062606
Iter-122, train loss: 5.08575579, valid loss: 740.90931530
Iter-123, train loss: 8.46928769, valid loss: 762.09534963
Iter-124, train loss: 4.43862857, valid loss: 781.64623345
Iter-125, train loss: 7.95214089, valid loss: 792.08792967
Iter-126, train loss: 3.34026180, valid loss: 220.58067052
Iter-127, train loss: 6.02855419, valid loss: 248.42297849
Iter-128, train loss: 4.15995026, valid loss: 129.36387079
Iter-129, train loss: 5.32735726, valid loss: 137.90176184
Iter-130, train loss: 10.00072721, valid loss: 168.01707195
Iter-131, train loss: 3.92761927, valid loss: 137.37339604
Iter-132, train loss: 3.73018758, valid loss: 251.69390328
Iter-133, train loss: 5.65854672, valid loss: 247.40160182
Iter-134, train loss: 5.34071926, valid loss: 113.35535656
Iter-135, train loss: 4.33248570, valid loss: 109.40139041
Iter-136, train loss: 3.74582406, valid loss: 189.19725358
Iter-137, train loss: 6.40940745, valid loss: 364.33440749
Iter-138, train loss: 5.20539266, valid loss: 581.04936741
Iter-139, train loss: 4.94471446, valid loss: 719.79319462
Iter-140, train loss: 5.52300143, valid loss: 784.24837195
Iter-141, train loss: 5.59715868, valid loss: 809.98683399
Iter-142, train loss: 4.11493567, valid loss: 812.28782351
Iter-143, train loss: 8.37163178, valid loss: 186.52843615
Iter-144, train loss: 3.59485330, valid loss: 128.01088527
Iter-145, train loss: 4.37234567, valid loss: 576.25223157
Iter-146, train loss: 12.20094629, valid loss: 214.30073681
Iter-147, train loss: 7.04709069, valid loss: 105.86577620
Iter-148, train loss: 4.66920292, valid loss: 329.86466279
Iter-149, train loss: 8.75077353, valid loss: 372.26030098
Iter-150, train loss: 7.15306598, valid loss: 687.48796320
Iter-151, train loss: 3.93410786, valid loss: 828.55041329
Iter-152, train loss: 4.40864078, valid loss: 881.88893998
Iter-153, train loss: 3.27666863, valid loss: 925.36518622
Iter-154, train loss: 4.28910272, valid loss: 936.40182895
Iter-155, train loss: 6.97026853, valid loss: 876.35703212
Iter-156, train loss: 6.72572208, valid loss: 162.05945105
Iter-157, train loss: 7.12116865, valid loss: 185.32057682
Iter-158, train loss: 4.38281750, valid loss: 112.92030436
Iter-159, train loss: 3.85202462, valid loss: 107.78503211
Iter-160, train loss: 7.09210484, valid loss: 146.69300305
Iter-161, train loss: 4.59941769, valid loss: 118.39569552
Iter-162, train loss: 3.78098681, valid loss: 115.79239328
Iter-163, train loss: 4.54977323, valid loss: 219.83566841
Iter-164, train loss: 5.52843189, valid loss: 499.40422798
Iter-165, train loss: 5.73349939, valid loss: 850.52276505
Iter-166, train loss: 9.54359837, valid loss: 1119.43793382
Iter-167, train loss: 9.10904077, valid loss: 1201.03757051
Iter-168, train loss: 3.48834618, valid loss: 1186.03080661
Iter-169, train loss: 4.68053020, valid loss: 1062.66802863
Iter-170, train loss: 4.36219716, valid loss: 524.02427005
Iter-171, train loss: 3.48558755, valid loss: 302.96079486
Iter-172, train loss: 7.28472467, valid loss: 530.87633649
Iter-173, train loss: 5.45401380, valid loss: 467.50928199
Iter-174, train loss: 3.66289831, valid loss: 349.74553779
Iter-175, train loss: 7.01807780, valid loss: 519.88390239
Iter-176, train loss: 9.41896985, valid loss: 800.33210379
Iter-177, train loss: 5.24742130, valid loss: 1025.27583820
Iter-178, train loss: 6.59836400, valid loss: 1082.55847175
Iter-179, train loss: 4.27550044, valid loss: 1026.15092034
Iter-180, train loss: 9.00199124, valid loss: 863.52050299
Iter-181, train loss: 4.44176539, valid loss: 407.33239052
Iter-182, train loss: 8.09280019, valid loss: 125.14796596
Iter-183, train loss: 4.58540514, valid loss: 667.92553607
Iter-184, train loss: 4.82987458, valid loss: 578.91556531
Iter-185, train loss: 4.03622506, valid loss: 437.33290538
Iter-186, train loss: 4.30753238, valid loss: 375.21728099
Iter-187, train loss: 4.83857920, valid loss: 348.23053176
Iter-188, train loss: 4.38447577, valid loss: 610.13577366
Iter-189, train loss: 4.66479104, valid loss: 743.58697728
Iter-190, train loss: 5.79062565, valid loss: 967.90083474
Iter-191, train loss: 4.31539790, valid loss: 1230.89633028
Iter-192, train loss: 13.03983826, valid loss: 1305.19509413
Iter-193, train loss: 3.91648117, valid loss: 1258.60342506
Iter-194, train loss: 3.52027194, valid loss: 1209.95707371
Iter-195, train loss: 4.37680337, valid loss: 1193.67794319
Iter-196, train loss: 8.21601579, valid loss: 1084.84406148
Iter-197, train loss: 3.91017835, valid loss: 693.23729497
Iter-198, train loss: 4.89584680, valid loss: 536.47793818
Iter-199, train loss: 4.60790495, valid loss: 127.96954475
Iter-200, train loss: 9.63158215, valid loss: 7259.24480059
Iter-201, train loss: 4.18598350, valid loss: 105.27846480
Iter-202, train loss: 10.82279228, valid loss: 118.70141181
Iter-203, train loss: 4.18203452, valid loss: 150.70572610
Iter-204, train loss: 6.45545494, valid loss: 413.06746278
Iter-205, train loss: 6.85339612, valid loss: 259.35418058
Iter-206, train loss: 8.21790961, valid loss: 263.81477283
Iter-207, train loss: 4.71016769, valid loss: 601.14684614
Iter-208, train loss: 4.70664673, valid loss: 676.94342183
Iter-209, train loss: 3.82336933, valid loss: 792.26712087
Iter-210, train loss: 8.76561395, valid loss: 878.87630600
Iter-211, train loss: 3.84082424, valid loss: 777.10793945
Iter-212, train loss: 7.70979292, valid loss: 258.82519620
Iter-213, train loss: 4.55866416, valid loss: 6424.50354257
Iter-214, train loss: 8.60620107, valid loss: 121.52740193
Iter-215, train loss: 7.64692928, valid loss: 155.04951041
Iter-216, train loss: 5.62690134, valid loss: 144.59372688
Iter-217, train loss: 5.19985269, valid loss: 531.68501618
Iter-218, train loss: 5.29662041, valid loss: 122.58712410
Iter-219, train loss: 4.23499782, valid loss: 229.90650167
Iter-220, train loss: 8.71116788, valid loss: 640.32158398
Iter-221, train loss: 5.61860556, valid loss: 831.56960102
Iter-222, train loss: 4.15306406, valid loss: 1120.86203029
Iter-223, train loss: 5.81269471, valid loss: 1463.62754983
Iter-224, train loss: 7.13900583, valid loss: 1601.03979192
Iter-225, train loss: 8.88924993, valid loss: 1684.13698231
Iter-226, train loss: 7.70686758, valid loss: 1753.36015153
Iter-227, train loss: 8.68855123, valid loss: 1778.85228031
Iter-228, train loss: 8.11353941, valid loss: 1668.15181539
Iter-229, train loss: 4.91051938, valid loss: 206.49950662
Iter-230, train loss: 3.55177844, valid loss: 1339.18856855
Iter-231, train loss: 5.71054077, valid loss: 704.05215483
Iter-232, train loss: 4.77596655, valid loss: 108.15291300
Iter-233, train loss: 3.56625950, valid loss: 134.20578690
Iter-234, train loss: 9.53022350, valid loss: 105.81905998
Iter-235, train loss: 5.09772409, valid loss: 208.28985024
Iter-236, train loss: 4.81596410, valid loss: 235.03892175
Iter-237, train loss: 3.47545281, valid loss: 526.72854239
Iter-238, train loss: 4.75100654, valid loss: 908.83648971
Iter-239, train loss: 5.27953409, valid loss: 897.03219498
Iter-240, train loss: 4.42460613, valid loss: 1115.72996500
Iter-241, train loss: 8.26501098, valid loss: 1020.41124282
Iter-242, train loss: 6.90928641, valid loss: 1277.51007548
Iter-243, train loss: 8.13395707, valid loss: 1611.94700562
Iter-244, train loss: 7.14832345, valid loss: 1786.99582006
Iter-245, train loss: 4.88684509, valid loss: 2081.88034793
Iter-246, train loss: 5.54684739, valid loss: 5869.09049951
Iter-247, train loss: 3.79974085, valid loss: 324.23629664
Iter-248, train loss: 6.07399929, valid loss: 266.93836182
Iter-249, train loss: 5.57249134, valid loss: 3826.39676328
Iter-250, train loss: 5.04242472, valid loss: 865.52080224
Iter-251, train loss: 3.72934866, valid loss: 316.35234328
Iter-252, train loss: 4.80736527, valid loss: 889.39991292
Iter-253, train loss: 4.12097443, valid loss: 127.17652851
Iter-254, train loss: 7.94073918, valid loss: 1013.76421676
Iter-255, train loss: 5.92226860, valid loss: 179.89137543
Iter-256, train loss: 6.57118508, valid loss: 967.11027936
Iter-257, train loss: 3.82275918, valid loss: 1347.43109253
Iter-258, train loss: 4.62283047, valid loss: 199.41524896
Iter-259, train loss: 7.53012289, valid loss: 131.94566756
Iter-260, train loss: 5.03276071, valid loss: 122.85114088
Iter-261, train loss: 5.88391682, valid loss: 182.32136935
Iter-262, train loss: 4.95330696, valid loss: 144.36208832
Iter-263, train loss: 4.53918140, valid loss: 150.50396801
Iter-264, train loss: 4.41376949, valid loss: 125.83507868
Iter-265, train loss: 9.56133231, valid loss: 173.92390042
Iter-266, train loss: 4.16554244, valid loss: 1410.39368826
Iter-267, train loss: 4.55649741, valid loss: 1870.59705641
Iter-268, train loss: 7.69885681, valid loss: 1710.89948496
Iter-269, train loss: 5.79153784, valid loss: 1661.46770622
Iter-270, train loss: 5.25746124, valid loss: 2088.71524959
Iter-271, train loss: 11.29585345, valid loss: 7315.62380769
Iter-272, train loss: 5.09800337, valid loss: 2069.65464091
Iter-273, train loss: 5.72269580, valid loss: 281.72763553
Iter-274, train loss: 9.41255539, valid loss: 120.25427761
Iter-275, train loss: 8.18556429, valid loss: 573.17498520
Iter-276, train loss: 7.62969312, valid loss: 540.47068317
Iter-277, train loss: 8.93800521, valid loss: 99.96727836
Iter-278, train loss: 6.10161188, valid loss: 191.82798553
Iter-279, train loss: 4.78534022, valid loss: 154.08609536
Iter-280, train loss: 5.82847040, valid loss: 158.17419558
Iter-281, train loss: 7.18071903, valid loss: 213.81154496
Iter-282, train loss: 8.02577445, valid loss: 1356.59126604
Iter-283, train loss: 5.91170339, valid loss: 1330.40789154
Iter-284, train loss: 3.92634808, valid loss: 490.35957087
Iter-285, train loss: 7.84580420, valid loss: 136.34774284
Iter-286, train loss: 7.00830898, valid loss: 169.38927333
Iter-287, train loss: 5.41032709, valid loss: 9577.00253020
Iter-288, train loss: 5.32816113, valid loss: 1564.47558890
Iter-289, train loss: 4.73553629, valid loss: 165.51594102
Iter-290, train loss: 5.52809459, valid loss: 2507.96188373
Iter-291, train loss: 5.26640451, valid loss: 2067.12826392
Iter-292, train loss: 7.28154076, valid loss: 1852.58758792
Iter-293, train loss: 6.25210089, valid loss: 9069.70278414
Iter-294, train loss: 5.12179386, valid loss: 247.22230799
Iter-295, train loss: 4.61879416, valid loss: 1177.60822624
Iter-296, train loss: 9.57093359, valid loss: 292.23758674
Iter-297, train loss: 5.99099841, valid loss: 1609.42593785
Iter-298, train loss: 4.51076724, valid loss: 1397.65414594
Iter-299, train loss: 8.08028702, valid loss: 159.97145899
Iter-300, train loss: 4.50824434, valid loss: 187.43933666
Out[57]:
<__main__.GRU at 0x7ff7fcd7a710>

In [58]:
import matplotlib.pyplot as plt

plt.plot(net.losses['train'], label='Train loss')
plt.plot(net.losses['smooth train'], label='Train smooth loss')
plt.legend()
plt.show()



In [59]:
import matplotlib.pyplot as plt

plt.plot(net.losses['valid'], label='Validation loss')
plt.legend()
plt.show()



In [61]:
import matplotlib.pyplot as plt

y_pred = net.test(X_seed=X_valid[0], h=net.initial_state(), size=X_valid.shape[0]) # y_pred_txn
y_pred.shape, Y_valid.shape

plt.plot(y_pred[:], label='y_pred')
plt.plot(Y_valid[:], label='Y_valid')
plt.plot(X_valid[:], label='X_valid')
plt.legend()
plt.show()
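Both y_pred and Y_valid above are in normalized (z-scored) units. To read them as hourly rental counts, the z-score of the last column (cnt) can be inverted with the mean and std computed earlier; a minimal sketch with illustrative variable names:

# Invert the z-score normalization for the last feature (the cnt column)
y_pred_counts = y_pred.ravel() * std[-1] + mean[-1]
Y_valid_counts = Y_valid * std[-1] + mean[-1]

plt.plot(y_pred_counts, label='predicted counts')
plt.plot(Y_valid_counts, label='actual counts')
plt.legend()
plt.show()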


