In [50]:
# Data: time-serie data from smartwatch or smartwatch data
# %matplotlib inline # for plt.show()
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Data reading
# The smartwatch historical/time-seris data to visualize
# data_path = 'data/smartwatch_data/experimental_data_analysis/Basis_Watch_Data.csv'
# data_path = 'data/financial_data/USD_INR.csv'
data_path = 'data/bike_data/hour.csv'
data = pd.read_csv(data_path)

# Data: cleaning
# Getting rid of NaN
data = data.fillna(value=0.0)

# Showing the data file csv or comma separated value
data[:10]


Out[50]:
instant dteday season yr mnth hr holiday weekday workingday weathersit temp atemp hum windspeed casual registered cnt
0 1 2011-01-01 1 0 1 0 0 6 0 1 0.24 0.2879 0.81 0.0000 3 13 16
1 2 2011-01-01 1 0 1 1 0 6 0 1 0.22 0.2727 0.80 0.0000 8 32 40
2 3 2011-01-01 1 0 1 2 0 6 0 1 0.22 0.2727 0.80 0.0000 5 27 32
3 4 2011-01-01 1 0 1 3 0 6 0 1 0.24 0.2879 0.75 0.0000 3 10 13
4 5 2011-01-01 1 0 1 4 0 6 0 1 0.24 0.2879 0.75 0.0000 0 1 1
5 6 2011-01-01 1 0 1 5 0 6 0 2 0.24 0.2576 0.75 0.0896 0 1 1
6 7 2011-01-01 1 0 1 6 0 6 0 1 0.22 0.2727 0.80 0.0000 2 0 2
7 8 2011-01-01 1 0 1 7 0 6 0 1 0.20 0.2576 0.86 0.0000 1 2 3
8 9 2011-01-01 1 0 1 8 0 6 0 1 0.24 0.2879 0.75 0.0000 1 7 8
9 10 2011-01-01 1 0 1 9 0 6 0 1 0.32 0.3485 0.76 0.0000 8 6 14

In [51]:
# # Plotting the smartwatch data before scaling/batch normalization
# data[:10000]['Price'].plot()
data[: 10].plot()
plt.legend()
plt.show()



In [52]:
data_array = np.array(data)
data_array.shape, data_array.dtype
data_main = np.array(data_array[:, -1:], dtype=float)
data_main.shape, data_main.dtype

plt.plot(data_main[:100])
plt.show()



In [53]:
mean = np.mean(data_main, axis=0)
std = np.std(data_main, axis=0)
std.shape, mean.shape, std.dtype, mean.dtype

data_norm = (data_main - mean) / std
plt.plot(data_norm[:100])
plt.show()
data_norm.mean(), data_norm.std(), data_norm.var(), data_norm.shape, data_norm.dtype


Out[53]:
(-1.0548364452851478e-16, 1.0, 1.0, (17379, 1), dtype('float64'))

In [54]:
train_data = data_norm[:16000] # the last dim/variable/feature
test_data = data_norm[16000:] # the last dim/variable/feature
train_data.shape, test_data.shape
X_train = train_data[0:15999]
Y_train = train_data[1:16000]
X_train.shape, Y_train.shape

plt.plot(X_train[:100])
plt.plot(Y_train[:100])
plt.show()



In [55]:
X_valid = test_data[0:1378] 
Y_valid = test_data[1:1379]
X_valid.shape, Y_valid.shape
plt.plot(X_valid[:100])
plt.plot(Y_valid[:100])
plt.show()



In [56]:
# Model or Network
import impl.layer as l
from impl.loss import *

class GRU:
    def __init__(self, D, H, L, p_dropout):
        self.D = D
        self.H = H
        self.L = L
        self.p_dropout = p_dropout
        self.losses = {'train':[], 'smooth train':[], 'valid': []}
        
        # Model params
        Z = H + D
        m = dict(
            Wz=np.random.randn(Z, H) / np.sqrt(Z / 2.),
            Wr=np.random.randn(Z, H) / np.sqrt(Z / 2.),
            Wh=np.random.randn(Z, H) / np.sqrt(Z / 2.),
            Wy=np.random.randn(H, D) / np.sqrt(H / 2.),
            bz=np.zeros((1, H)),
            br=np.zeros((1, H)),
            bh=np.zeros((1, H)),
            by=np.zeros((1, D))
        )
        self.model = []
        for _ in range(self.L):
            self.model.append(m)
        
    def initial_state(self):
        return np.zeros((1, self.H))

    def forward(self, X, h, m):
        Wz, Wr, Wh, Wy = m['Wz'], m['Wr'], m['Wh'], m['Wy']
        bz, br, bh, by = m['bz'], m['br'], m['bh'], m['by']

        X_in = X.copy()
        h_in = h.copy()

        X = np.column_stack((h_in, X_in))

        hz, hz_cache = l.fc_forward(X, Wz, bz)
        hz, hz_sigm_cache = l.sigmoid_forward(hz)

        hr, hr_cache = l.fc_forward(X, Wr, br)
        hr, hr_sigm_cache = l.sigmoid_forward(hr)

        X = np.column_stack((hr * h_in, X_in))
        
        hh, hh_cache = l.fc_forward(X, Wh, bh)
        hh, hh_tanh_cache = l.tanh_forward(hh)

        # h = (1. - hz) * h_old + hz * hh
        # or
        h = ((1. - hz) * h_in) + (hz * hh)
        # or
        # h = h_in + hz (hh - h_in)

        y, y_cache = l.fc_forward(h, Wy, by)
        
        cache = (h_in, hz, hz_cache, hz_sigm_cache, hr, hr_cache, hr_sigm_cache, hh, hh_cache, hh_tanh_cache, 
                 y_cache)

        return y, h, cache

    def backward(self, dy, dh, cache):
        h_in, hz, hz_cache, hz_sigm_cache, hr, hr_cache, hr_sigm_cache, hh, hh_cache, hh_tanh_cache, y_cache = cache
        
        dh_out = dh.copy()

        dh, dWy, dby = l.fc_backward(dy, y_cache)
        dh += dh_out

        dh_in1 = (1. - hz) * dh
        dhh = hz * dh
        dhz = (hh * dh) - (h_in * dh)
        # or
        # dhz = (hh - h_in) * dh

        dhh = l.tanh_backward(dhh, hh_tanh_cache)
        dXh, dWh, dbh = l.fc_backward(dhh, hh_cache)

        dh = dXh[:, :self.H]
        dX_in2 = dXh[:, self.H:]
        dh_in2 = hr * dh

        dhr = h_in * dh
        dhr = l.sigmoid_backward(dhr, hr_sigm_cache)
        dXr, dWr, dbr = l.fc_backward(dhr, hr_cache)

        dhz = l.sigmoid_backward(dhz, hz_sigm_cache)
        dXz, dWz, dbz = l.fc_backward(dhz, hz_cache)

        dX = dXr + dXz
        dh_in3 = dX[:, :self.H]
        dX_in1 = dX[:, self.H:]

        dh = dh_in1 + dh_in2 + dh_in3
        dX = dX_in1 + dX_in2

        grad = dict(Wz=dWz, Wr=dWr, Wh=dWh, Wy=dWy, bz=dbz, br=dbr, bh=dbh, by=dby)
        
        return dX, dh, grad

    def train_forward(self, X_train, h):
        ys, fc_caches, do_caches = [], [], []

        for X in X_train:
            X = X.reshape(1, -1) # X_1xn
            for layer in range(self.L):
                y, h, fc_cache = self.forward(X, h, self.model[layer])
                y, do_cache = l.dropout_forward(y, self.p_dropout)
                X = y.copy()
            fc_caches.append(fc_cache)
            do_caches.append(do_cache)
            ys.append(y)
        
        ys = np.array(ys, dtype=float).reshape(len(ys), -1) # ys_txn instead of ys_tx1xn
        caches = (fc_caches, do_caches)
        
        return ys, caches
                                
    def loss_function(self, y_pred, y_train): # , alpha alpha: learning rate
        loss, dys = 0.0, []

        for y, Y in zip(y_pred, y_train):
            loss += l2_regression(y_pred=y, y_train=Y)
            dy = dl2_regression(y_pred=y, y_train=Y)
            dys.append(dy)
            
        return loss, dys
    
    def train_backward(self, dys, caches):
        fc_caches, do_caches = caches
        
        dh = np.zeros((1, self.H)) 
        grad = {key: np.zeros_like(val) for key, val in self.model[0].items()}
        grads = [] #{key: np.zeros_like(val) for key, val in self.model.items()}
        for _ in range(self.L):
            grads.append(grad)

        for t in reversed(range(len(dys))):
            dy = dys[t].reshape(1, -1) # dy_1xn
            for layer in reversed(range(self.L)):
                dy = l.dropout_backward(dy, do_caches[t])
                dX, dh, grad = self.backward(dy, dh, fc_caches[t])
                dy = dX.copy() # for the previous layer
                for key in grad.keys():
                    grads[layer][key] += grad[key]
                
        return dX, grads
    
    def test(self, X_seed, h, size):
        ys = []
        X = X_seed.reshape(1, -1)
        for _ in range(size):
            for layer in range(self.L):
                y, h, _ = self.forward(X, h, self.model[layer])
                X = y.copy() # previous out for the next input for prediction
            ys.append(y) # list array
        
        ys = np.array(ys, dtype=float).reshape(len(ys), -1) # ys_txn instead of ys_tx1xn
        return ys

In [57]:
def get_minibatch(X, y, minibatch_size, shuffle):
    minibatches = []

    for i in range(0, X.shape[0], minibatch_size):
        X_mini = X[i:i + minibatch_size]
        y_mini = y[i:i + minibatch_size]
        minibatches.append((X_mini, y_mini))

    return minibatches

def adam_rnn(nn, XY_train, XY_valid, alpha, mb_size, n_iter, print_after):
    X_train, y_train = XY_train
    X_valid, y_valid = XY_valid

    # Momentum
    M = [] # {key: np.zeros_like(val) for key, val in nn.model.items()}
    R = [] # {key: np.zeros_like(val) for key, val in nn.model.items()}
    for _ in range(nn.L):
        M.append({key: np.zeros_like(val) for key, val in nn.model[0].items()})
        R.append({key: np.zeros_like(val) for key, val in nn.model[0].items()})
    
    # Learning decay: suggested by Justin Jhonson in Standford
    beta1 = .9
    beta2 = .99
    state = nn.initial_state()
    smooth_loss = 1.
    minibatches = get_minibatch(X_train, y_train, mb_size, shuffle=False)
    
    # Epochs: iterating through the whole data
    for iter in range(1, n_iter + 1):
        
        # Minibatches
        for idx in range(len(minibatches)):
            
            # Train the model
            X_mini, y_mini = minibatches[idx]
            ys, caches = nn.train_forward(X_mini, state)
            loss, dys = nn.loss_function(y_pred=ys, y_train=y_mini) #, alpha=alpha
            _, grads = nn.train_backward(dys, caches)
            nn.losses['train'].append(loss)
            smooth_loss = (0.999 * smooth_loss) + (0.001 * loss)
            nn.losses['smooth train'].append(smooth_loss)
            
            # Update the model
            for layer in range(nn.L):
                for key in grads[0].keys(): # key, value, items
                    M[layer][key] = l.exp_running_avg(M[layer][key], grads[layer][key], beta1)
                    R[layer][key] = l.exp_running_avg(R[layer][key], grads[layer][key]**2, beta2)
                    m_k_hat = M[layer][key] / (1. - (beta1** iter))
                    r_k_hat = R[layer][key] / (1. - (beta2** iter))
                    nn.model[layer][key] -= alpha * m_k_hat / (np.sqrt(r_k_hat) + l.eps)

            # Validate the model (by testing)
            ys = nn.test(X_seed=X_valid[0], h=state, size=X_valid.shape[0]) # ys_tx1xn
            valid_loss, _ = nn.loss_function(y_pred=ys, y_train=Y_valid) #, alpha=alpha
            nn.losses['valid'].append(valid_loss)

        # Print the model loss/ error
        if iter % print_after == 0:
            print('Iter-{}, train loss: {:.8f}, valid loss: {:.8f}'.format(iter, loss, valid_loss))

    return nn

In [ ]:
# Hyper-parameters
time_step = 64 # minibatch size: 32, 64, 128, or 256 Cache
n_iter = 1000 # epochs
alpha = 1e-4 # learning_rate: 1e-3, 5e-4, 1e-4 - default choices
print_after = 1 # print training loss, valid, and test
num_hidden_units = 64 # num_hidden_units in hidden layer
num_input_units = X_train.shape[1] # X_txn: noise given by using all possible channels/ features
num_hidden_layers = 2 # number of hidden layers
keep_prob = 0.95 # p_dropout == keep_prob

# Build the network and learning it or optimizing it using SGD
# def adam_rnn(nn, X_train, y_train, alpha=0.001, mb_size=256, n_iter=2000, print_after=100):
net = GRU(D=num_input_units, H=num_hidden_units, p_dropout=keep_prob, L=num_hidden_layers) # , lam=lam, L=num_layers, p_dropout=p_dropout

# Start learning using BP-SGD-ADAM
adam_rnn(nn=net, XY_train=(X_train, Y_train), XY_valid=(X_valid, Y_valid), alpha=alpha, mb_size=time_step,
         n_iter=n_iter, print_after=print_after)


Iter-1, train loss: 30.39486315, valid loss: 7318.29415267
Iter-2, train loss: 29.64349009, valid loss: 13738.53255601
Iter-3, train loss: 28.72681695, valid loss: 11671.23727795
Iter-4, train loss: 31.54554984, valid loss: 13900.43439750
Iter-5, train loss: 28.83088946, valid loss: 11042.78495674
Iter-6, train loss: 30.65733613, valid loss: 12280.49311737
Iter-7, train loss: 26.92784670, valid loss: 11680.63514099
Iter-8, train loss: 30.73785606, valid loss: 12317.02033144
Iter-9, train loss: 29.82797789, valid loss: 11691.30135061
Iter-10, train loss: 29.52103421, valid loss: 11643.70881558
Iter-11, train loss: 26.87585099, valid loss: 3205.36634822
Iter-12, train loss: 30.86773611, valid loss: 13445.78320119
Iter-13, train loss: 30.42718403, valid loss: 12973.85026492
Iter-14, train loss: 29.98827013, valid loss: 13844.96038412
Iter-15, train loss: 30.61486638, valid loss: 2139.27476231
Iter-16, train loss: 30.26094762, valid loss: 13464.61868968
Iter-17, train loss: 30.50169174, valid loss: 3300.31374889
Iter-18, train loss: 30.47373584, valid loss: 3454.60029027
Iter-19, train loss: 30.80710086, valid loss: 3145.58229279
Iter-20, train loss: 30.83738303, valid loss: 2580.09650209
Iter-21, train loss: 28.73665314, valid loss: 3204.78382814
Iter-22, train loss: 30.78931562, valid loss: 2586.35556638
Iter-23, train loss: 29.28467549, valid loss: 2751.41116427
Iter-24, train loss: 30.82702688, valid loss: 2967.06580102
Iter-25, train loss: 30.31602260, valid loss: 2872.65658898
Iter-26, train loss: 27.53690945, valid loss: 2877.43763110
Iter-27, train loss: 28.41852012, valid loss: 2932.71907856
Iter-28, train loss: 30.37161043, valid loss: 1827.49926816
Iter-29, train loss: 30.78931562, valid loss: 3008.54403330
Iter-30, train loss: 31.53770634, valid loss: 3377.03271611
Iter-31, train loss: 28.85143418, valid loss: 3322.78844041
Iter-32, train loss: 30.32611737, valid loss: 3505.15816296
Iter-33, train loss: 30.27762452, valid loss: 2390.50092406
Iter-34, train loss: 30.21309156, valid loss: 3258.01065153
Iter-35, train loss: 28.85399470, valid loss: 2526.19260590
Iter-36, train loss: 29.85741310, valid loss: 1457.20574245
Iter-37, train loss: 29.53983156, valid loss: 2524.09611998
Iter-38, train loss: 30.98324745, valid loss: 2758.24227244
Iter-39, train loss: 29.52213854, valid loss: 1792.33473417
Iter-40, train loss: 29.53220431, valid loss: 2424.76614006
Iter-41, train loss: 30.57580814, valid loss: 2522.24682341
Iter-42, train loss: 30.49670284, valid loss: 2609.71444042
Iter-43, train loss: 30.14064631, valid loss: 2113.32818713
Iter-44, train loss: 29.73726969, valid loss: 2568.34184628
Iter-45, train loss: 27.68300108, valid loss: 1879.11076767
Iter-46, train loss: 31.12119159, valid loss: 2031.69025171
Iter-47, train loss: 30.78931562, valid loss: 2221.76974661
Iter-48, train loss: 28.34964946, valid loss: 3169.49904293
Iter-49, train loss: 30.69873870, valid loss: 2743.09250856
Iter-50, train loss: 29.78250617, valid loss: 2429.45002106
Iter-51, train loss: 29.05980472, valid loss: 1898.14302373
Iter-52, train loss: 30.78931562, valid loss: 2657.77949722
Iter-53, train loss: 30.46307991, valid loss: 3454.42436303
Iter-54, train loss: 28.74407980, valid loss: 2385.64799615
Iter-55, train loss: 25.13778521, valid loss: 3232.13436254
Iter-56, train loss: 28.44455624, valid loss: 1028.71450544
Iter-57, train loss: 28.46397883, valid loss: 2358.15636490
Iter-58, train loss: 29.57462251, valid loss: 1545.20961511
Iter-59, train loss: 30.71303643, valid loss: 720.04086347
Iter-60, train loss: 30.18456573, valid loss: 3243.18398386
Iter-61, train loss: 29.87873163, valid loss: 1968.50438768
Iter-62, train loss: 28.73691035, valid loss: 2730.60709027
Iter-63, train loss: 28.68721005, valid loss: 2208.13952567
Iter-64, train loss: 30.78931562, valid loss: 722.73091430
Iter-65, train loss: 30.30910019, valid loss: 2055.38639201
Iter-66, train loss: 29.50122194, valid loss: 2860.74194111
Iter-67, train loss: 27.82344203, valid loss: 2295.70482561
Iter-68, train loss: 30.61646205, valid loss: 2364.29965330
Iter-69, train loss: 31.20575903, valid loss: 1940.09340097
Iter-70, train loss: 31.14784176, valid loss: 2134.76988884
Iter-71, train loss: 28.95933819, valid loss: 2420.80230727
Iter-72, train loss: 30.71905996, valid loss: 2308.59435154
Iter-73, train loss: 30.43329482, valid loss: 1979.56393604
Iter-74, train loss: 29.80665310, valid loss: 2488.62250555
Iter-75, train loss: 30.61518913, valid loss: 2309.28636323
Iter-76, train loss: 29.38815910, valid loss: 2204.25131489
Iter-77, train loss: 31.04174371, valid loss: 2324.84726202
Iter-78, train loss: 29.81734017, valid loss: 2151.11776680
Iter-79, train loss: 31.37536133, valid loss: 2022.13683930
Iter-80, train loss: 29.75703191, valid loss: 1792.66677476
Iter-81, train loss: 29.63202357, valid loss: 2221.40137950
Iter-82, train loss: 31.08100165, valid loss: 2468.37187324
Iter-83, train loss: 30.24349680, valid loss: 1619.75510480
Iter-84, train loss: 30.73112343, valid loss: 1728.05725893
Iter-85, train loss: 30.21976688, valid loss: 2268.97558308
Iter-86, train loss: 29.32283353, valid loss: 2189.21751190
Iter-87, train loss: 29.11525491, valid loss: 2418.69598566
Iter-88, train loss: 30.59320885, valid loss: 1154.73878300
Iter-89, train loss: 30.68896530, valid loss: 2432.17393226
Iter-90, train loss: 31.07896925, valid loss: 1597.92844603
Iter-91, train loss: 31.71004558, valid loss: 2133.75271574
Iter-92, train loss: 30.78399845, valid loss: 2599.99711310
Iter-93, train loss: 30.75212751, valid loss: 2405.12116347
Iter-94, train loss: 30.84129147, valid loss: 2029.39827830
Iter-95, train loss: 30.65507631, valid loss: 2173.43781432
Iter-96, train loss: 29.75132410, valid loss: 1881.38688075
Iter-97, train loss: 32.84812600, valid loss: 2230.42941333
Iter-98, train loss: 30.65766593, valid loss: 2471.71469078
Iter-99, train loss: 29.59932085, valid loss: 2401.01717422
Iter-100, train loss: 30.93588695, valid loss: 2516.73256562
Iter-101, train loss: 31.13944332, valid loss: 1809.36403726
Iter-102, train loss: 29.24648891, valid loss: 1641.07649069
Iter-103, train loss: 30.25741536, valid loss: 1714.32389301
Iter-104, train loss: 29.89012915, valid loss: 1805.84153452
Iter-105, train loss: 31.22566960, valid loss: 1445.52785515
Iter-106, train loss: 29.91912788, valid loss: 3068.38573730
Iter-107, train loss: 28.51546092, valid loss: 2135.41173297
Iter-108, train loss: 30.95959782, valid loss: 2225.41797787
Iter-109, train loss: 28.79495125, valid loss: 2040.30683184
Iter-110, train loss: 30.92967422, valid loss: 1968.62923835
Iter-111, train loss: 29.49922402, valid loss: 2339.39436362
Iter-112, train loss: 30.75860327, valid loss: 2766.94384874
Iter-113, train loss: 31.41706322, valid loss: 1981.44301962
Iter-114, train loss: 29.82775576, valid loss: 2465.72344016
Iter-115, train loss: 30.24585594, valid loss: 2406.34876404
Iter-116, train loss: 31.04425036, valid loss: 2271.38699997
Iter-117, train loss: 30.39107355, valid loss: 2264.54230640
Iter-118, train loss: 28.22465686, valid loss: 2425.03512352
Iter-119, train loss: 29.70969775, valid loss: 1609.04240888
Iter-120, train loss: 30.88745823, valid loss: 2111.15956676
Iter-121, train loss: 29.62155009, valid loss: 2008.72899721
Iter-122, train loss: 29.79833689, valid loss: 2098.17717501
Iter-123, train loss: 29.99868700, valid loss: 2450.26033111
Iter-124, train loss: 29.33973723, valid loss: 2193.86693300
Iter-125, train loss: 28.55868823, valid loss: 2109.24572092
Iter-126, train loss: 30.98412047, valid loss: 2445.38710467
Iter-127, train loss: 29.51983673, valid loss: 2230.72545823
Iter-128, train loss: 29.86928241, valid loss: 1906.40913025
Iter-129, train loss: 32.07753792, valid loss: 2341.29551644
Iter-130, train loss: 29.29187190, valid loss: 1276.19452334
Iter-131, train loss: 30.23896525, valid loss: 2421.74242422
Iter-132, train loss: 30.95247684, valid loss: 2784.98187920
Iter-133, train loss: 27.96033145, valid loss: 2649.17061555
Iter-134, train loss: 30.78931562, valid loss: 2378.93427625
Iter-135, train loss: 29.34863168, valid loss: 2054.50058603
Iter-136, train loss: 29.87485658, valid loss: 2353.98148655
Iter-137, train loss: 30.42856637, valid loss: 2458.12021356
Iter-138, train loss: 31.25831627, valid loss: 2360.73174695
Iter-139, train loss: 31.44830840, valid loss: 2165.13501793
Iter-140, train loss: 30.27493968, valid loss: 2390.27660023
Iter-141, train loss: 31.42282147, valid loss: 1943.07320209
Iter-142, train loss: 29.73762338, valid loss: 2272.97904085
Iter-143, train loss: 27.81386158, valid loss: 2151.18991323
Iter-144, train loss: 30.70509224, valid loss: 1872.80515277
Iter-145, train loss: 29.91655328, valid loss: 1592.45235258
Iter-146, train loss: 27.42951723, valid loss: 1377.04412960
Iter-147, train loss: 32.15701375, valid loss: 1705.27819900
Iter-148, train loss: 30.78931562, valid loss: 1379.55780890
Iter-149, train loss: 28.41182540, valid loss: 1600.01038019
Iter-150, train loss: 31.90074900, valid loss: 1816.48301178
Iter-151, train loss: 30.91597076, valid loss: 1555.74115566
Iter-152, train loss: 31.06200382, valid loss: 1885.04964239
Iter-153, train loss: 32.10674656, valid loss: 2014.37466809
Iter-154, train loss: 28.34721443, valid loss: 1996.05198379
Iter-155, train loss: 30.14143974, valid loss: 1869.81322423
Iter-156, train loss: 30.24616215, valid loss: 1716.03601395
Iter-157, train loss: 30.48999908, valid loss: 1551.33374541
Iter-158, train loss: 30.92147115, valid loss: 1304.50255504
Iter-159, train loss: 30.78931562, valid loss: 1517.14440062
Iter-160, train loss: 27.33732147, valid loss: 1490.98398621
Iter-161, train loss: 32.53183788, valid loss: 2029.89352099
Iter-162, train loss: 31.40231994, valid loss: 1945.17362097
Iter-163, train loss: 30.24788186, valid loss: 1285.20560489
Iter-164, train loss: 29.87937421, valid loss: 1362.06523466
Iter-165, train loss: 30.03820466, valid loss: 1203.68619980
Iter-166, train loss: 29.40300250, valid loss: 1043.42038145
Iter-167, train loss: 31.22712310, valid loss: 1370.96697450
Iter-168, train loss: 33.14170003, valid loss: 1426.98027513
Iter-169, train loss: 30.79121572, valid loss: 1235.02438943
Iter-170, train loss: 31.08115263, valid loss: 1409.57716752
Iter-171, train loss: 30.15783859, valid loss: 1403.37604758
Iter-172, train loss: 31.51491040, valid loss: 1520.29625109
Iter-173, train loss: 28.65168128, valid loss: 1239.40167737
Iter-174, train loss: 29.55825418, valid loss: 1364.55267523
Iter-175, train loss: 29.46551716, valid loss: 1343.09297172
Iter-176, train loss: 28.87576933, valid loss: 1572.52204764
Iter-177, train loss: 30.78931562, valid loss: 1522.77367635
Iter-178, train loss: 31.05198844, valid loss: 1309.08925857
Iter-179, train loss: 29.74390648, valid loss: 1507.39236988
Iter-180, train loss: 30.21801480, valid loss: 1079.63046817
Iter-181, train loss: 31.32330843, valid loss: 2079.16688097
Iter-182, train loss: 31.82694323, valid loss: 1625.58237166
Iter-183, train loss: 30.39978589, valid loss: 1624.30349257
Iter-184, train loss: 30.35642719, valid loss: 1420.97184055
Iter-185, train loss: 30.38984029, valid loss: 1376.84272452
Iter-186, train loss: 30.87960733, valid loss: 975.00435896
Iter-187, train loss: 30.67145332, valid loss: 729.79816801
Iter-188, train loss: 30.03584943, valid loss: 787.79477469
Iter-189, train loss: 32.32072129, valid loss: 1044.89288845
Iter-190, train loss: 31.39438795, valid loss: 1448.38462905
Iter-191, train loss: 30.92343509, valid loss: 1191.35807155
Iter-192, train loss: 32.69405974, valid loss: 1137.30643969
Iter-193, train loss: 30.70935155, valid loss: 1153.22141368
Iter-194, train loss: 32.59095941, valid loss: 1559.05295558
Iter-195, train loss: 31.49414994, valid loss: 1258.02629986
Iter-196, train loss: 30.72922124, valid loss: 1343.07901518
Iter-197, train loss: 30.43287550, valid loss: 1171.06483069
Iter-198, train loss: 31.31655825, valid loss: 1152.83316259
Iter-199, train loss: 30.39211872, valid loss: 1161.88926350
Iter-200, train loss: 31.46838523, valid loss: 943.61324459
Iter-201, train loss: 35.33751354, valid loss: 1112.38575668
Iter-202, train loss: 31.17790261, valid loss: 1141.40843492
Iter-203, train loss: 30.90803209, valid loss: 1214.42033838
Iter-204, train loss: 30.70278491, valid loss: 1268.86117652
Iter-205, train loss: 30.79360884, valid loss: 1149.96983228
Iter-206, train loss: 30.96860462, valid loss: 1193.22053980
Iter-207, train loss: 29.68328854, valid loss: 1783.79563760
Iter-208, train loss: 32.73102691, valid loss: 1481.26710183
Iter-209, train loss: 30.57769087, valid loss: 1251.30758119
Iter-210, train loss: 30.05111290, valid loss: 1138.83174869
Iter-211, train loss: 29.96077949, valid loss: 872.05934916
Iter-212, train loss: 30.78931562, valid loss: 1310.91638308
Iter-213, train loss: 31.21323033, valid loss: 737.96743842
Iter-214, train loss: 31.39763816, valid loss: 743.95862493
Iter-215, train loss: 32.23430100, valid loss: 1112.66524675
Iter-216, train loss: 31.66187708, valid loss: 757.72200528
Iter-217, train loss: 30.07532827, valid loss: 1031.53945344
Iter-218, train loss: 30.63329138, valid loss: 784.30156538
Iter-219, train loss: 30.39310228, valid loss: 659.29345912
Iter-220, train loss: 29.82578229, valid loss: 795.28721106
Iter-221, train loss: 32.40283820, valid loss: 910.13948266
Iter-222, train loss: 30.62975531, valid loss: 848.44996142
Iter-223, train loss: 35.59270907, valid loss: 791.61937404
Iter-224, train loss: 30.91831944, valid loss: 696.79295897
Iter-225, train loss: 31.46033689, valid loss: 721.79315085
Iter-226, train loss: 30.32679629, valid loss: 737.44567613
Iter-227, train loss: 33.09108316, valid loss: 831.46939113
Iter-228, train loss: 31.09117030, valid loss: 826.32039741
Iter-229, train loss: 33.62883123, valid loss: 961.91820606
Iter-230, train loss: 31.00923325, valid loss: 929.95152369
Iter-231, train loss: 30.70774300, valid loss: 948.48200813
Iter-232, train loss: 31.08775459, valid loss: 944.94210154
Iter-233, train loss: 29.96256476, valid loss: 906.01554156
Iter-234, train loss: 28.98847991, valid loss: 936.58841688
Iter-235, train loss: 30.88793452, valid loss: 927.75739138
Iter-236, train loss: 31.27605421, valid loss: 840.34478223
Iter-237, train loss: 30.93279772, valid loss: 812.25953122
Iter-238, train loss: 31.00959881, valid loss: 879.43579400
Iter-239, train loss: 31.91490943, valid loss: 783.46932692
Iter-240, train loss: 30.26113914, valid loss: 728.70167478
Iter-241, train loss: 31.63059135, valid loss: 847.56849657
Iter-242, train loss: 31.21094849, valid loss: 817.74618650
Iter-243, train loss: 30.91713296, valid loss: 772.42384231
Iter-244, train loss: 30.39425854, valid loss: 702.22347957
Iter-245, train loss: 28.03811657, valid loss: 741.85546507
Iter-246, train loss: 29.74197215, valid loss: 806.05342707
Iter-247, train loss: 30.61243630, valid loss: 8224.15375971
Iter-248, train loss: 30.36296809, valid loss: 21932.97659425
Iter-249, train loss: 29.07117406, valid loss: 21526.43732297
Iter-250, train loss: 31.12882698, valid loss: 20296.90780729
Iter-251, train loss: 31.32701188, valid loss: 15848.24311952
Iter-252, train loss: 30.84832098, valid loss: 669.38039015
Iter-253, train loss: 30.57680747, valid loss: 702.40001992
Iter-254, train loss: 29.50811961, valid loss: 709.73940431
Iter-255, train loss: 30.13491977, valid loss: 743.01254582
Iter-256, train loss: 30.70806314, valid loss: 767.18111421
Iter-257, train loss: 29.88380979, valid loss: 721.81317396
Iter-258, train loss: 29.91661460, valid loss: 633.02510043
Iter-259, train loss: 29.15223820, valid loss: 810.81079974
Iter-260, train loss: 30.41456112, valid loss: 1457.45596067
Iter-261, train loss: 30.34181937, valid loss: 1026.91876934
Iter-262, train loss: 30.79042996, valid loss: 1624.52775989
Iter-263, train loss: 30.78931562, valid loss: 825.94456886
Iter-264, train loss: 30.76792767, valid loss: 1363.24260557
Iter-265, train loss: 30.61058676, valid loss: 1623.13104232
Iter-266, train loss: 30.06731702, valid loss: 2112.87934351
Iter-267, train loss: 27.84041147, valid loss: 1774.21714624
Iter-268, train loss: 29.51299036, valid loss: 2533.26538612
Iter-269, train loss: 28.91657019, valid loss: 2292.91682475
Iter-270, train loss: 30.57657391, valid loss: 2756.32830236
Iter-271, train loss: 30.15986596, valid loss: 2849.93111000
Iter-272, train loss: 27.85495479, valid loss: 2232.32826183
Iter-273, train loss: 30.74859214, valid loss: 2349.46715384
Iter-274, train loss: 31.30623975, valid loss: 2142.16249791
Iter-275, train loss: 31.12042571, valid loss: 2716.62065283
Iter-276, train loss: 30.83899666, valid loss: 2361.71585440
Iter-277, train loss: 28.15707363, valid loss: 2447.45557153
Iter-278, train loss: 31.00462220, valid loss: 2600.35362174
Iter-279, train loss: 31.15782947, valid loss: 1636.21191590
Iter-280, train loss: 29.85159809, valid loss: 1514.92768556
Iter-281, train loss: 28.38582082, valid loss: 1614.58233105
Iter-282, train loss: 30.08108928, valid loss: 1768.79747196

In [ ]:
import matplotlib.pyplot as plt

plt.plot(net.losses['train'], label='Train loss')
plt.plot(net.losses['smooth train'], label='Train smooth loss')
plt.legend()
plt.show()

In [ ]:
import matplotlib.pyplot as plt

plt.plot(net.losses['valid'], label='Validation loss')
plt.legend()
plt.show()

In [ ]:
import matplotlib.pyplot as plt

y_pred = net.test(X_seed=X_valid[0], h=net.initial_state(), size=X_valid.shape[0]) # ys_tx1xn
y_pred.shape, Y_valid.shape

plt.plot(y_pred[:100, 1], label='y_pred')
plt.plot(Y_valid[:100, 1], label='Y_valid')
# plt.plot(X_valid[:100], label='X_valid')
plt.legend()
plt.show()

In [ ]:


In [ ]: