In [2]:
# Data: time-series data (smartwatch, financial, or bike-sharing); USD/INR financial data is used below
# %matplotlib inline # uncomment to render plots inline instead of via plt.show()
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Data reading
# The historical time-series data to visualize (pick one of the paths below)
# data_path = 'data/smartwatch_data/experimental_data_analysis/Basis_Watch_Data.csv'
# data_path = 'data/financial_data/USD_INR.csv'
# data_path = 'data/bike_data/hour.csv'
data_path = 'data/financial_data/USD_INR.csv'
data = pd.read_csv(data_path)

# Data cleaning
# Replace NaN entries with 0.0
data = data.fillna(value=0.0)

# Show the first 10 rows of the CSV (comma-separated values) file
data[:10]


Out[2]:
           Date   Price    Open    High     Low  Change %
0  Aug 10, 2017  64.165  63.898  64.175  63.855      0.48
1  Aug 09, 2017  63.860  63.780  63.860  63.710      0.26
2  Aug 08, 2017  63.692  63.750  63.785  63.615     -0.23
3  Aug 07, 2017  63.840  63.710  63.865  63.648      0.26
4  Aug 04, 2017  63.675  63.670  63.786  63.572     -0.05
5  Aug 03, 2017  63.705  63.630  63.720  63.560      0.11
6  Aug 02, 2017  63.635  64.120  64.135  63.590     -0.72
7  Aug 01, 2017  64.095  64.100  64.145  64.040     -0.16
8  Jul 31, 2017  64.200  64.110  64.272  64.045      0.13
9  Jul 28, 2017  64.115  64.190  64.245  64.105     -0.02
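
Note that the CSV is stored newest-first (Aug 10, 2017 down to Jul 28, 2017). For next-step prediction it can be preferable to work in chronological order; a sketch of the optional reversal, using the same DataFrame as above (the runs below keep the file order as-is):

In [ ]:
# Optional: reverse the rows so the series runs oldest -> newest, then re-index
data_chrono = data.iloc[::-1].reset_index(drop=True)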

In [18]:
# Plot the raw data before scaling/normalization
# data[:10000]['Price'].plot()
data[:100].plot()
plt.legend()
plt.show()



In [19]:
data_array = np.array(data)
data_array.shape, data_array.dtype
# Drop the Date column (column 0) and convert the remaining columns to float
data_main = np.array(data_array[:, 1:], dtype=float)
data_main.shape, data_main.dtype

plt.plot(data_main[:100])
plt.show()



In [20]:
# Standardize each column to zero mean and unit variance (z-score)
mean = np.mean(data_main, axis=0)
std = np.std(data_main, axis=0)
std.shape, mean.shape, std.dtype, mean.dtype

data_norm = (data_main - mean) / std
plt.plot(data_norm[:100])
plt.show()
data_norm.mean(), data_norm.std(), data_norm.var(), data_norm.shape, data_norm.dtype


Out[20]:
(8.49984091452734e-18, 1.0, 1.0000000000000002, (9697, 5), dtype('float64'))
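
Since mean and std are computed per column (axis=0), each feature is standardized independently; the near-zero overall mean and unit std in Out[20] reflect this. A quick round-trip sanity check, a sketch using the arrays already defined:

In [ ]:
# Undo the z-score transform and compare against the original values
np.allclose((data_norm * std) + mean, data_main)  # expected: True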

In [21]:
train_data = data_norm[:9000] # first 9,000 of the 9,697 rows for training
test_data = data_norm[9000:] # remaining 697 rows held out for validation
train_data.shape, test_data.shape
X_train = train_data[0:8999] # inputs: time steps 0..8998
Y_train = train_data[1:9000] # targets: the next time step for each input
X_train.shape, Y_train.shape

plt.plot(X_train[:100])
plt.plot(Y_train[:100])
plt.show()
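
X_train and Y_train are the same series offset by one step, so each target row is simply the next time step of its input row; a quick check (sketch):

In [ ]:
# Y_train is X_train shifted forward by one time step
np.allclose(X_train[1:], Y_train[:-1])  # expected: True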



In [22]:
X_valid = test_data[0:696] # inputs: steps 0..695 of the held-out data
Y_valid = test_data[1:697] # targets: the next step for each input
X_valid.shape, Y_valid.shape
plt.plot(X_valid[:100])
plt.plot(Y_valid[:100])
plt.show()



In [23]:
# Model or Network
import impl.layer as l
from impl.loss import *

class GRU:
    def __init__(self, D, H, p_dropout, lam):
        self.D = D
        self.H = H
        self.p_dropout = p_dropout
        self.lam = lam
        self.losses = {'train':[], 'smooth train':[], 'valid': []}
        
        # Model params, Xavier-style initialization; Z = size of the concatenated [h, x]
        Z = H + D
        m = dict(
            Wz=np.random.randn(Z, H) / np.sqrt(Z / 2.),
            Wr=np.random.randn(Z, H) / np.sqrt(Z / 2.),
            Wh=np.random.randn(Z, H) / np.sqrt(Z / 2.),
            Wy=np.random.randn(H, D) / np.sqrt(H / 2.),
            bz=np.zeros((1, H)),
            br=np.zeros((1, H)),
            bh=np.zeros((1, H)),
            by=np.zeros((1, D))
        )
        self.model = m
        
    def initial_state(self):
        return np.zeros((1, self.H))

    def forward(self, X, h, m):
        Wz, Wr, Wh, Wy = m['Wz'], m['Wr'], m['Wh'], m['Wy']
        bz, br, bh, by = m['bz'], m['br'], m['bh'], m['by']

        X_in = X.copy()
        h_in = h.copy()

        X = np.column_stack((h_in, X_in))

        hz, hz_cache = l.fc_forward(X, Wz, bz)
        hz, hz_sigm_cache = l.sigmoid_forward(hz)

        hr, hr_cache = l.fc_forward(X, Wr, br)
        hr, hr_sigm_cache = l.sigmoid_forward(hr)

        X = np.column_stack((hr * h_in, X_in))
        
        hh, hh_cache = l.fc_forward(X, Wh, bh)
        hh, hh_tanh_cache = l.tanh_forward(hh)

        # Gated interpolation between the previous state and the candidate:
        h = ((1. - hz) * h_in) + (hz * hh)
        # equivalently: h = h_in + hz * (hh - h_in)

        y, y_cache = l.fc_forward(h, Wy, by)
        
        cache = (h_in, hz, hz_cache, hz_sigm_cache, hr, hr_cache, hr_sigm_cache, hh, hh_cache, hh_tanh_cache, 
                 y_cache)

        return y, h, cache

    def backward(self, dy, dh, cache):
        h_in, hz, hz_cache, hz_sigm_cache, hr, hr_cache, hr_sigm_cache, hh, hh_cache, hh_tanh_cache, y_cache = cache
        
        dh_out = dh.copy()

        dh, dWy, dby = l.fc_backward(dy, y_cache)
        dh += dh_out

        dh_in1 = (1. - hz) * dh
        dhh = hz * dh
        dhz = (hh * dh) - (h_in * dh)
        # or
        # dhz = (hh - h_in) * dh

        dhh = l.tanh_backward(dhh, hh_tanh_cache)
        dXh, dWh, dbh = l.fc_backward(dhh, hh_cache)

        dh = dXh[:, :self.H]
        dX_in2 = dXh[:, self.H:]
        dh_in2 = hr * dh

        dhr = h_in * dh
        dhr = l.sigmoid_backward(dhr, hr_sigm_cache)
        dXr, dWr, dbr = l.fc_backward(dhr, hr_cache)

        dhz = l.sigmoid_backward(dhz, hz_sigm_cache)
        dXz, dWz, dbz = l.fc_backward(dhz, hz_cache)

        dX = dXr + dXz
        dh_in3 = dX[:, :self.H]
        dX_in1 = dX[:, self.H:]

        dh = dh_in1 + dh_in2 + dh_in3
        dX = dX_in1 + dX_in2

        grad = dict(Wz=dWz, Wr=dWr, Wh=dWh, Wy=dWy, bz=dbz, br=dbr, bh=dbh, by=dby)
        
        return dX, dh, grad

    def train_forward(self, X_train, h):
        ys, caches, do_caches = [], [], []

        for X in X_train:
            X = X.reshape(1, -1) # X_1xn
            y, h, cache = self.forward(X, h, self.model)
            y, do_cache = l.dropout_forward(y, self.p_dropout)
            caches.append(cache)
            do_caches.append(do_cache)
            ys.append(y)
        
        ys = np.array(ys, dtype=float).reshape(len(ys), -1) # ys_txn instead of ys_tx1xn
        
        return ys, caches, do_caches
                                
    def loss_function(self, y_pred, y_train):
        loss, dys = 0.0, []

        for y, Y in zip(y_pred, y_train):
            loss += l2_regression_reg(model=self.model, y_pred=y, y_train=Y, lam=self.lam)
            dy = dl2_regression(y_pred=y, y_train=Y)
            dys.append(dy)
            
        return loss, dys
    
    def train_backward(self, dys, caches, do_caches):
        dh = np.zeros((1, self.H))
        grads = {key: np.zeros_like(val) for key, val in self.model.items()}

        for t in reversed(range(len(dys))):
            dy = dys[t].reshape(1, -1) # dy_1xn
            dy = l.dropout_backward(dy, do_caches[t])
            _, dh, grad = self.backward(dy, dh, caches[t])
            for key in grad.keys():
                grads[key] += grad[key]
                
        return grads
    
    def test(self, X_seed, h, size):
        ys = []
        X = X_seed.reshape(1, -1)
        for _ in range(size):
            y, h, _ = self.forward(X, h, self.model)
            X = y.copy() # feed the prediction back in as the next input
            ys.append(y)
        
        ys = np.array(ys, dtype=float).reshape(len(ys), -1) # ys_txn instead of ys_tx1xn
        return ys
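
For reference, one step of the gating computation in forward() can be written in plain NumPy without the impl.layer helpers. This is a minimal sketch with made-up shapes and biases omitted; sigmoid here stands in for l.sigmoid_forward and the @ products for l.fc_forward:

In [ ]:
def sigmoid(x):
    return 1. / (1. + np.exp(-x))

D_demo, H_demo = 5, 4
rng = np.random.RandomState(0)
Wz_d, Wr_d, Wh_d = (rng.randn(H_demo + D_demo, H_demo) for _ in range(3))
x = rng.randn(1, D_demo)      # one input time step
h = np.zeros((1, H_demo))     # initial hidden state

X = np.column_stack((h, x))                 # concatenated [h, x]
z = sigmoid(X @ Wz_d)                       # update gate
r = sigmoid(X @ Wr_d)                       # reset gate
h_tilde = np.tanh(np.column_stack((r * h, x)) @ Wh_d)  # candidate state
h_next = (1. - z) * h + z * h_tilde         # gated interpolation, as in forward()
h_next.shape  # (1, 4)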

In [24]:
def get_minibatch(X, y, minibatch_size, shuffle):
    minibatches = []

    if shuffle:
        # Note: shuffling breaks temporal order; keep shuffle=False for time series
        idx = np.random.permutation(X.shape[0])
        X, y = X[idx], y[idx]

    for i in range(0, X.shape[0], minibatch_size):
    # for i in range(0, X.shape[0] - minibatch_size + 1, 1):
        X_mini = X[i:i + minibatch_size]
        y_mini = y[i:i + minibatch_size]
        minibatches.append((X_mini, y_mini))

    return minibatches

def adam_rnn(nn, XY_train, XY_valid, alpha, mb_size, n_iter, print_after):
    X_train, y_train = XY_train
    X_valid, y_valid = XY_valid

    M = {key: np.zeros_like(val) for key, val in nn.model.items()}
    R = {key: np.zeros_like(val) for key, val in nn.model.items()}

    # Adam decay rates (values as suggested in Justin Johnson's Stanford lectures)
    beta1 = .9
    beta2 = .99
    state = nn.initial_state()
    smooth_loss = 1.
    minibatches = get_minibatch(X_train, y_train, mb_size, shuffle=False)
    
    for iter in range(1, n_iter + 1):
        for idx in range(len(minibatches)):
            
            # Train the model
            X_mini, y_mini = minibatches[idx]
            ys, caches, do_caches = nn.train_forward(X_mini, state)
            loss, dys = nn.loss_function(y_pred=ys, y_train=y_mini) #, alpha=alpha
            grads = nn.train_backward(dys, caches, do_caches)
            nn.losses['train'].append(loss)
            smooth_loss = (0.999 * smooth_loss) + (0.001 * loss)
            nn.losses['smooth train'].append(smooth_loss)
            
            # Update the model with Adam
            for key in grads.keys():
                M[key] = l.exp_running_avg(M[key], grads[key], beta1)
                R[key] = l.exp_running_avg(R[key], grads[key]**2, beta2)
                # Bias correction uses the epoch counter, so every minibatch
                # within an epoch shares the same correction factor
                m_k_hat = M[key] / (1. - (beta1 ** iter))
                r_k_hat = R[key] / (1. - (beta2 ** iter))
                nn.model[key] -= alpha * m_k_hat / (np.sqrt(r_k_hat) + l.eps)

            # Validate the model by free-running prediction over the held-out data
            ys = nn.test(X_seed=X_valid[0], h=state, size=X_valid.shape[0]) # ys_txn
            valid_loss, _ = nn.loss_function(y_pred=ys, y_train=y_valid)
            nn.losses['valid'].append(valid_loss)

        # Print the training and validation loss
        if iter % print_after == 0:
            print('Iter-{}, train loss: {:.8f}, valid loss: {:.8f}'.format(iter, loss, valid_loss))

    return nn
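
The update inside adam_rnn is the standard Adam rule, with exp_running_avg assumed to compute beta * avg + (1 - beta) * new. Written out for a single scalar parameter (an illustrative sketch; eps stands in for l.eps):

In [ ]:
# One Adam step at t=1 for a scalar parameter w with gradient g (illustrative only)
beta1, beta2, eps, lr, t = .9, .99, 1e-8, 1e-4, 1
m, r, w, g = 0.0, 0.0, 1.0, 0.5
m = beta1 * m + (1. - beta1) * g     # first moment (running mean of gradients)
r = beta2 * r + (1. - beta2) * g**2  # second moment (running mean of squares)
m_hat = m / (1. - beta1**t)          # bias correction
r_hat = r / (1. - beta2**t)
w -= lr * m_hat / (np.sqrt(r_hat) + eps)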

In [ ]:
# Hyper-parameters
time_step = 128 # minibatch size (steps per BPTT pass): 32, 64, 128, or 256
n_iter = 1000 # epochs
alpha = 1e-4 # learning rate: 1e-3, 5e-4, 1e-4 are common default choices
print_after = 1 # print training and validation loss every iteration
num_hidden_units = 64 # size of the hidden layer
num_input_units = X_train.shape[1] # X_txn
keep_prob = 0.95 # probability of keeping a unit; 0.9-0.95 suggested for SELU-style dropout
lam = 1e-4 # regularization strength

# Build the network, then optimize it with SGD (Adam)
# def adam_rnn(nn, X_train, y_train, alpha=0.001, mb_size=256, n_iter=2000, print_after=100):
net = GRU(D=num_input_units, H=num_hidden_units, p_dropout=keep_prob, lam=lam)

# Train with backpropagation through time + Adam
adam_rnn(nn=net, XY_train=(X_train, Y_train), XY_valid=(X_valid, Y_valid), alpha=alpha, mb_size=time_step,
         n_iter=n_iter, print_after=print_after)


Iter-1, train loss: 26.18145155, valid loss: 730.29228576
Iter-2, train loss: 17.14181781, valid loss: 665.36436458
Iter-3, train loss: 12.01773797, valid loss: 402.53381503
Iter-4, train loss: 10.73105078, valid loss: 248.00385273
Iter-5, train loss: 8.95765424, valid loss: 176.02984756
Iter-6, train loss: 9.60291397, valid loss: 141.11780686
Iter-7, train loss: 9.03502782, valid loss: 124.15782580
Iter-8, train loss: 7.59452932, valid loss: 115.37814368
Iter-9, train loss: 7.23824779, valid loss: 110.42845477
Iter-10, train loss: 6.95693447, valid loss: 107.77207331
Iter-11, train loss: 7.00035512, valid loss: 106.14270658
Iter-12, train loss: 8.28614427, valid loss: 104.81092939
Iter-13, train loss: 6.63530640, valid loss: 104.22523405
Iter-14, train loss: 7.25955584, valid loss: 103.75113185
Iter-15, train loss: 6.93314382, valid loss: 103.67111954
Iter-16, train loss: 7.06967027, valid loss: 103.41388358
Iter-17, train loss: 7.62643659, valid loss: 103.28402310
Iter-18, train loss: 6.61525169, valid loss: 102.95005209
Iter-19, train loss: 6.69881387, valid loss: 102.82715859
Iter-20, train loss: 6.26175895, valid loss: 102.91848212
Iter-21, train loss: 5.65881427, valid loss: 103.10458821
Iter-22, train loss: 6.00172124, valid loss: 103.53290808
Iter-23, train loss: 5.74412826, valid loss: 103.00292457
Iter-24, train loss: 6.32800354, valid loss: 103.52871370
Iter-25, train loss: 6.77415267, valid loss: 103.37247468
Iter-26, train loss: 5.49992863, valid loss: 103.41900029
Iter-27, train loss: 5.27667116, valid loss: 103.10413952
Iter-28, train loss: 5.37012833, valid loss: 103.51368461
Iter-29, train loss: 6.27961793, valid loss: 103.39271431
Iter-30, train loss: 5.58831485, valid loss: 103.08743449
Iter-31, train loss: 5.49299774, valid loss: 102.84221600
Iter-32, train loss: 5.48033029, valid loss: 105.35722640
Iter-33, train loss: 5.47002184, valid loss: 103.42952198
Iter-34, train loss: 6.06266060, valid loss: 105.95968078
Iter-35, train loss: 4.94973319, valid loss: 102.37613652
Iter-36, train loss: 4.91278407, valid loss: 107.48427799
Iter-37, train loss: 5.68526078, valid loss: 106.02157529
Iter-38, train loss: 4.84828185, valid loss: 107.42492627
Iter-39, train loss: 6.10049457, valid loss: 108.98019314
Iter-40, train loss: 4.51854879, valid loss: 241.51148831
Iter-41, train loss: 6.21002945, valid loss: 484.83868682
Iter-42, train loss: 5.18360237, valid loss: 113.78396637
Iter-43, train loss: 5.18818031, valid loss: 497.89099640
Iter-44, train loss: 7.13769067, valid loss: 614.16257806
Iter-45, train loss: 4.82981631, valid loss: 612.69040779
Iter-46, train loss: 4.20185126, valid loss: 631.22726872
Iter-47, train loss: 4.25494484, valid loss: 653.67789246
Iter-48, train loss: 4.81000568, valid loss: 637.08824595
Iter-49, train loss: 5.42932028, valid loss: 646.83266968
Iter-50, train loss: 4.80062242, valid loss: 674.67922198
Iter-51, train loss: 4.98679884, valid loss: 671.52606318
Iter-52, train loss: 4.75024872, valid loss: 671.52296583
Iter-53, train loss: 4.75326232, valid loss: 669.61901944
Iter-54, train loss: 4.95431108, valid loss: 675.10479943
Iter-55, train loss: 4.45531897, valid loss: 678.91783936
Iter-56, train loss: 4.62232943, valid loss: 680.14053155
Iter-57, train loss: 5.51166488, valid loss: 686.17600470
Iter-58, train loss: 3.57383344, valid loss: 692.09076743
Iter-59, train loss: 4.01436615, valid loss: 697.36531481
Iter-60, train loss: 4.65188111, valid loss: 687.51217008
Iter-61, train loss: 5.93839210, valid loss: 690.84111756
Iter-62, train loss: 6.48353583, valid loss: 691.51166580
Iter-63, train loss: 5.01583341, valid loss: 698.95192184
Iter-64, train loss: 3.98454540, valid loss: 686.07279293
Iter-65, train loss: 4.60417492, valid loss: 692.22777215
Iter-66, train loss: 5.04731022, valid loss: 693.47395348
Iter-67, train loss: 5.24954478, valid loss: 692.65128008
Iter-68, train loss: 5.62616112, valid loss: 688.17094935
Iter-69, train loss: 4.77730098, valid loss: 694.38599295
Iter-70, train loss: 4.91564245, valid loss: 700.40673712
Iter-71, train loss: 4.10707882, valid loss: 701.07223828
Iter-72, train loss: 5.88885632, valid loss: 701.32677225
Iter-73, train loss: 5.16247974, valid loss: 694.36959311
Iter-74, train loss: 5.16232594, valid loss: 700.40995517
Iter-75, train loss: 5.05094715, valid loss: 694.80261626
Iter-76, train loss: 5.60517053, valid loss: 693.37464304
Iter-77, train loss: 4.06765451, valid loss: 701.20381546
Iter-78, train loss: 4.96031805, valid loss: 691.64768036
Iter-79, train loss: 4.07883234, valid loss: 693.42923660
Iter-80, train loss: 5.11512538, valid loss: 690.76490906
Iter-81, train loss: 5.12261577, valid loss: 692.81612618
Iter-82, train loss: 3.87138619, valid loss: 694.85000901
Iter-83, train loss: 4.22270155, valid loss: 696.17075240
Iter-84, train loss: 4.19944055, valid loss: 695.49802993
Iter-85, train loss: 3.74108879, valid loss: 690.44910574
Iter-86, train loss: 4.87585547, valid loss: 690.05980832
Iter-87, train loss: 4.59213848, valid loss: 687.80361526
Iter-88, train loss: 4.43139549, valid loss: 691.78662948
Iter-89, train loss: 4.41969855, valid loss: 692.55387988
Iter-90, train loss: 4.85647305, valid loss: 697.89102563
Iter-91, train loss: 4.42308453, valid loss: 688.02177448
Iter-92, train loss: 3.98307702, valid loss: 693.17644517
Iter-93, train loss: 3.99023908, valid loss: 688.86366896
Iter-94, train loss: 4.41152628, valid loss: 681.12934357
Iter-95, train loss: 4.83165523, valid loss: 679.31231690
Iter-96, train loss: 4.16222321, valid loss: 685.98152584
Iter-97, train loss: 4.17551905, valid loss: 685.02534241
Iter-98, train loss: 5.03402653, valid loss: 685.83856845
Iter-99, train loss: 4.61043686, valid loss: 675.89587194
Iter-100, train loss: 4.41047805, valid loss: 678.01034654
Iter-101, train loss: 4.38139406, valid loss: 672.11308652
Iter-102, train loss: 5.24322652, valid loss: 661.67831612
Iter-103, train loss: 4.59110938, valid loss: 655.31719800
Iter-104, train loss: 3.71903951, valid loss: 660.04606885
Iter-105, train loss: 4.36964557, valid loss: 665.26594242
Iter-106, train loss: 3.77680628, valid loss: 665.82029645
Iter-107, train loss: 4.18020057, valid loss: 667.20445855
Iter-108, train loss: 5.58168187, valid loss: 667.31533247
Iter-109, train loss: 5.46087538, valid loss: 666.23784278
Iter-110, train loss: 5.22372545, valid loss: 664.74384819
Iter-111, train loss: 4.35325042, valid loss: 668.97530819
Iter-112, train loss: 3.49806929, valid loss: 668.54491198
Iter-113, train loss: 3.69752759, valid loss: 670.11846662
Iter-114, train loss: 3.85812612, valid loss: 654.23802089
Iter-115, train loss: 3.54208568, valid loss: 651.45167763
Iter-116, train loss: 4.82318862, valid loss: 647.80480488
Iter-117, train loss: 3.23858195, valid loss: 644.64157809
Iter-118, train loss: 3.47928072, valid loss: 640.26646663
Iter-119, train loss: 4.32425619, valid loss: 651.85994326
Iter-120, train loss: 4.33849772, valid loss: 647.62064676
Iter-121, train loss: 4.76669450, valid loss: 642.94145518
Iter-122, train loss: 5.87666008, valid loss: 642.94896165
Iter-123, train loss: 4.10836804, valid loss: 650.43194376
Iter-124, train loss: 3.94437311, valid loss: 643.38324961
Iter-125, train loss: 3.95055288, valid loss: 642.86177405
Iter-126, train loss: 5.38618628, valid loss: 659.84810156
Iter-127, train loss: 4.76700320, valid loss: 659.84914266
Iter-128, train loss: 3.67618019, valid loss: 643.12171296
Iter-129, train loss: 4.31072554, valid loss: 640.82151564
Iter-130, train loss: 4.53014063, valid loss: 637.50890984
Iter-131, train loss: 4.32904433, valid loss: 621.80494112
Iter-132, train loss: 4.76018231, valid loss: 618.68419075
Iter-133, train loss: 3.70651610, valid loss: 619.79247410
Iter-134, train loss: 3.88719666, valid loss: 614.91632077
Iter-135, train loss: 4.76502152, valid loss: 610.31378813
Iter-136, train loss: 4.75054139, valid loss: 609.52865686
Iter-137, train loss: 3.41506366, valid loss: 615.50400940
Iter-138, train loss: 4.75615505, valid loss: 609.93672541
Iter-139, train loss: 4.29726655, valid loss: 599.05312241
Iter-140, train loss: 4.07859068, valid loss: 598.28115992
Iter-141, train loss: 4.74355193, valid loss: 606.21361497
Iter-142, train loss: 4.96650105, valid loss: 598.91296111
Iter-143, train loss: 4.32025747, valid loss: 603.62285080
Iter-144, train loss: 4.08745706, valid loss: 591.69173412
Iter-145, train loss: 4.34209415, valid loss: 574.11813609
Iter-146, train loss: 5.38658398, valid loss: 558.84372651
Iter-147, train loss: 5.60930785, valid loss: 535.42132670
Iter-148, train loss: 4.31584181, valid loss: 515.20367288
Iter-149, train loss: 5.15726338, valid loss: 500.73396882
Iter-150, train loss: 4.96080721, valid loss: 456.07993093
Iter-151, train loss: 4.52730966, valid loss: 513.87570916
Iter-152, train loss: 4.79195400, valid loss: 505.63729028
Iter-153, train loss: 4.52153396, valid loss: 521.67034065
Iter-154, train loss: 4.28241474, valid loss: 504.97502985
Iter-155, train loss: 4.48758373, valid loss: 504.79815536
Iter-156, train loss: 5.65310997, valid loss: 499.31470984
Iter-157, train loss: 4.28123088, valid loss: 486.40910011
Iter-158, train loss: 4.76203702, valid loss: 522.12785727
Iter-159, train loss: 4.97506356, valid loss: 514.98503190
Iter-160, train loss: 5.21526014, valid loss: 526.26103375
Iter-161, train loss: 4.33041023, valid loss: 507.12731420
Iter-162, train loss: 4.29469485, valid loss: 471.98946304
Iter-163, train loss: 4.10497279, valid loss: 458.08055036
Iter-164, train loss: 4.74596718, valid loss: 527.87237293
Iter-165, train loss: 4.52646828, valid loss: 513.51188228
Iter-166, train loss: 5.19492747, valid loss: 501.12192857
Iter-167, train loss: 4.07010226, valid loss: 476.38837932
Iter-168, train loss: 4.32628399, valid loss: 547.24382914
Iter-169, train loss: 3.72825982, valid loss: 512.40650055
Iter-170, train loss: 5.86378223, valid loss: 529.49689112
Iter-171, train loss: 4.94413135, valid loss: 536.49586380
Iter-172, train loss: 3.45883947, valid loss: 512.44471220
Iter-173, train loss: 3.73564841, valid loss: 432.08489084
Iter-174, train loss: 3.41867873, valid loss: 396.25733085
Iter-175, train loss: 4.31630923, valid loss: 414.39493902
Iter-176, train loss: 4.97336337, valid loss: 473.07466898
Iter-177, train loss: 6.31398821, valid loss: 519.87027862
Iter-178, train loss: 4.83168841, valid loss: 540.49171862
Iter-179, train loss: 4.97763385, valid loss: 485.39602887
Iter-180, train loss: 5.25279304, valid loss: 511.73802899
Iter-181, train loss: 4.74389435, valid loss: 416.79298516
Iter-182, train loss: 4.07940766, valid loss: 549.78145483
Iter-183, train loss: 4.15480256, valid loss: 544.82514018
Iter-184, train loss: 5.66941566, valid loss: 506.77575263
Iter-185, train loss: 4.06587224, valid loss: 505.01181369
Iter-186, train loss: 3.89473282, valid loss: 325.64990734
Iter-187, train loss: 5.18423178, valid loss: 506.40194995
Iter-188, train loss: 4.72241919, valid loss: 490.53347524
Iter-189, train loss: 5.18543606, valid loss: 477.72648218
Iter-190, train loss: 4.09762450, valid loss: 531.26529463
Iter-191, train loss: 4.32574705, valid loss: 602.19443259
Iter-192, train loss: 4.75395823, valid loss: 552.55363375
Iter-193, train loss: 5.41301219, valid loss: 395.14194002
Iter-194, train loss: 3.90002166, valid loss: 111.87433426
Iter-195, train loss: 4.76541445, valid loss: 104.22071100
Iter-196, train loss: 4.77188133, valid loss: 93.25225395
Iter-197, train loss: 4.97218682, valid loss: 90.31301085
Iter-198, train loss: 4.64363248, valid loss: 87.02734003
Iter-199, train loss: 5.17610543, valid loss: 85.19556236
Iter-200, train loss: 4.53614327, valid loss: 84.30861470
Iter-201, train loss: 4.33777138, valid loss: 88.39677904
Iter-202, train loss: 5.01672988, valid loss: 98.99489981
Iter-203, train loss: 4.55907052, valid loss: 90.63791054
Iter-204, train loss: 4.34251747, valid loss: 89.82423746
Iter-205, train loss: 4.56937487, valid loss: 84.23401166
Iter-206, train loss: 3.46012442, valid loss: 87.81663159
Iter-207, train loss: 3.69578659, valid loss: 89.47601804
Iter-208, train loss: 4.34512494, valid loss: 87.06911251
Iter-209, train loss: 4.35931843, valid loss: 87.86028213
Iter-210, train loss: 4.12388157, valid loss: 84.99498659
Iter-211, train loss: 4.80664903, valid loss: 85.25843332
Iter-212, train loss: 4.80271998, valid loss: 88.62357019
Iter-213, train loss: 4.33927713, valid loss: 90.59486041
Iter-214, train loss: 3.50676050, valid loss: 93.21441928
Iter-215, train loss: 4.60437199, valid loss: 90.06844510
Iter-216, train loss: 3.69723972, valid loss: 92.23484587
Iter-217, train loss: 3.91725233, valid loss: 103.47577491
Iter-218, train loss: 5.02041775, valid loss: 88.15747435
Iter-219, train loss: 4.30821316, valid loss: 87.09129087
Iter-220, train loss: 4.13135569, valid loss: 86.94541068
Iter-221, train loss: 3.70387300, valid loss: 87.44996627
Iter-222, train loss: 3.90000911, valid loss: 86.49449561
Iter-223, train loss: 4.16182979, valid loss: 85.95889919
Iter-224, train loss: 5.02911620, valid loss: 83.89944484
Iter-225, train loss: 3.90603200, valid loss: 82.99447898
Iter-226, train loss: 3.72384577, valid loss: 85.18712209
Iter-227, train loss: 4.82276759, valid loss: 88.51323062
Iter-228, train loss: 5.01101235, valid loss: 86.15746159
Iter-229, train loss: 4.36915463, valid loss: 87.22901017
Iter-230, train loss: 5.20213965, valid loss: 87.32178703
Iter-231, train loss: 4.56631025, valid loss: 85.82406983
Iter-232, train loss: 4.16432035, valid loss: 88.22181723
Iter-233, train loss: 4.40534002, valid loss: 87.91365996
Iter-234, train loss: 5.03744093, valid loss: 88.94204500
Iter-235, train loss: 4.78980591, valid loss: 89.63287211
Iter-236, train loss: 4.16193390, valid loss: 89.86618133
Iter-237, train loss: 4.82038396, valid loss: 92.15485100
Iter-238, train loss: 3.72983072, valid loss: 95.40024412
Iter-239, train loss: 3.89560359, valid loss: 90.13965602
Iter-240, train loss: 4.19265160, valid loss: 94.60950569
Iter-241, train loss: 3.73241904, valid loss: 88.31031932
Iter-242, train loss: 4.82379957, valid loss: 94.34215325
Iter-243, train loss: 5.01198322, valid loss: 89.94239138
Iter-244, train loss: 4.82926989, valid loss: 91.32505851
Iter-245, train loss: 4.44647800, valid loss: 92.52983528
Iter-246, train loss: 5.26404691, valid loss: 89.80109489
Iter-247, train loss: 4.02888313, valid loss: 91.48383360
Iter-248, train loss: 4.16926609, valid loss: 94.01073051
Iter-249, train loss: 4.61942933, valid loss: 97.23621713
Iter-250, train loss: 4.15759368, valid loss: 94.83207453
Iter-251, train loss: 4.59811074, valid loss: 103.06326672
Iter-252, train loss: 3.49456667, valid loss: 105.15581798
Iter-253, train loss: 3.53092382, valid loss: 106.16507458
Iter-254, train loss: 4.11836823, valid loss: 105.09135127
Iter-255, train loss: 4.16222774, valid loss: 94.27531705
Iter-256, train loss: 4.17050642, valid loss: 95.71400409
Iter-257, train loss: 5.03062265, valid loss: 102.39289365
Iter-258, train loss: 4.00503900, valid loss: 104.90662232
Iter-259, train loss: 4.59572116, valid loss: 99.83697807
Iter-260, train loss: 4.60896294, valid loss: 95.93595474
Iter-261, train loss: 5.25361080, valid loss: 92.40310511
Iter-262, train loss: 4.59638572, valid loss: 99.21803954
Iter-263, train loss: 4.62380955, valid loss: 105.38436843
Iter-264, train loss: 5.25609454, valid loss: 98.01995920
Iter-265, train loss: 4.60969367, valid loss: 102.04959176
Iter-266, train loss: 3.75394573, valid loss: 100.77075921
Iter-267, train loss: 4.18465131, valid loss: 102.34474913
Iter-268, train loss: 4.25579897, valid loss: 101.99155411
Iter-269, train loss: 5.06243315, valid loss: 101.77028546
Iter-270, train loss: 3.53236743, valid loss: 106.19897769
Iter-271, train loss: 4.20378222, valid loss: 112.35384028
Iter-272, train loss: 4.19784079, valid loss: 110.83514025
Iter-273, train loss: 4.15348448, valid loss: 105.65874332
Iter-274, train loss: 5.06792650, valid loss: 94.44707046
Iter-275, train loss: 3.51465268, valid loss: 87.87581873
Iter-276, train loss: 3.32119670, valid loss: 87.88362639
Iter-277, train loss: 5.13062168, valid loss: 100.12477884
Iter-278, train loss: 4.18820807, valid loss: 97.44871902
Iter-279, train loss: 5.71601517, valid loss: 106.72850662
Iter-280, train loss: 3.97029537, valid loss: 96.26720200
Iter-281, train loss: 4.39261964, valid loss: 95.48554615
Iter-282, train loss: 5.30453950, valid loss: 96.46198770
Iter-283, train loss: 3.52851840, valid loss: 92.22055686
Iter-284, train loss: 4.99677665, valid loss: 98.62224676
Iter-285, train loss: 4.62674170, valid loss: 106.48713648
Iter-286, train loss: 4.40790032, valid loss: 102.70119670
Iter-287, train loss: 4.86139290, valid loss: 106.91092652
Iter-288, train loss: 4.91956033, valid loss: 93.97269956
Iter-289, train loss: 3.75849349, valid loss: 102.58433122
Iter-290, train loss: 5.29469949, valid loss: 105.51049863
Iter-291, train loss: 4.46675436, valid loss: 103.60533687
Iter-292, train loss: 4.62443093, valid loss: 88.65751065
Iter-293, train loss: 5.01596900, valid loss: 99.42841319
Iter-294, train loss: 4.39602960, valid loss: 97.31236941
Iter-295, train loss: 3.96837692, valid loss: 100.17060609
Iter-296, train loss: 5.11517536, valid loss: 101.61878835
Iter-297, train loss: 4.24441675, valid loss: 101.70679777
Iter-298, train loss: 3.95710573, valid loss: 88.95685108
Iter-299, train loss: 4.39483465, valid loss: 90.60671062
Iter-300, train loss: 4.62971629, valid loss: 96.10263855
Iter-301, train loss: 4.42542415, valid loss: 92.89522387
Iter-302, train loss: 4.84279883, valid loss: 104.24026364
Iter-303, train loss: 3.96896968, valid loss: 92.70155356
Iter-304, train loss: 4.41550048, valid loss: 96.82652240
Iter-305, train loss: 5.33613847, valid loss: 94.64796041
Iter-306, train loss: 5.56356427, valid loss: 92.22400962
Iter-307, train loss: 5.07598160, valid loss: 98.20481282
Iter-308, train loss: 3.97539470, valid loss: 87.95364882
Iter-309, train loss: 4.39262034, valid loss: 83.85440566
Iter-310, train loss: 4.84795820, valid loss: 81.26494510
Iter-311, train loss: 6.12791561, valid loss: 81.44824709
Iter-312, train loss: 4.20919102, valid loss: 84.33555168
Iter-313, train loss: 5.26769461, valid loss: 126.29127688
Iter-314, train loss: 3.99735406, valid loss: 95.45983123
Iter-315, train loss: 5.04886820, valid loss: 109.26370460
Iter-316, train loss: 4.41242006, valid loss: 81.29221245
Iter-317, train loss: 6.14729539, valid loss: 84.90762424
Iter-318, train loss: 4.87007440, valid loss: 86.89107497
Iter-319, train loss: 4.83404323, valid loss: 81.59644298
Iter-320, train loss: 5.06769415, valid loss: 125.23609898
Iter-321, train loss: 4.85573116, valid loss: 119.45694722
Iter-322, train loss: 5.48895166, valid loss: 118.22350276
Iter-323, train loss: 3.75980604, valid loss: 98.24937755
Iter-324, train loss: 4.87950748, valid loss: 152.02813593
Iter-325, train loss: 4.06659635, valid loss: 176.47440873
Iter-326, train loss: 4.20034058, valid loss: 198.34683351
Iter-327, train loss: 4.22900417, valid loss: 204.84630898
Iter-328, train loss: 4.91525553, valid loss: 176.95445906
Iter-329, train loss: 5.06898448, valid loss: 166.40636380
Iter-330, train loss: 4.40850065, valid loss: 154.18460245
Iter-331, train loss: 4.84713506, valid loss: 167.07921049
Iter-332, train loss: 5.08743923, valid loss: 185.96426505
Iter-333, train loss: 5.28377128, valid loss: 200.77509581
Iter-334, train loss: 5.51263342, valid loss: 193.17100237
Iter-335, train loss: 4.26890834, valid loss: 180.88797262
Iter-336, train loss: 5.57190073, valid loss: 166.24836218
Iter-337, train loss: 4.19327777, valid loss: 168.22812177
Iter-338, train loss: 4.63886183, valid loss: 167.57624958
Iter-339, train loss: 4.43269196, valid loss: 167.64494871
Iter-340, train loss: 3.42815291, valid loss: 185.17069184
Iter-341, train loss: 4.85764856, valid loss: 184.67215991
Iter-342, train loss: 4.18834877, valid loss: 203.35544326
Iter-343, train loss: 4.45049466, valid loss: 167.37341194
Iter-344, train loss: 4.43886344, valid loss: 159.30066689
Iter-345, train loss: 4.21595239, valid loss: 153.91163039
Iter-346, train loss: 5.33973111, valid loss: 165.26371619
Iter-347, train loss: 4.44029955, valid loss: 165.86180415
Iter-348, train loss: 5.10791329, valid loss: 160.96248771
Iter-349, train loss: 5.84446774, valid loss: 171.03636501
Iter-350, train loss: 4.25652956, valid loss: 150.66500185
Iter-351, train loss: 4.03046687, valid loss: 170.81948520
Iter-352, train loss: 4.45539828, valid loss: 170.83996304
Iter-353, train loss: 4.22483584, valid loss: 156.70916624
Iter-354, train loss: 4.23997499, valid loss: 151.87668360
Iter-355, train loss: 4.72198084, valid loss: 178.46509671
Iter-356, train loss: 3.57380473, valid loss: 174.17561436
Iter-357, train loss: 4.27449767, valid loss: 166.64015226
Iter-358, train loss: 4.45249438, valid loss: 178.57851935
Iter-359, train loss: 4.87869911, valid loss: 170.60794948
Iter-360, train loss: 3.39166544, valid loss: 212.69283935
Iter-361, train loss: 4.02745764, valid loss: 202.91643935
Iter-362, train loss: 4.24783230, valid loss: 196.07433570
Iter-363, train loss: 5.11317090, valid loss: 198.00086650
Iter-364, train loss: 5.11535830, valid loss: 194.23825048
Iter-365, train loss: 4.22870312, valid loss: 179.08368699
Iter-366, train loss: 4.26861858, valid loss: 177.50512872
Iter-367, train loss: 3.59244398, valid loss: 161.19185981
Iter-368, train loss: 3.80099208, valid loss: 186.21626428
Iter-369, train loss: 4.67654391, valid loss: 179.31700813
Iter-370, train loss: 5.39221550, valid loss: 189.40868318
Iter-371, train loss: 5.74260701, valid loss: 180.23519805
Iter-372, train loss: 5.15723940, valid loss: 168.92215910
Iter-373, train loss: 4.22770468, valid loss: 141.45242617
Iter-374, train loss: 5.16439117, valid loss: 170.95485078
Iter-375, train loss: 5.14023385, valid loss: 169.02608665
Iter-376, train loss: 3.65812335, valid loss: 177.53884270
Iter-377, train loss: 4.26228048, valid loss: 174.15587584
Iter-378, train loss: 4.24959903, valid loss: 173.72125118
Iter-379, train loss: 3.82042218, valid loss: 179.58197090
Iter-380, train loss: 3.84694102, valid loss: 174.25492443
Iter-381, train loss: 4.67963584, valid loss: 165.10992296
Iter-382, train loss: 3.41132075, valid loss: 174.43098981
Iter-383, train loss: 5.76233024, valid loss: 157.73134530
Iter-384, train loss: 4.04891920, valid loss: 92.40291325
Iter-385, train loss: 4.89958949, valid loss: 150.82308013
Iter-386, train loss: 4.73159167, valid loss: 114.96314292
Iter-387, train loss: 4.67532136, valid loss: 149.96548432
Iter-388, train loss: 6.63894311, valid loss: 153.83614135
Iter-389, train loss: 4.69582931, valid loss: 106.46307891
Iter-390, train loss: 4.03589388, valid loss: 163.41199903
Iter-391, train loss: 5.34368521, valid loss: 105.57449856
Iter-392, train loss: 3.61986233, valid loss: 140.06586425
Iter-393, train loss: 3.80249640, valid loss: 88.07858275
Iter-394, train loss: 4.11052489, valid loss: 106.30536179
Iter-395, train loss: 3.60122093, valid loss: 99.13198444
Iter-396, train loss: 4.29370670, valid loss: 108.69285838
Iter-397, train loss: 4.67895286, valid loss: 155.76791007
Iter-398, train loss: 4.91099124, valid loss: 96.34472264
Iter-399, train loss: 4.03189732, valid loss: 91.14485336
Iter-400, train loss: 4.92132451, valid loss: 89.61992261
Iter-401, train loss: 5.10806061, valid loss: 97.85046457
Iter-402, train loss: 5.13852607, valid loss: 99.60716662
Iter-403, train loss: 4.49852576, valid loss: 99.44383610
Iter-404, train loss: 4.29728798, valid loss: 89.73893052
Iter-405, train loss: 4.22129760, valid loss: 114.67817378
Iter-406, train loss: 4.01135399, valid loss: 94.55507838
Iter-407, train loss: 3.80336423, valid loss: 98.79943199
Iter-408, train loss: 4.46707830, valid loss: 90.05397460
Iter-409, train loss: 5.31154368, valid loss: 99.03359052
Iter-410, train loss: 5.57485630, valid loss: 92.77613099
Iter-411, train loss: 4.48273851, valid loss: 91.55178309
Iter-412, train loss: 4.91158580, valid loss: 143.84477165
Iter-413, train loss: 5.75155663, valid loss: 90.42457207
Iter-414, train loss: 3.87186192, valid loss: 90.66359369
Iter-415, train loss: 4.55460850, valid loss: 95.11534175
Iter-416, train loss: 4.27878805, valid loss: 166.75935536
Iter-417, train loss: 4.49355485, valid loss: 96.61257688
Iter-418, train loss: 4.46384997, valid loss: 92.33630500
Iter-419, train loss: 5.13156131, valid loss: 102.66064505
Iter-420, train loss: 3.81619862, valid loss: 147.90533187
Iter-421, train loss: 4.23253521, valid loss: 91.98278613
Iter-422, train loss: 4.04101655, valid loss: 87.31469792
Iter-423, train loss: 4.69215410, valid loss: 100.49505829
Iter-424, train loss: 4.91806511, valid loss: 92.68298880
Iter-425, train loss: 4.02226779, valid loss: 155.02652538
Iter-426, train loss: 4.92505432, valid loss: 90.89968039
Iter-427, train loss: 4.02157208, valid loss: 89.15707311
Iter-428, train loss: 3.83532530, valid loss: 103.58916094
Iter-429, train loss: 5.33244313, valid loss: 169.89676817
Iter-430, train loss: 4.64666042, valid loss: 93.27382562
Iter-431, train loss: 4.90720538, valid loss: 160.92862544
Iter-432, train loss: 4.67233652, valid loss: 92.79396199
Iter-433, train loss: 5.33429553, valid loss: 186.14027970
Iter-434, train loss: 4.23643870, valid loss: 89.60084220
Iter-435, train loss: 3.80824846, valid loss: 137.96065674
Iter-436, train loss: 4.67152176, valid loss: 90.10408635
Iter-437, train loss: 5.34066774, valid loss: 153.49876686
Iter-438, train loss: 4.12689871, valid loss: 90.14624448
Iter-439, train loss: 5.32533131, valid loss: 87.80912859
Iter-440, train loss: 4.24555276, valid loss: 90.24467330
Iter-441, train loss: 3.80735462, valid loss: 88.23053410
Iter-442, train loss: 4.93201287, valid loss: 92.92471372
Iter-443, train loss: 6.61947523, valid loss: 86.26693301
Iter-444, train loss: 4.03132769, valid loss: 92.93518123
Iter-445, train loss: 4.03334270, valid loss: 86.14565187
Iter-446, train loss: 4.87663322, valid loss: 90.96940713
Iter-447, train loss: 4.69886394, valid loss: 87.46886948
Iter-448, train loss: 4.00437229, valid loss: 86.91470398
Iter-449, train loss: 4.46280405, valid loss: 90.89725107
Iter-450, train loss: 4.19587261, valid loss: 192.35698741
Iter-451, train loss: 5.59257638, valid loss: 89.69390678
Iter-452, train loss: 4.23535788, valid loss: 87.58593595
Iter-453, train loss: 5.08417542, valid loss: 88.23216330
Iter-454, train loss: 5.75020761, valid loss: 85.89807445
Iter-455, train loss: 5.08445856, valid loss: 86.51423500
Iter-456, train loss: 4.62734088, valid loss: 86.20664673
Iter-457, train loss: 4.42504945, valid loss: 90.19349080
Iter-458, train loss: 4.44839904, valid loss: 87.42823366
Iter-459, train loss: 5.09824032, valid loss: 90.37548738
Iter-460, train loss: 5.50587274, valid loss: 182.89565606
Iter-461, train loss: 4.22152088, valid loss: 88.13940252
Iter-462, train loss: 4.44885082, valid loss: 170.28616777
Iter-463, train loss: 3.78851607, valid loss: 130.85015182
Iter-464, train loss: 5.06890886, valid loss: 88.13819721
Iter-465, train loss: 4.86655758, valid loss: 91.04298480
Iter-466, train loss: 3.76618108, valid loss: 158.61077960
Iter-467, train loss: 3.77729246, valid loss: 87.58299307
Iter-468, train loss: 4.81713616, valid loss: 171.43641139
Iter-469, train loss: 4.64323767, valid loss: 89.00821585
Iter-470, train loss: 4.97406963, valid loss: 87.76693856
Iter-471, train loss: 4.21436944, valid loss: 89.37608104
Iter-472, train loss: 4.88367456, valid loss: 88.38667032
Iter-473, train loss: 4.20704261, valid loss: 88.15422604
Iter-474, train loss: 4.86359438, valid loss: 86.96275981
Iter-475, train loss: 4.88507212, valid loss: 163.99824847
Iter-476, train loss: 4.41277923, valid loss: 87.56276291
Iter-477, train loss: 3.75798542, valid loss: 88.02708059
Iter-478, train loss: 4.86629291, valid loss: 86.63363827
Iter-479, train loss: 4.82969684, valid loss: 200.04679142
Iter-480, train loss: 5.31657244, valid loss: 87.56189667
Iter-481, train loss: 4.83682174, valid loss: 86.77457709
Iter-482, train loss: 4.51663781, valid loss: 88.85906097
Iter-483, train loss: 4.47961334, valid loss: 154.56266182
Iter-484, train loss: 3.88966003, valid loss: 87.39607226
Iter-485, train loss: 5.04321774, valid loss: 163.59781116
Iter-486, train loss: 4.61971647, valid loss: 86.25754967
Iter-487, train loss: 4.22165193, valid loss: 88.36136659
Iter-488, train loss: 3.97387604, valid loss: 89.53270492
Iter-489, train loss: 5.50277418, valid loss: 88.46608464
Iter-490, train loss: 5.40916467, valid loss: 87.31655129
Iter-491, train loss: 4.40996483, valid loss: 186.01588996
Iter-492, train loss: 3.95570854, valid loss: 165.74381974
Iter-493, train loss: 4.87576694, valid loss: 191.85774926
Iter-494, train loss: 5.47643743, valid loss: 89.02757903
Iter-495, train loss: 4.63817449, valid loss: 91.78002406
Iter-496, train loss: 5.12216171, valid loss: 88.54609812
Iter-497, train loss: 4.62241272, valid loss: 175.46898213
Iter-498, train loss: 4.82124538, valid loss: 87.41144403
Iter-499, train loss: 5.70082248, valid loss: 178.44886998
Iter-500, train loss: 4.09307621, valid loss: 86.32911495
Iter-501, train loss: 4.19314030, valid loss: 177.70385584
Iter-502, train loss: 4.02109312, valid loss: 88.38474019
Iter-503, train loss: 4.37619326, valid loss: 162.26098188
Iter-504, train loss: 3.56275411, valid loss: 85.76816796
Iter-505, train loss: 4.23404300, valid loss: 177.19160096
Iter-506, train loss: 4.21238677, valid loss: 86.41075036
Iter-507, train loss: 5.08311793, valid loss: 87.26658736
Iter-508, train loss: 4.43779167, valid loss: 87.69482109
Iter-509, train loss: 4.14447816, valid loss: 87.39716345
Iter-510, train loss: 4.41513393, valid loss: 188.30133006
Iter-511, train loss: 5.04654179, valid loss: 89.78662323
Iter-512, train loss: 4.61291684, valid loss: 185.56786752
Iter-513, train loss: 4.59332240, valid loss: 185.61924959
Iter-514, train loss: 4.85068404, valid loss: 95.55566370
Iter-515, train loss: 4.18410688, valid loss: 87.17608737
Iter-516, train loss: 4.85788142, valid loss: 88.31567935
Iter-517, train loss: 4.39225841, valid loss: 88.67270110
Iter-518, train loss: 4.19392397, valid loss: 188.43834942
Iter-519, train loss: 5.03702600, valid loss: 88.69531147
Iter-520, train loss: 3.94972561, valid loss: 175.24261026
Iter-521, train loss: 4.62191159, valid loss: 191.03847026
Iter-522, train loss: 4.85692710, valid loss: 87.45363693
Iter-523, train loss: 4.61461217, valid loss: 231.82850935
Iter-524, train loss: 4.63625198, valid loss: 90.39668577
Iter-525, train loss: 4.84597718, valid loss: 279.95372093
Iter-526, train loss: 4.80729638, valid loss: 166.84780501
Iter-527, train loss: 4.62933896, valid loss: 796.63606328
Iter-528, train loss: 5.06825070, valid loss: 177.63538431
Iter-529, train loss: 3.74368005, valid loss: 87.49391953
Iter-530, train loss: 4.44315840, valid loss: 177.76032508
Iter-531, train loss: 5.73062081, valid loss: 89.20906314
Iter-532, train loss: 5.48238507, valid loss: 175.05243273
Iter-533, train loss: 5.03447042, valid loss: 86.54980430
Iter-534, train loss: 5.31212372, valid loss: 171.82263923
Iter-535, train loss: 5.46607848, valid loss: 86.48744625
Iter-536, train loss: 5.89934428, valid loss: 163.98755803
Iter-537, train loss: 5.28986041, valid loss: 680.36603543
Iter-538, train loss: 4.89381806, valid loss: 95.20766186
Iter-539, train loss: 5.63887577, valid loss: 89.08700540
Iter-540, train loss: 5.24875865, valid loss: 171.61677610
Iter-541, train loss: 4.16775074, valid loss: 86.99831429
Iter-542, train loss: 4.84502399, valid loss: 185.65201769
Iter-543, train loss: 4.15920090, valid loss: 186.05087933
Iter-544, train loss: 5.05577025, valid loss: 195.45520490
Iter-545, train loss: 5.06091921, valid loss: 654.77022224
Iter-546, train loss: 4.43084381, valid loss: 202.78493957
Iter-547, train loss: 5.31197506, valid loss: 87.49652892
Iter-548, train loss: 5.75611300, valid loss: 179.23041411
Iter-549, train loss: 4.02431446, valid loss: 92.20792584
Iter-550, train loss: 3.56248490, valid loss: 155.96156115
Iter-551, train loss: 3.73162285, valid loss: 168.22275850
Iter-552, train loss: 4.45105207, valid loss: 88.09642289
Iter-553, train loss: 5.26920727, valid loss: 86.84629880
Iter-554, train loss: 4.61120015, valid loss: 88.14422500
Iter-555, train loss: 3.98386815, valid loss: 188.99840407
Iter-556, train loss: 4.16054386, valid loss: 203.45602314
Iter-557, train loss: 5.24770540, valid loss: 207.78588248
Iter-558, train loss: 3.58911111, valid loss: 241.37318652
Iter-559, train loss: 3.73575317, valid loss: 191.71274280
Iter-560, train loss: 4.16554593, valid loss: 91.61908132
Iter-561, train loss: 3.59824724, valid loss: 90.37266706
Iter-562, train loss: 4.61100993, valid loss: 86.55820280

In [ ]:
import matplotlib.pyplot as plt

plt.plot(net.losses['train'], label='Train loss')
plt.plot(net.losses['smooth train'], label='Train smooth loss')
plt.legend()
plt.show()

In [ ]:
import matplotlib.pyplot as plt

plt.plot(net.losses['valid'], label='Validation loss')
plt.legend()
plt.show()

In [ ]:
import matplotlib.pyplot as plt

y_pred = net.test(X_seed=X_valid[0], h=net.initial_state(), size=X_valid.shape[0]) # ys_txn
y_pred.shape, Y_valid.shape

plt.plot(y_pred[:300], label='y_pred')
plt.plot(Y_valid[:300], label='Y_valid')
# plt.plot(X_valid, label='X_valid')
plt.legend()
plt.show()
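
To read the curves in the original units (rupees per dollar) rather than standardized values, the z-score transform can be undone with the mean and std computed earlier; a sketch, plotting the Price column (column 0 of data_main):

In [ ]:
# Map standardized predictions and targets back to the original scale
y_pred_orig = (y_pred * std) + mean
Y_valid_orig = (Y_valid * std) + mean

plt.plot(y_pred_orig[:300, 0], label='predicted Price')
plt.plot(Y_valid_orig[:300, 0], label='actual Price')
plt.legend()
plt.show()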
