In [1]:
# Data: time-series data (smartwatch, financial, or bike-sharing data)
# %matplotlib inline # for plt.show()
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Data reading
# The historical/time-series data to visualize (bike-sharing hourly data by default; other datasets commented out below)
# data_path = 'data/smartwatch_data/experimental_data_analysis/Basis_Watch_Data.csv'
# data_path = 'data/financial_data/USD_INR.csv'
data_path = 'data/bike_data/hour.csv'
data = pd.read_csv(data_path)

# Data: cleaning
# Replace any NaN values with 0.0
data = data.fillna(value=0.0)

# Show the first 10 rows of the CSV (comma-separated values) file
data[:10]


Out[1]:
instant dteday season yr mnth hr holiday weekday workingday weathersit temp atemp hum windspeed casual registered cnt
0 1 2011-01-01 1 0 1 0 0 6 0 1 0.24 0.2879 0.81 0.0000 3 13 16
1 2 2011-01-01 1 0 1 1 0 6 0 1 0.22 0.2727 0.80 0.0000 8 32 40
2 3 2011-01-01 1 0 1 2 0 6 0 1 0.22 0.2727 0.80 0.0000 5 27 32
3 4 2011-01-01 1 0 1 3 0 6 0 1 0.24 0.2879 0.75 0.0000 3 10 13
4 5 2011-01-01 1 0 1 4 0 6 0 1 0.24 0.2879 0.75 0.0000 0 1 1
5 6 2011-01-01 1 0 1 5 0 6 0 2 0.24 0.2576 0.75 0.0896 0 1 1
6 7 2011-01-01 1 0 1 6 0 6 0 1 0.22 0.2727 0.80 0.0000 2 0 2
7 8 2011-01-01 1 0 1 7 0 6 0 1 0.20 0.2576 0.86 0.0000 1 2 3
8 9 2011-01-01 1 0 1 8 0 6 0 1 0.24 0.2879 0.75 0.0000 1 7 8
9 10 2011-01-01 1 0 1 9 0 6 0 1 0.32 0.3485 0.76 0.0000 8 6 14
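To confirm whether the fillna cleaning above was actually needed, the missing values in the raw file can be counted before cleaning; a minimal sketch, re-reading data_path:

# Count NaN cells in the raw CSV before any cleaning
pd.read_csv(data_path).isna().sum().sum()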

In [2]:
# Plotting the data before scaling/standardization
# data[:10000]['Price'].plot()  # e.g. for the financial dataset
data[:10].plot()
plt.legend()
plt.show()



In [3]:
# Convert the dataframe to a NumPy array and keep only the last column
# (cnt, the total rental count) as a univariate series
data_array = np.array(data)
data_array.shape, data_array.dtype
data_main = np.array(data_array[:, -1:], dtype=float)
data_main.shape, data_main.dtype

plt.plot(data_main[:100])
plt.show()
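The slice data_array[:, -1:] keeps only the last column, which for the bike-sharing file is cnt (total rental count). Selecting it by name gives the same series; a minimal sketch, assuming the bike dataset is the one loaded:

# Equivalent extraction of the target series by column name
data_main_by_name = data[['cnt']].to_numpy(dtype=float)
np.allclose(data_main, data_main_by_name)  # expected: True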



In [4]:
# Standardize the series to zero mean and unit variance (z-score)
mean = np.mean(data_main, axis=0)
std = np.std(data_main, axis=0)
std.shape, mean.shape, std.dtype, mean.dtype

data_norm = (data_main - mean) / std
plt.plot(data_norm[:100])
plt.show()
data_norm.mean(), data_norm.std(), data_norm.var(), data_norm.shape, data_norm.dtype


Out[4]:
(-1.0548364452851478e-16, 1.0, 1.0, (17379, 1), dtype('float64'))
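Because the series is standardized, predictions made on data_norm can be mapped back to the original count scale by inverting the z-score; a minimal sketch using the mean and std computed above:

# Invert the z-score normalization to recover the original scale
data_back = (data_norm * std) + mean
np.allclose(data_back, data_main)  # expected: True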

In [5]:
train_data = data_norm[:16000] # first 16000 time steps for training
test_data = data_norm[16000:] # remaining 1379 time steps for validation
train_data.shape, test_data.shape
X_train = train_data[0:15999] # inputs: series value at time t
Y_train = train_data[1:16000] # targets: series value at time t+1 (one step ahead)
X_train.shape, Y_train.shape

plt.plot(X_train[:100])
plt.plot(Y_train[:100])
plt.show()
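X_train and Y_train are the same series offset by one time step, so the target at step t equals the input at step t+1; a quick sanity check:

np.allclose(X_train[1:], Y_train[:-1])  # expected: True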



In [6]:
X_valid = test_data[0:1378] # validation inputs at time t
Y_valid = test_data[1:1379] # validation targets at time t+1
X_valid.shape, Y_valid.shape
plt.plot(X_valid[:100])
plt.plot(Y_valid[:100])
plt.show()



In [7]:
# Model or Network
import impl.layer as l
from impl.loss import *

class GRU:
    def __init__(self, D, H):
        self.D = D
        self.H = H
        self.losses = {'train':[], 'smooth train':[], 'valid': []}
        
        # Model params
        Z = H + D
#         low, high = (-1. / np.sqrt(Z / 2.)), (1. / np.sqrt(Z / 2.))
#         low, high = (-1.0), (+1.0)
        low, high = (-0.5), (+0.5)
        m = dict(
#             Wz=np.random.randn(Z, H) / np.sqrt(Z / 2.),
#             Wr=np.random.randn(Z, H) / np.sqrt(Z / 2.),
#             Wh=np.random.randn(Z, H) / np.sqrt(Z / 2.),
#             Wy=np.random.randn(H, D) / np.sqrt(H / 2.),
            Wz=np.random.uniform(size=(Z, H), low=low, high=high),
            Wr=np.random.uniform(size=(Z, H), low=low, high=high),
            Wh=np.random.uniform(size=(Z, H), low=low, high=high),
            Wy=np.random.uniform(size=(H, D), low=low, high=high),
            bz=np.zeros((1, H)),
            br=np.zeros((1, H)),
            bh=np.zeros((1, H)),
            by=np.zeros((1, D))
        )
        self.model = m
        
    def initial_state(self):
        return np.zeros((1, self.H))

    def forward(self, X, h, m):
        Wz, Wr, Wh, Wy = m['Wz'], m['Wr'], m['Wh'], m['Wy']
        bz, br, bh, by = m['bz'], m['br'], m['bh'], m['by']

        X_in = X.copy()
        h_in = h.copy()

        X = np.column_stack((h_in, X_in))

        hz, hz_cache = l.fc_forward(X, Wz, bz)
        hz, hz_sigm_cache = l.sigmoid_forward(hz)

        hr, hr_cache = l.fc_forward(X, Wr, br)
        hr, hr_sigm_cache = l.sigmoid_forward(hr)

        X = np.column_stack((hr * h_in, X_in))
        
        hh, hh_cache = l.fc_forward(X, Wh, bh)
        hh, hh_tanh_cache = l.tanh_forward(hh)

        # h = (1. - hz) * h_old + hz * hh
        # or
        h = ((1. - hz) * h_in) + (hz * hh)
        # or
        # h = h_in + hz * (hh - h_in)

        y, y_cache = l.fc_forward(h, Wy, by)
        
        cache = (h_in, hz, hz_cache, hz_sigm_cache, hr, hr_cache, hr_sigm_cache, hh, hh_cache, hh_tanh_cache, 
                 y_cache)

        return y, h, cache

    def backward(self, dy, dh, cache):
        h_in, hz, hz_cache, hz_sigm_cache, hr, hr_cache, hr_sigm_cache, hh, hh_cache, hh_tanh_cache, y_cache = cache
        
        dh_out = dh.copy()

        dh, dWy, dby = l.fc_backward(dy, y_cache)
        dh += dh_out

        dh_in1 = (1. - hz) * dh
        dhh = hz * dh
        dhz = (hh * dh) - (h_in * dh)
        # or
        # dhz = (hh - h_in) * dh

        dhh = l.tanh_backward(dhh, hh_tanh_cache)
        dXh, dWh, dbh = l.fc_backward(dhh, hh_cache)

        dh = dXh[:, :self.H]
        dX_in2 = dXh[:, self.H:]
        dh_in2 = hr * dh

        dhr = h_in * dh
        dhr = l.sigmoid_backward(dhr, hr_sigm_cache)
        dXr, dWr, dbr = l.fc_backward(dhr, hr_cache)

        dhz = l.sigmoid_backward(dhz, hz_sigm_cache)
        dXz, dWz, dbz = l.fc_backward(dhz, hz_cache)

        dX = dXr + dXz
        dh_in3 = dX[:, :self.H]
        dX_in1 = dX[:, self.H:]

        dh = dh_in1 + dh_in2 + dh_in3
        dX = dX_in1 + dX_in2

        grad = dict(Wz=dWz, Wr=dWr, Wh=dWh, Wy=dWy, bz=dbz, br=dbr, bh=dbh, by=dby)
        
        return dX, dh, grad

    def train_forward(self, X_train, h):
        ys, fc_caches = [], []

        for X in X_train:
            X = X.reshape(1, -1) # X_1xn
            y, h, fc_cache = self.forward(X, h, self.model)
            fc_caches.append(fc_cache)
            ys.append(y)
        
        ys = np.array(ys, dtype=float).reshape(len(ys), -1) # ys_txn instead of ys_tx1xn
        caches = fc_caches
        
        return ys, caches
                                
    def loss_function(self, y_pred, y_train):
        loss, dys = 0.0, []

        for y, Y in zip(y_pred, y_train):
            loss += l2_regression(y_pred=y, y_train=Y)
            dy = dl2_regression(y_pred=y, y_train=Y)
            dys.append(dy)
            
        return loss, dys
    
    def train_backward(self, dys, caches):
        fc_caches = caches
        
        dh = np.zeros((1, self.H))
        grad = {key: np.zeros_like(val) for key, val in self.model.items()}
        grads = {key: np.zeros_like(val) for key, val in self.model.items()}

        for t in reversed(range(len(dys))):
            dy = dys[t].reshape(1, -1) # dy_1xn
            dX, dh, grad = self.backward(dy, dh, fc_caches[t])
            for key in grad.keys():
                grads[key] += grad[key]
                
        return dX, grads # TODO: dX is not used but this is a REMINDER that it exists!
    
    def test(self, X_seed, h, size):
        ys = []
        X = X_seed.reshape(1, -1)
        for _ in range(size):
            y, h, _ = self.forward(X, h, self.model)
            X = y.copy() # previous out for the next input for prediction
            ys.append(y) # list array
        
        ys = np.array(ys, dtype=float).reshape(len(ys), -1) # ys_txn instead of ys_tx1xn
        return ys
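For reference, the forward pass above implements the standard GRU equations, with the update gate hz applied as in the code (h = (1 - z) * h_prev + z * h_tilde):

\begin{aligned}
z_t &= \sigma([h_{t-1}, x_t]\, W_z + b_z) \\
r_t &= \sigma([h_{t-1}, x_t]\, W_r + b_r) \\
\tilde{h}_t &= \tanh([r_t \odot h_{t-1}, x_t]\, W_h + b_h) \\
h_t &= (1 - z_t) \odot h_{t-1} + z_t \odot \tilde{h}_t \\
y_t &= h_t\, W_y + b_y
\end{aligned}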

In [8]:
def get_minibatch(X, y, minibatch_size, shuffle):
    # Note: shuffle is accepted but unused; temporal order must be preserved for time-series data
    minibatches = []

    # for i in range(0, X.shape[0] - minibatch_size + 1, 1):
    for i in range(0, X.shape[0], minibatch_size):
        X_mini = X[i:(i + minibatch_size)]
        y_mini = y[i:(i + minibatch_size)]
        minibatches.append((X_mini, y_mini))

    return minibatches

def adam_rnn(nn, XY_train, XY_valid, alpha, mb_size, n_iter, print_after):
    X_train, y_train = XY_train
    X_valid, y_valid = XY_valid

    # Adam first (M) and second (R) moment estimates, one per parameter
    M = {key: np.zeros_like(val) for key, val in nn.model.items()}
    R = {key: np.zeros_like(val) for key, val in nn.model.items()}

    # Exponential decay rates for the moment estimates (values suggested by Justin Johnson, Stanford)
    beta1 = .9
    beta2 = .99
    state = nn.initial_state()
    smooth_loss = 1.
    minibatches = get_minibatch(X_train, y_train, mb_size, shuffle=False)
    
    # Epochs: iterating through the whole data
    for iter in range(1, n_iter + 1):
        
        # Minibatches
        for idx in range(len(minibatches)):
            
            # Train the model
            X_mini, y_mini = minibatches[idx]
            ys, caches = nn.train_forward(X_mini, state)
            loss, dys = nn.loss_function(y_pred=ys, y_train=y_mini) #, alpha=alpha
            _, grads = nn.train_backward(dys, caches)
            nn.losses['train'].append(loss)
            smooth_loss = (0.999 * smooth_loss) + (0.001 * loss)
            nn.losses['smooth train'].append(smooth_loss)
            
            # Update the model
            for key in grads.keys():
                M[key] = l.exp_running_avg(M[key], grads[key], beta1)
                R[key] = l.exp_running_avg(R[key], grads[key]**2, beta2)
                m_k_hat = M[key] / (1. - (beta1 ** iter)) # bias-corrected first moment
                r_k_hat = R[key] / (1. - (beta2 ** iter)) # bias-corrected second moment
                nn.model[key] -= alpha * m_k_hat / (np.sqrt(r_k_hat) + l.eps)

            # Validate the model (free-running prediction over the validation set)
            ys = nn.test(X_seed=X_valid[0], h=state, size=X_valid.shape[0]) # ys_txn
            valid_loss, _ = nn.loss_function(y_pred=ys, y_train=y_valid) #, alpha=alpha
            nn.losses['valid'].append(valid_loss)

        # Print the training and validation loss
        if iter % print_after == 0:
            print('Iter-{}, train loss: {:.8f}, valid loss: {:.8f}'.format(iter, loss, valid_loss))

    return nn
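The parameter update inside the minibatch loop is Adam with bias correction (here the epoch counter iter serves as the correction step, and l.exp_running_avg is assumed to compute beta * avg + (1 - beta) * x):

\begin{aligned}
M_k &\leftarrow \beta_1 M_k + (1 - \beta_1)\, g_k \\
R_k &\leftarrow \beta_2 R_k + (1 - \beta_2)\, g_k^2 \\
\hat{m}_k &= \frac{M_k}{1 - \beta_1^{t}}, \qquad \hat{r}_k = \frac{R_k}{1 - \beta_2^{t}} \\
\theta_k &\leftarrow \theta_k - \alpha\, \frac{\hat{m}_k}{\sqrt{\hat{r}_k} + \epsilon}
\end{aligned}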

In [9]:
# Hyper-parameters
time_step = 128 # minibatch/sequence length: 32, 64, 128, or 256
n_iter = 300 # epochs
alpha = 1e-4 # learning rate: 1e-3, 5e-4, 1e-4 are common defaults
print_after = 1 # print training and validation loss every N epochs
num_hidden_units = 64 # number of units in the hidden layer
num_input_units = X_train.shape[1] # number of input features/channels (1 here: the normalized cnt series)

# Build the network; it is trained below with mini-batch gradient descent (Adam)
# def adam_rnn(nn, X_train, y_train, alpha=0.001, mb_size=256, n_iter=2000, print_after=100):
net = GRU(D=num_input_units, H=num_hidden_units) 

# Start training: backpropagation through time with Adam updates
adam_rnn(nn=net, XY_train=(X_train, Y_train), XY_valid=(X_valid, Y_valid), alpha=alpha, mb_size=time_step,
         n_iter=n_iter, print_after=print_after)


Iter-1, train loss: 25.75183218, valid loss: 3251.55888850
Iter-2, train loss: 19.89496242, valid loss: 2650.49710219
Iter-3, train loss: 17.30009100, valid loss: 2369.41167174
Iter-4, train loss: 15.74423721, valid loss: 2219.63527981
Iter-5, train loss: 14.49307876, valid loss: 1948.96943143
Iter-6, train loss: 13.43299689, valid loss: 2193.23326204
Iter-7, train loss: 12.54601288, valid loss: 2238.11466653
Iter-8, train loss: 11.80592516, valid loss: 2259.10775960
Iter-9, train loss: 11.18226387, valid loss: 2260.71253937
Iter-10, train loss: 10.64784783, valid loss: 2168.09310873
Iter-11, train loss: 10.18103999, valid loss: 1811.53883851
Iter-12, train loss: 9.76629906, valid loss: 1460.59102672
Iter-13, train loss: 9.39299821, valid loss: 1445.65931772
Iter-14, train loss: 9.05388355, valid loss: 1561.41871815
Iter-15, train loss: 8.74394194, valid loss: 1428.06672041
Iter-16, train loss: 8.45966161, valid loss: 1560.20480729
Iter-17, train loss: 8.19851673, valid loss: 1518.80209219
Iter-18, train loss: 7.95859249, valid loss: 1515.18365435
Iter-19, train loss: 7.73832197, valid loss: 1507.16550968
Iter-20, train loss: 7.53633691, valid loss: 1537.58550425
Iter-21, train loss: 7.35143665, valid loss: 1584.27219090
Iter-22, train loss: 7.18259253, valid loss: 1768.73724451
Iter-23, train loss: 7.02890220, valid loss: 1762.80149789
Iter-24, train loss: 6.88950691, valid loss: 1654.98594051
Iter-25, train loss: 6.76351978, valid loss: 1715.79114142
Iter-26, train loss: 6.64998778, valid loss: 1636.99936271
Iter-27, train loss: 6.54788849, valid loss: 1716.33000630
Iter-28, train loss: 6.45615327, valid loss: 1686.02807200
Iter-29, train loss: 6.37370391, valid loss: 1676.05768419
Iter-30, train loss: 6.29949019, valid loss: 1661.01784693
Iter-31, train loss: 6.23252024, valid loss: 1688.90716882
Iter-32, train loss: 6.17188111, valid loss: 1769.28939149
Iter-33, train loss: 6.11675027, valid loss: 1783.11062482
Iter-34, train loss: 6.06640042, valid loss: 1748.76015120
Iter-35, train loss: 6.02019965, valid loss: 1782.59525266
Iter-36, train loss: 5.97760889, valid loss: 1791.30897686
Iter-37, train loss: 5.93817745, valid loss: 1800.29452248
Iter-38, train loss: 5.90153753, valid loss: 1871.67518810
Iter-39, train loss: 5.86739804, valid loss: 1827.67430447
Iter-40, train loss: 5.83553747, valid loss: 1770.11681355
Iter-41, train loss: 5.80579569, valid loss: 1837.51401859
Iter-42, train loss: 5.77806387, valid loss: 1935.19762348
Iter-43, train loss: 5.75227224, valid loss: 1910.46330411
Iter-44, train loss: 5.72837559, valid loss: 1855.55129619
Iter-45, train loss: 5.70633808, valid loss: 1800.37794007
Iter-46, train loss: 5.68611937, valid loss: 1872.24481215
Iter-47, train loss: 5.66766469, valid loss: 1996.11952603
Iter-48, train loss: 5.65090010, valid loss: 1851.03800838
Iter-49, train loss: 5.63573288, valid loss: 1934.30627599
Iter-50, train loss: 5.62205534, valid loss: 1783.80787757
Iter-51, train loss: 5.60975011, valid loss: 1708.10195739
Iter-52, train loss: 5.59869524, valid loss: 1510.44896200
Iter-53, train loss: 5.58876818, valid loss: 1598.63237088
Iter-54, train loss: 5.57984848, valid loss: 1565.80703268
Iter-55, train loss: 5.57181936, valid loss: 1529.73408816
Iter-56, train loss: 5.56456831, valid loss: 1480.63468452
Iter-57, train loss: 5.55798736, valid loss: 1479.25651934
Iter-58, train loss: 5.55197293, valid loss: 1440.71433806
Iter-59, train loss: 5.54642573, valid loss: 1437.80637488
Iter-60, train loss: 5.54125070, valid loss: 1422.04778253
Iter-61, train loss: 5.53635706, valid loss: 1452.52074768
Iter-62, train loss: 5.53165854, valid loss: 1396.17934108
Iter-63, train loss: 5.52707369, valid loss: 1392.36812961
Iter-64, train loss: 5.52252630, valid loss: 1377.13717676
Iter-65, train loss: 5.51794587, valid loss: 1356.39568703
Iter-66, train loss: 5.51326795, valid loss: 1330.84582663
Iter-67, train loss: 5.50843453, valid loss: 1315.36727541
Iter-68, train loss: 5.50339418, valid loss: 1304.65470329
Iter-69, train loss: 5.49810220, valid loss: 1314.54157620
Iter-70, train loss: 5.49252049, valid loss: 1295.01731510
Iter-71, train loss: 5.48661741, valid loss: 1293.01253821
Iter-72, train loss: 5.48036740, valid loss: 1370.72527810
Iter-73, train loss: 5.47375064, valid loss: 1399.27824089
Iter-74, train loss: 5.46675256, valid loss: 1388.15737491
Iter-75, train loss: 5.45936329, valid loss: 1380.66736040
Iter-76, train loss: 5.45157718, valid loss: 1400.05614911
Iter-77, train loss: 5.44339225, valid loss: 1456.74267801
Iter-78, train loss: 5.43480966, valid loss: 1408.56258213
Iter-79, train loss: 5.42583328, valid loss: 1499.09156919
Iter-80, train loss: 5.41646919, valid loss: 1295.89420227
Iter-81, train loss: 5.40672536, valid loss: 1675.50746352
Iter-82, train loss: 5.39661123, valid loss: 1866.77945415
Iter-83, train loss: 5.38613744, valid loss: 1903.73603714
Iter-84, train loss: 5.37531554, valid loss: 1799.01274491
Iter-85, train loss: 5.36415774, valid loss: 6627.08324742
Iter-86, train loss: 5.35267671, valid loss: 6888.80291487
Iter-87, train loss: 5.34088536, valid loss: 2225.98949501
Iter-88, train loss: 5.32879666, valid loss: 1071.12934965
Iter-89, train loss: 5.31642352, valid loss: 1367.14965274
Iter-90, train loss: 5.30377861, valid loss: 1087.58397617
Iter-91, train loss: 5.29087427, valid loss: 1554.78165636
Iter-92, train loss: 5.27772242, valid loss: 6975.23690341
Iter-93, train loss: 5.26433448, valid loss: 1016.27121894
Iter-94, train loss: 5.25072138, valid loss: 7256.27755678
Iter-95, train loss: 5.23689348, valid loss: 7316.30740555
Iter-96, train loss: 5.22286065, valid loss: 7352.24672735
Iter-97, train loss: 5.20863222, valid loss: 7384.63887976
Iter-98, train loss: 5.19421708, valid loss: 7415.35679798
Iter-99, train loss: 5.17962365, valid loss: 7445.01505938
Iter-100, train loss: 5.16485997, valid loss: 7473.89293179
Iter-101, train loss: 5.14993367, valid loss: 7502.14079840
Iter-102, train loss: 5.13485200, valid loss: 7529.84778191
Iter-103, train loss: 5.11962182, valid loss: 7557.06861991
Iter-104, train loss: 5.10424959, valid loss: 7583.83543101
Iter-105, train loss: 5.08874128, valid loss: 7610.16273153
Iter-106, train loss: 5.07310238, valid loss: 7636.04858749
Iter-107, train loss: 5.05733782, valid loss: 7661.47235515
Iter-108, train loss: 5.04145188, valid loss: 7686.38726237
Iter-109, train loss: 5.02544819, valid loss: 7710.70132136
Iter-110, train loss: 5.00932961, valid loss: 7734.22327054
Iter-111, train loss: 4.99309825, valid loss: 7756.46483088
Iter-112, train loss: 4.97675543, valid loss: 7775.36802314
Iter-113, train loss: 4.96030167, valid loss: 1437.80451852
Iter-114, train loss: 4.94373675, valid loss: 7552.88898467
Iter-115, train loss: 4.92705972, valid loss: 7586.59195485
Iter-116, train loss: 4.91026902, valid loss: 7609.30196614
Iter-117, train loss: 4.89336256, valid loss: 1394.61566955
Iter-118, train loss: 4.87633786, valid loss: 1413.44778417
Iter-119, train loss: 4.85919219, valid loss: 1427.27621859
Iter-120, train loss: 4.84192271, valid loss: 1415.88308621
Iter-121, train loss: 4.82452665, valid loss: 1430.99219644
Iter-122, train loss: 4.80700144, valid loss: 1410.41629171
Iter-123, train loss: 4.78934488, valid loss: 1407.28243317
Iter-124, train loss: 4.77155523, valid loss: 1419.83426994
Iter-125, train loss: 4.75363135, valid loss: 1402.29455684
Iter-126, train loss: 4.73557273, valid loss: 1363.23452544
Iter-127, train loss: 4.71737957, valid loss: 1538.05638252
Iter-128, train loss: 4.69905281, valid loss: 1414.25334153
Iter-129, train loss: 4.68059405, valid loss: 1540.04558094
Iter-130, train loss: 4.66200562, valid loss: 1448.14560994
Iter-131, train loss: 4.64329042, valid loss: 1442.12813169
Iter-132, train loss: 4.62445194, valid loss: 1490.56132154
Iter-133, train loss: 4.60549411, valid loss: 1487.81435909
Iter-134, train loss: 4.58642126, valid loss: 1389.71866428
Iter-135, train loss: 4.56723801, valid loss: 1485.76314104
Iter-136, train loss: 4.54794920, valid loss: 1475.66495886
Iter-137, train loss: 4.52855982, valid loss: 1505.01866073
Iter-138, train loss: 4.50907495, valid loss: 1525.21009317
Iter-139, train loss: 4.48949972, valid loss: 1502.65527231
Iter-140, train loss: 4.46983929, valid loss: 1507.03682824
Iter-141, train loss: 4.45009886, valid loss: 1514.96956228
Iter-142, train loss: 4.43028367, valid loss: 1488.38781808
Iter-143, train loss: 4.41039904, valid loss: 1521.10719891
Iter-144, train loss: 4.39045045, valid loss: 1603.08464372
Iter-145, train loss: 4.37044355, valid loss: 1585.12372963
Iter-146, train loss: 4.35038426, valid loss: 1566.41308002
Iter-147, train loss: 4.33027887, valid loss: 1587.62817198
Iter-148, train loss: 4.31013401, valid loss: 1556.83930130
Iter-149, train loss: 4.28995683, valid loss: 1430.95112349
Iter-150, train loss: 4.26975492, valid loss: 1585.51031571
Iter-151, train loss: 4.24953645, valid loss: 1545.10349539
Iter-152, train loss: 4.22931008, valid loss: 1564.70786118
Iter-153, train loss: 4.20908505, valid loss: 1509.48594159
Iter-154, train loss: 4.18887107, valid loss: 1528.78778513
Iter-155, train loss: 4.16867835, valid loss: 1577.65551334
Iter-156, train loss: 4.14851753, valid loss: 1440.03613572
Iter-157, train loss: 4.12839957, valid loss: 1549.07295526
Iter-158, train loss: 4.10833578, valid loss: 1523.40453755
Iter-159, train loss: 4.08833764, valid loss: 1500.13483211
Iter-160, train loss: 4.06841683, valid loss: 1538.16096603
Iter-161, train loss: 4.04858506, valid loss: 1488.95960489
Iter-162, train loss: 4.02885406, valid loss: 1424.91803315
Iter-163, train loss: 4.00923548, valid loss: 1589.25066231
Iter-164, train loss: 3.98974081, valid loss: 1555.85829990
Iter-165, train loss: 3.97038136, valid loss: 6860.82337518
Iter-166, train loss: 3.95116814, valid loss: 1479.33261332
Iter-167, train loss: 3.93211183, valid loss: 1504.06717480
Iter-168, train loss: 3.91322274, valid loss: 1584.22500992
Iter-169, train loss: 3.89451071, valid loss: 1571.29130556
Iter-170, train loss: 3.87598512, valid loss: 1485.26301531
Iter-171, train loss: 3.85765479, valid loss: 1586.34897125
Iter-172, train loss: 3.83952800, valid loss: 1528.18856584
Iter-173, train loss: 3.82161234, valid loss: 1447.21247630
Iter-174, train loss: 3.80391499, valid loss: 6021.20268909
Iter-175, train loss: 3.78644192, valid loss: 1429.46210470
Iter-176, train loss: 3.76919961, valid loss: 1543.33756173
Iter-177, train loss: 3.75219136, valid loss: 1530.80514501
Iter-178, train loss: 3.73542494, valid loss: 1523.81628092
Iter-179, train loss: 3.71889660, valid loss: 1384.71544181
Iter-180, train loss: 3.70262484, valid loss: 6002.13405320
Iter-181, train loss: 3.68657840, valid loss: 1408.34170107
Iter-182, train loss: 3.67083125, valid loss: 5849.41800790
Iter-183, train loss: 3.65522650, valid loss: 1478.10476330
Iter-184, train loss: 3.64011137, valid loss: 1681.02634320
Iter-185, train loss: 3.62471698, valid loss: 7261.18797718
Iter-186, train loss: 3.61075983, valid loss: 1497.57847843
Iter-187, train loss: 3.59437187, valid loss: 1518.38737086
Iter-188, train loss: 3.58433018, valid loss: 1498.44691319
Iter-189, train loss: 3.56058663, valid loss: 1520.73566753
Iter-190, train loss: 3.56911441, valid loss: 1660.35647877
Iter-191, train loss: 3.50760773, valid loss: 8352.64875579
Iter-192, train loss: 3.58832462, valid loss: 1591.86394491
Iter-193, train loss: 3.47610453, valid loss: 8467.66252851
Iter-194, train loss: 3.56845086, valid loss: 1514.16390950
Iter-195, train loss: 3.47553601, valid loss: 6363.52309268
Iter-196, train loss: 3.50118459, valid loss: 1568.48047619
Iter-197, train loss: 3.41622413, valid loss: 1594.34691908
Iter-198, train loss: 3.50882342, valid loss: 1694.06265537
Iter-199, train loss: 3.41052844, valid loss: 8463.39676880
Iter-200, train loss: 3.47222533, valid loss: 1702.07528215
Iter-201, train loss: 3.37933130, valid loss: 8467.05282750
Iter-202, train loss: 3.45811575, valid loss: 1734.00891946
Iter-203, train loss: 3.36383311, valid loss: 8467.40980630
Iter-204, train loss: 3.43501014, valid loss: 1604.34693548
Iter-205, train loss: 3.34446706, valid loss: 8468.10619548
Iter-206, train loss: 3.41693819, valid loss: 1605.85833255
Iter-207, train loss: 3.32759063, valid loss: 8472.15083690
Iter-208, train loss: 3.39863164, valid loss: 1714.90684258
Iter-209, train loss: 3.31111499, valid loss: 8480.53179525
Iter-210, train loss: 3.38102474, valid loss: 1693.78943174
Iter-211, train loss: 3.29520293, valid loss: 8490.88894264
Iter-212, train loss: 3.36394489, valid loss: 1732.97263188
Iter-213, train loss: 3.27979972, valid loss: 8494.99087569
Iter-214, train loss: 3.34730604, valid loss: 1756.16653383
Iter-215, train loss: 3.26491005, valid loss: 8506.04601605
Iter-216, train loss: 3.33109762, valid loss: 1775.46527340
Iter-217, train loss: 3.25056317, valid loss: 1993.38586071
Iter-218, train loss: 3.31532350, valid loss: 1713.12944775
Iter-219, train loss: 3.23679919, valid loss: 1592.67099455
Iter-220, train loss: 3.29999639, valid loss: 1722.98811897
Iter-221, train loss: 3.22366103, valid loss: 1681.47196815
Iter-222, train loss: 3.28513419, valid loss: 1725.14957950
Iter-223, train loss: 3.21118978, valid loss: 1617.09321067
Iter-224, train loss: 3.27075661, valid loss: 1620.88730268
Iter-225, train loss: 3.19942256, valid loss: 1631.85875037
Iter-226, train loss: 3.25688323, valid loss: 1758.12795197
Iter-227, train loss: 3.18839136, valid loss: 1767.16582708
Iter-228, train loss: 3.24353235, valid loss: 1695.28614142
Iter-229, train loss: 3.17812242, valid loss: 1652.41439237
Iter-230, train loss: 3.23071945, valid loss: 8339.94248920
Iter-231, train loss: 3.16863511, valid loss: 1764.78540823
Iter-232, train loss: 3.21845428, valid loss: 1642.24547737
Iter-233, train loss: 3.15993873, valid loss: 1760.67221099
Iter-234, train loss: 3.20673486, valid loss: 8798.32739715
Iter-235, train loss: 3.15202224, valid loss: 1731.29644219
Iter-236, train loss: 3.19553659, valid loss: 3943.53265423
Iter-237, train loss: 3.14482667, valid loss: 1793.03755502
Iter-238, train loss: 3.18479416, valid loss: 3965.85241683
Iter-239, train loss: 3.13818402, valid loss: 1825.94052233
Iter-240, train loss: 3.17437809, valid loss: 4136.32292189
Iter-241, train loss: 3.13172131, valid loss: 1960.32634094
Iter-242, train loss: 3.16408543, valid loss: 1674.88445914
Iter-243, train loss: 3.12481528, valid loss: 1829.71257970
Iter-244, train loss: 3.15370323, valid loss: 1669.30071132
Iter-245, train loss: 3.11680850, valid loss: 1901.58443122
Iter-246, train loss: 3.14316326, valid loss: 1772.35203988
Iter-247, train loss: 3.10749647, valid loss: 1861.40080783
Iter-248, train loss: 3.13251877, valid loss: 1859.31925590
Iter-249, train loss: 3.09733883, valid loss: 1954.83835444
Iter-250, train loss: 3.12149083, valid loss: 1809.23067049
Iter-251, train loss: 3.08697680, valid loss: 9150.52008424
Iter-252, train loss: 3.10943680, valid loss: 1940.15439010
Iter-253, train loss: 3.07654267, valid loss: 9143.28701788
Iter-254, train loss: 3.09640964, valid loss: 8123.46327861
Iter-255, train loss: 3.06570949, valid loss: 9147.75663209
Iter-256, train loss: 3.08354784, valid loss: 1904.31090658
Iter-257, train loss: 3.05453177, valid loss: 9151.87503264
Iter-258, train loss: 3.07118724, valid loss: 1908.35522097
Iter-259, train loss: 3.04339084, valid loss: 9157.59446522
Iter-260, train loss: 3.05738903, valid loss: 2022.76000114
Iter-261, train loss: 3.03218709, valid loss: 9166.81844611
Iter-262, train loss: 3.04212285, valid loss: 1877.97658706
Iter-263, train loss: 3.02083391, valid loss: 9175.14937289
Iter-264, train loss: 3.02891474, valid loss: 1896.02758156
Iter-265, train loss: 3.00966618, valid loss: 9177.36076728
Iter-266, train loss: 3.01826856, valid loss: 1953.27492659
Iter-267, train loss: 2.99746115, valid loss: 9180.33452655
Iter-268, train loss: 3.00151776, valid loss: 1815.81673316
Iter-269, train loss: 2.98447450, valid loss: 9191.98871114
Iter-270, train loss: 2.98278113, valid loss: 1854.34812094
Iter-271, train loss: 2.97414007, valid loss: 9197.49076413
Iter-272, train loss: 2.97590951, valid loss: 1854.67390128
Iter-273, train loss: 2.97021082, valid loss: 2111.39538121
Iter-274, train loss: 2.97739146, valid loss: 2005.67113540
Iter-275, train loss: 2.94431525, valid loss: 2316.35793846
Iter-276, train loss: 2.94295709, valid loss: 1851.36510947
Iter-277, train loss: 2.95553185, valid loss: 2063.17577818
Iter-278, train loss: 2.93861569, valid loss: 2169.30575119
Iter-279, train loss: 2.93738442, valid loss: 1985.89577855
Iter-280, train loss: 2.92540408, valid loss: 2044.16068742
Iter-281, train loss: 2.96497621, valid loss: 2212.39933713
Iter-282, train loss: 2.99116806, valid loss: 8519.08002502
Iter-283, train loss: 2.90598182, valid loss: 2720.71065760
Iter-284, train loss: 2.88027684, valid loss: 9202.62559468
Iter-285, train loss: 2.91320684, valid loss: 1923.83714398
Iter-286, train loss: 2.92544996, valid loss: 2497.23209434
Iter-287, train loss: 2.91496370, valid loss: 1982.47526056
Iter-288, train loss: 2.84630942, valid loss: 2485.71994367
Iter-289, train loss: 2.84492361, valid loss: 9189.93400922
Iter-290, train loss: 2.89251590, valid loss: 1938.94265134
Iter-291, train loss: 2.87723757, valid loss: 8904.48168508
Iter-292, train loss: 2.85493706, valid loss: 1999.95157377
Iter-293, train loss: 2.81116446, valid loss: 9208.36240815
Iter-294, train loss: 2.81514019, valid loss: 9187.15293589
Iter-295, train loss: 2.86358616, valid loss: 1952.35831106
Iter-296, train loss: 2.83592701, valid loss: 2446.73299612
Iter-297, train loss: 2.81536224, valid loss: 1724.98188058
Iter-298, train loss: 2.78237868, valid loss: 2232.57781344
Iter-299, train loss: 2.78553534, valid loss: 9196.64131857
Iter-300, train loss: 2.83349271, valid loss: 2118.35624435
Out[9]:
<__main__.GRU at 0x7f19e11e58d0>

In [10]:
import matplotlib.pyplot as plt

plt.plot(net.losses['train'], label='Train loss')
plt.plot(net.losses['smooth train'], label='Train smooth loss')
plt.legend()
plt.show()



In [11]:
import matplotlib.pyplot as plt

plt.plot(net.losses['valid'], label='Validation loss')
plt.legend()
plt.show()



In [19]:
import matplotlib.pyplot as plt

y_pred = net.test(X_seed=X_valid[0], h=net.initial_state(), size=X_valid.shape[0]) # ys_txn
y_pred.shape, Y_valid.shape

plt.plot(y_pred[:350], label='y_pred')
plt.plot(Y_valid[:350], label='Y_valid')
# plt.plot(X_valid[:100], label='X_valid')
plt.legend()
plt.show()
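net.test above generates the validation curve autoregressively, feeding each prediction back as the next input. For comparison, a one-step-ahead (teacher-forced) prediction can be produced by feeding the true validation inputs instead; a minimal sketch reusing net.forward:

# One-step-ahead prediction: feed the true X_valid values rather than the model's own outputs
h = net.initial_state()
ys_onestep = []
for x in X_valid:
    y, h, _ = net.forward(x.reshape(1, -1), h, net.model)
    ys_onestep.append(y)
ys_onestep = np.array(ys_onestep, dtype=float).reshape(len(ys_onestep), -1)

plt.plot(ys_onestep[:350], label='y_pred (one step ahead)')
plt.plot(Y_valid[:350], label='Y_valid')
plt.legend()
plt.show()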


