In [1]:
# Data: time-series data (smartwatch, financial, or bike-sharing)
# %matplotlib inline  # enable inline figure display in the notebook
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Data reading
# The historical/time-series data to visualize
# data_path = 'data/smartwatch_data/experimental_data_analysis/Basis_Watch_Data.csv'
# data_path = 'data/financial_data/USD_INR.csv'
data_path = 'data/bike_data/hour.csv'
data = pd.read_csv(data_path)

# Data cleaning
# Replace missing values (NaN) with 0.0
data = data.fillna(value=0.0)

# Show the first 10 rows of the CSV (comma-separated values) file
data[:10]


Out[1]:
instant dteday season yr mnth hr holiday weekday workingday weathersit temp atemp hum windspeed casual registered cnt
0 1 2011-01-01 1 0 1 0 0 6 0 1 0.24 0.2879 0.81 0.0000 3 13 16
1 2 2011-01-01 1 0 1 1 0 6 0 1 0.22 0.2727 0.80 0.0000 8 32 40
2 3 2011-01-01 1 0 1 2 0 6 0 1 0.22 0.2727 0.80 0.0000 5 27 32
3 4 2011-01-01 1 0 1 3 0 6 0 1 0.24 0.2879 0.75 0.0000 3 10 13
4 5 2011-01-01 1 0 1 4 0 6 0 1 0.24 0.2879 0.75 0.0000 0 1 1
5 6 2011-01-01 1 0 1 5 0 6 0 2 0.24 0.2576 0.75 0.0896 0 1 1
6 7 2011-01-01 1 0 1 6 0 6 0 1 0.22 0.2727 0.80 0.0000 2 0 2
7 8 2011-01-01 1 0 1 7 0 6 0 1 0.20 0.2576 0.86 0.0000 1 2 3
8 9 2011-01-01 1 0 1 8 0 6 0 1 0.24 0.2879 0.75 0.0000 1 7 8
9 10 2011-01-01 1 0 1 9 0 6 0 1 0.32 0.3485 0.76 0.0000 8 6 14
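
The fillna(value=0.0) call above replaces any missing readings with zero. A quick way to check whether this file actually contains missing values before filling (a small sketch, not part of the original run):

# Sketch: count missing values per column in the raw CSV
print(pd.read_csv(data_path).isna().sum())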

In [2]:
# Plotting the raw data before scaling/normalization
# data[:10000]['Price'].plot()
data[:10].plot()
plt.legend()
plt.show()



In [6]:
data_array = np.array(data)
data_array.shape, data_array.dtype
# Keep only the last three columns: casual, registered, cnt (the hourly ride counts)
data_main = np.array(data_array[:, -3:], dtype=float)
data_main.shape, data_main.dtype

plt.plot(data_main[:100])
plt.show()



In [7]:
mean = np.mean(data_main, axis=0)
std = np.std(data_main, axis=0)
std.shape, mean.shape, std.dtype, mean.dtype

# Z-score normalization: zero mean, unit variance per feature
data_norm = (data_main - mean) / std
plt.plot(data_norm[:100])
plt.show()
data_norm.mean(), data_norm.std(), data_norm.var(), data_norm.shape, data_norm.dtype


Out[7]:
(-1.7444323642958519e-17, 1.0, 1.0, (17379, 3), dtype('float64'))
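
The normalization above is a per-feature z-score: each of the three count columns is centered by its mean and divided by its standard deviation, which is why the reported overall mean is numerically zero and the variance is one. The transform is easy to invert when predictions need to be read back on the original count scale (a sketch, reusing mean and std from this cell):

# Sketch: undo the z-score normalization to recover the original counts
data_back = (data_norm * std) + mean
np.allclose(data_back, data_main)  # expected: True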

In [8]:
train_data = data_norm[:16000] # first 16000 time steps for training
test_data = data_norm[16000:] # remaining 1379 time steps for validation
train_data.shape, test_data.shape
# One-step-ahead targets: Y is X shifted forward by one time step
X_train = train_data[0:15999]
Y_train = train_data[1:16000]
X_train.shape, Y_train.shape

plt.plot(X_train[:100])
plt.plot(Y_train[:100])
plt.show()
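
X_train and Y_train are the same normalized series shifted by one time step, so the network learns to predict the next hour's counts from the current hour's. A quick sanity check of that alignment (a sketch, not in the original notebook):

# Sketch: the target at step t should equal the input at step t+1
assert np.allclose(X_train[1:], Y_train[:-1])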



In [9]:
# Validation inputs/targets, again shifted by one time step
X_valid = test_data[0:1378]
Y_valid = test_data[1:1379]
X_valid.shape, Y_valid.shape
plt.plot(X_valid[:100])
plt.plot(Y_valid[:100])
plt.show()



In [10]:
# Model or Network
import impl.layer as l
from impl.loss import *

class GRU:
    def __init__(self, D, H):
        self.D = D
        self.H = H
        self.losses = {'train':[], 'smooth train':[], 'valid': []}
        
        # Model params
        Z = H + D
        low, high = (-1.0), (+1.0)
        low_Z, high_Z = (low / np.sqrt(Z / 2.)), (high / np.sqrt(Z / 2.))
        low_H, high_H = (low / np.sqrt(H / 2.)), (high / np.sqrt(H / 2.))
        m = dict(
            #             Wz=np.random.randn(Z, H) / np.sqrt(Z / 2.),
            #             Wr=np.random.randn(Z, H) / np.sqrt(Z / 2.),
            #             Wh=np.random.randn(Z, H) / np.sqrt(Z / 2.),
            #             Wy=np.random.randn(H, D) / np.sqrt(H / 2.),
            Wz=np.random.uniform(size=(Z, H), low=low_Z, high=high_Z),
            Wr=np.random.uniform(size=(Z, H), low=low_Z, high=high_Z),
            Wh=np.random.uniform(size=(Z, H), low=low_Z, high=high_Z),
            Wy=np.random.uniform(size=(H, D), low=low_H, high=high_H),
            bz=np.zeros((1, H)),
            br=np.zeros((1, H)),
            bh=np.zeros((1, H)),
            by=np.zeros((1, D))
        )
        self.model = m
        
    def initial_state(self):
        return np.zeros((1, self.H))

    def forward(self, X, h, m):
        Wz, Wr, Wh, Wy = m['Wz'], m['Wr'], m['Wh'], m['Wy']
        bz, br, bh, by = m['bz'], m['br'], m['bh'], m['by']

        X_in = X.copy()
        h_in = h.copy()

        X = np.column_stack((h_in, X_in))

        hz, hz_cache = l.fc_forward(X, Wz, bz)
        hz, hz_sigm_cache = l.sigmoid_forward(hz)

        hr, hr_cache = l.fc_forward(X, Wr, br)
        hr, hr_sigm_cache = l.sigmoid_forward(hr)

        X = np.column_stack((hr * h_in, X_in))
        
        hh, hh_cache = l.fc_forward(X, Wh, bh)
        hh, hh_tanh_cache = l.tanh_forward(hh)

        # h = (1. - hz) * h_old + hz * hh
        # or
        h = ((1. - hz) * h_in) + (hz * hh)
        # or
        # h = h_in + hz * (hh - h_in)

        y, y_cache = l.fc_forward(h, Wy, by)
        
        cache = (h_in, hz, hz_cache, hz_sigm_cache, hr, hr_cache, hr_sigm_cache, hh, hh_cache, hh_tanh_cache, 
                 y_cache)

        return y, h, cache

    def backward(self, dy, dh, cache):
        h_in, hz, hz_cache, hz_sigm_cache, hr, hr_cache, hr_sigm_cache, hh, hh_cache, hh_tanh_cache, y_cache = cache
        
        dh_out = dh.copy()

        dh, dWy, dby = l.fc_backward(dy, y_cache)
        dh += dh_out

        dh_in1 = (1. - hz) * dh
        dhh = hz * dh
        dhz = (hh * dh) - (h_in * dh)
        # or
        # dhz = (hh - h_in) * dh

        dhh = l.tanh_backward(dhh, hh_tanh_cache)
        dXh, dWh, dbh = l.fc_backward(dhh, hh_cache)

        dh = dXh[:, :self.H]
        dX_in2 = dXh[:, self.H:]
        dh_in2 = hr * dh

        dhr = h_in * dh
        dhr = l.sigmoid_backward(dhr, hr_sigm_cache)
        dXr, dWr, dbr = l.fc_backward(dhr, hr_cache)

        dhz = l.sigmoid_backward(dhz, hz_sigm_cache)
        dXz, dWz, dbz = l.fc_backward(dhz, hz_cache)

        dX = dXr + dXz
        dh_in3 = dX[:, :self.H]
        dX_in1 = dX[:, self.H:]

        dh = dh_in1 + dh_in2 + dh_in3
        dX = dX_in1 + dX_in2

        grad = dict(Wz=dWz, Wr=dWr, Wh=dWh, Wy=dWy, bz=dbz, br=dbr, bh=dbh, by=dby)
        
        return dX, dh, grad

    def train_forward(self, X_train, h):
        ys, fc_caches = [], []

        for X in X_train:
            X = X.reshape(1, -1) # X_1xn
            y, h, fc_cache = self.forward(X, h, self.model)
            fc_caches.append(fc_cache)
            ys.append(y)
        
        ys = np.array(ys, dtype=float).reshape(len(ys), -1) # ys_txn instead of ys_tx1xn
        caches = fc_caches
        
        return ys, caches
                                
    def loss_function(self, y_pred, y_train):
        loss, dys = 0.0, []

        for y, Y in zip(y_pred, y_train):
            loss += l2_regression(y_pred=y, y_train=Y)
            dy = dl2_regression(y_pred=y, y_train=Y)
            dys.append(dy)
            
        return loss, dys
    
    def train_backward(self, dys, caches):
        fc_caches = caches
        
        dh = np.zeros((1, self.H)) 
        grad = {key: np.zeros_like(val) for key, val in self.model.items()}
        grads = {key: np.zeros_like(val) for key, val in self.model.items()}

        for t in reversed(range(len(dys))):
            dy = dys[t].reshape(1, -1) # dy_1xn
            dX, dh, grad = self.backward(dy, dh, fc_caches[t])
            for key in grad.keys():
                grads[key] += grad[key]
                
        return dX, grads # TODO: dX is not used but this is a REMINDER that it exists!
    
    def test(self, X_seed, h, size):
        ys = []
        X = X_seed.reshape(1, -1)
        for _ in range(size):
            y, h, _ = self.forward(X, h, self.model)
            X = y.copy() # previous out for the next input for prediction
            ys.append(y) # list array
        
        ys = np.array(ys, dtype=float).reshape(len(ys), -1) # ys_txn instead of ys_tx1xn
        return ys
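
For reference, the forward pass above implements the GRU update below, with the hidden state and input concatenated before each fully connected layer:

\begin{aligned}
z_t &= \sigma(W_z\,[h_{t-1}, x_t] + b_z) \\
r_t &= \sigma(W_r\,[h_{t-1}, x_t] + b_r) \\
\tilde{h}_t &= \tanh(W_h\,[r_t \odot h_{t-1}, x_t] + b_h) \\
h_t &= (1 - z_t) \odot h_{t-1} + z_t \odot \tilde{h}_t \\
y_t &= W_y\,h_t + b_y
\end{aligned}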

In [11]:
def get_minibatch(X, y, minibatch_size, shuffle):
    minibatches = []

    # for i in range(0, X.shape[0] - minibatch_size + 1, 1):
    for i in range(0, X.shape[0], minibatch_size):
        X_mini = X[i:(i + minibatch_size)]
        y_mini = y[i:(i + minibatch_size)]
        minibatches.append((X_mini, y_mini))

    return minibatches

def adam_rnn(nn, XY_train, XY_valid, alpha, mb_size, n_iter, print_after):
    X_train, y_train = XY_train
    X_valid, y_valid = XY_valid

    # Adam moment accumulators (first and second moments)
    M = {key: np.zeros_like(val) for key, val in nn.model.items()}
    R = {key: np.zeros_like(val) for key, val in nn.model.items()}
    
    # Adam decay rates (values suggested by Justin Johnson, Stanford)
    beta1 = .9
    beta2 = .99
    state = nn.initial_state()
    smooth_loss = 1.
    minibatches = get_minibatch(X_train, y_train, mb_size, shuffle=False)
    
    # Epochs: iterating through the whole data
    for iter in range(1, n_iter + 1):
        
        # Minibatches
        for idx in range(len(minibatches)):
            
            # Train the model
            X_mini, y_mini = minibatches[idx]
            ys, caches = nn.train_forward(X_mini, state)
            loss, dys = nn.loss_function(y_pred=ys, y_train=y_mini) #, alpha=alpha
            _, grads = nn.train_backward(dys, caches)
            nn.losses['train'].append(loss)
            smooth_loss = (0.999 * smooth_loss) + (0.001 * loss)
            nn.losses['smooth train'].append(smooth_loss)
            
            # Update the model
            for key in grads.keys(): #key, value: items
                M[key] = l.exp_running_avg(M[key], grads[key], beta1)
                R[key] = l.exp_running_avg(R[key], grads[key]**2, beta2)
                m_k_hat = M[key] / (1. - (beta1 ** iter))  # bias correction (here per epoch, not per update)
                r_k_hat = R[key] / (1. - (beta2 ** iter))
                nn.model[key] -= alpha * m_k_hat / (np.sqrt(r_k_hat) + l.eps)

            # Validate the model (by testing)
            ys = nn.test(X_seed=X_valid[0], h=state, size=X_valid.shape[0]) # ys_txn
            valid_loss, _ = nn.loss_function(y_pred=ys, y_train=y_valid) #, alpha=alpha
            nn.losses['valid'].append(valid_loss)

        # Print the model loss/ error
        if iter % print_after == 0:
            print('Iter-{}, train loss: {:.8f}, valid loss: {:.8f}'.format(iter, loss, valid_loss))

    return nn
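
The parameter update inside adam_rnn is the Adam rule: exponential running averages of the gradient and of the squared gradient are bias-corrected and combined into the step. Written out (assuming exp_running_avg computes beta * old + (1 - beta) * new, and noting that the code uses the epoch counter for the bias correction):

\begin{aligned}
m_t &= \beta_1 m_{t-1} + (1 - \beta_1)\, g_t \\
r_t &= \beta_2 r_{t-1} + (1 - \beta_2)\, g_t^2 \\
\hat{m}_t &= \frac{m_t}{1 - \beta_1^{t}}, \qquad \hat{r}_t = \frac{r_t}{1 - \beta_2^{t}} \\
\theta &\leftarrow \theta - \alpha\, \frac{\hat{m}_t}{\sqrt{\hat{r}_t} + \epsilon}
\end{aligned}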

In [12]:
# Hyper-parameters
time_step = 128 # minibatch size / sequence length per update: 32, 64, 128, or 256
n_iter = 300 # epochs
alpha = 1e-4 # learning_rate: 1e-3, 5e-4, 1e-4 - default choices
print_after = 1 # print training and validation loss every N epochs
num_hidden_units = 64 # number of units in the hidden layer
num_input_units = X_train.shape[1] # X_txn: number of input features (here all three count channels are used)

# Build the network and train it with minibatch gradient descent (Adam)
# def adam_rnn(nn, X_train, y_train, alpha=0.001, mb_size=256, n_iter=2000, print_after=100):
net = GRU(D=num_input_units, H=num_hidden_units) 

# Start training with backpropagation through time and the Adam optimizer
adam_rnn(nn=net, XY_train=(X_train, Y_train), XY_valid=(X_valid, Y_valid), alpha=alpha, mb_size=time_step,
         n_iter=n_iter, print_after=print_after)


Iter-1, train loss: 37.76712215, valid loss: 595.88534521
Iter-2, train loss: 33.39966703, valid loss: 600.79235656
Iter-3, train loss: 30.41622594, valid loss: 608.01712947
Iter-4, train loss: 28.17192823, valid loss: 617.30775615
Iter-5, train loss: 26.44124599, valid loss: 628.72757242
Iter-6, train loss: 25.02354502, valid loss: 641.96398451
Iter-7, train loss: 23.78334818, valid loss: 656.14900323
Iter-8, train loss: 22.66574792, valid loss: 670.53816359
Iter-9, train loss: 21.65106835, valid loss: 684.75017105
Iter-10, train loss: 20.72559700, valid loss: 698.91944892
Iter-11, train loss: 19.87410039, valid loss: 713.69549122
Iter-12, train loss: 19.08086482, valid loss: 730.02378929
Iter-13, train loss: 18.33178522, valid loss: 749.02146855
Iter-14, train loss: 17.61554607, valid loss: 772.16420571
Iter-15, train loss: 16.92439080, valid loss: 801.83031031
Iter-16, train loss: 16.25473116, valid loss: 842.02624001
Iter-17, train loss: 15.60690722, valid loss: 898.48060110
Iter-18, train loss: 14.98311421, valid loss: 975.58680963
Iter-19, train loss: 14.38408307, valid loss: 1067.60744158
Iter-20, train loss: 13.80825741, valid loss: 1152.33146460
Iter-21, train loss: 13.25473837, valid loss: 1204.55113888
Iter-22, train loss: 12.72465655, valid loss: 1216.58812005
Iter-23, train loss: 12.21893210, valid loss: 1197.81006569
Iter-24, train loss: 11.73660193, valid loss: 1161.31512945
Iter-25, train loss: 11.27541813, valid loss: 1116.19223346
Iter-26, train loss: 10.83320116, valid loss: 1066.99048438
Iter-27, train loss: 10.40873560, valid loss: 1015.52960918
Iter-28, train loss: 10.00198412, valid loss: 962.47373207
Iter-29, train loss: 9.61378635, valid loss: 908.27122700
Iter-30, train loss: 9.24530166, valid loss: 853.65357477
Iter-31, train loss: 8.89747732, valid loss: 799.79777854
Iter-32, train loss: 8.57074407, valid loss: 748.20507035
Iter-33, train loss: 8.26496833, valid loss: 700.39544705
Iter-34, train loss: 7.97956978, valid loss: 657.48675055
Iter-35, train loss: 7.71370356, valid loss: 689.67468529
Iter-36, train loss: 7.46642746, valid loss: 747.38035565
Iter-37, train loss: 7.23681157, valid loss: 807.32362703
Iter-38, train loss: 7.02399005, valid loss: 875.06977182
Iter-39, train loss: 6.82717130, valid loss: 938.47812647
Iter-40, train loss: 6.64562421, valid loss: 989.27755910
Iter-41, train loss: 6.47865155, valid loss: 1193.32328994
Iter-42, train loss: 6.32555770, valid loss: 1126.04872590
Iter-43, train loss: 6.18562000, valid loss: 1247.46219496
Iter-44, train loss: 6.05807179, valid loss: 1554.47833872
Iter-45, train loss: 5.94209978, valid loss: 1326.58842989
Iter-46, train loss: 5.83685310, valid loss: 1484.47666388
Iter-47, train loss: 5.74145963, valid loss: 1783.57952386
Iter-48, train loss: 5.65504623, valid loss: 1852.02384687
Iter-49, train loss: 5.57675951, valid loss: 1944.73843289
Iter-50, train loss: 5.50578410, valid loss: 1934.37788978
Iter-51, train loss: 5.44135643, valid loss: 1946.33322915
Iter-52, train loss: 5.38277357, valid loss: 1924.52807286
Iter-53, train loss: 5.32939753, valid loss: 1836.41501989
Iter-54, train loss: 5.28065583, valid loss: 1822.32065301
Iter-55, train loss: 5.23603910, valid loss: 1749.35152289
Iter-56, train loss: 5.19509675, valid loss: 1682.17210256
Iter-57, train loss: 5.15743144, valid loss: 1616.49754857
Iter-58, train loss: 5.12269286, valid loss: 1645.77197261
Iter-59, train loss: 5.09057156, valid loss: 1640.06591526
Iter-60, train loss: 5.06079312, valid loss: 1605.51085766
Iter-61, train loss: 5.03311303, valid loss: 1600.91489483
Iter-62, train loss: 5.00731233, valid loss: 1612.37689826
Iter-63, train loss: 4.98319412, valid loss: 1554.64904462
Iter-64, train loss: 4.96058079, valid loss: 1423.90935672
Iter-65, train loss: 4.93931176, valid loss: 1562.66875450
Iter-66, train loss: 4.91924180, valid loss: 1569.46298826
Iter-67, train loss: 4.90023957, valid loss: 1530.31633337
Iter-68, train loss: 4.88218645, valid loss: 1427.15264820
Iter-69, train loss: 4.86497543, valid loss: 1415.16625513
Iter-70, train loss: 4.84851018, valid loss: 1431.99129832
Iter-71, train loss: 4.83270414, valid loss: 1484.18199143
Iter-72, train loss: 4.81747964, valid loss: 1509.97276300
Iter-73, train loss: 4.80276715, valid loss: 1409.87380536
Iter-74, train loss: 4.78850446, valid loss: 1454.07485713
Iter-75, train loss: 4.77463603, valid loss: 1450.65049667
Iter-76, train loss: 4.76111226, valid loss: 1454.44135243
Iter-77, train loss: 4.74788897, valid loss: 1450.47712000
Iter-78, train loss: 4.73492674, valid loss: 1379.92007623
Iter-79, train loss: 4.72219047, valid loss: 1361.20473123
Iter-80, train loss: 4.70964887, valid loss: 1557.33781533
Iter-81, train loss: 4.69727403, valid loss: 1569.22362531
Iter-82, train loss: 4.68504101, valid loss: 1580.92721616
Iter-83, train loss: 4.67292753, valid loss: 1506.35720680
Iter-84, train loss: 4.66091356, valid loss: 1488.41920449
Iter-85, train loss: 4.64898104, valid loss: 1402.69111453
Iter-86, train loss: 4.63711360, valid loss: 1567.36548488
Iter-87, train loss: 4.62529627, valid loss: 1613.84289116
Iter-88, train loss: 4.61351523, valid loss: 1631.61919269
Iter-89, train loss: 4.60175756, valid loss: 1640.52120904
Iter-90, train loss: 4.59001108, valid loss: 1652.37126100
Iter-91, train loss: 4.57826408, valid loss: 1571.99306680
Iter-92, train loss: 4.56650521, valid loss: 1565.57938796
Iter-93, train loss: 4.55472331, valid loss: 1566.18759225
Iter-94, train loss: 4.54290730, valid loss: 1586.01718206
Iter-95, train loss: 4.53104608, valid loss: 1598.48643929
Iter-96, train loss: 4.51912847, valid loss: 1563.34231446
Iter-97, train loss: 4.50714322, valid loss: 1759.52107463
Iter-98, train loss: 4.49507902, valid loss: 1557.64762648
Iter-99, train loss: 4.48292454, valid loss: 1497.98270306
Iter-100, train loss: 4.47066854, valid loss: 1496.41337733
Iter-101, train loss: 4.45830000, valid loss: 1559.57633497
Iter-102, train loss: 4.44580827, valid loss: 1597.83925130
Iter-103, train loss: 4.43318325, valid loss: 1662.09346638
Iter-104, train loss: 4.42041561, valid loss: 1628.47826882
Iter-105, train loss: 4.40749699, valid loss: 1697.26149648
Iter-106, train loss: 4.39442021, valid loss: 1691.26401514
Iter-107, train loss: 4.38117945, valid loss: 1734.28354240
Iter-108, train loss: 4.36777048, valid loss: 1764.41652063
Iter-109, train loss: 4.35419076, valid loss: 1686.13569384
Iter-110, train loss: 4.34043960, valid loss: 1710.16532424
Iter-111, train loss: 4.32651820, valid loss: 1698.16927020
Iter-112, train loss: 4.31242968, valid loss: 1737.66424017
Iter-113, train loss: 4.29817905, valid loss: 1757.07501230
Iter-114, train loss: 4.28377313, valid loss: 2073.03099844
Iter-115, train loss: 4.26922044, valid loss: 2094.78033794
Iter-116, train loss: 4.25453102, valid loss: 2079.47544072
Iter-117, train loss: 4.23971622, valid loss: 1846.56047806
Iter-118, train loss: 4.22478850, valid loss: 1725.42611199
Iter-119, train loss: 4.20976117, valid loss: 1728.45056500
Iter-120, train loss: 4.19464816, valid loss: 1693.62760154
Iter-121, train loss: 4.17946378, valid loss: 1700.51366874
Iter-122, train loss: 4.16422247, valid loss: 1804.24229260
Iter-123, train loss: 4.14893865, valid loss: 1706.97945288
Iter-124, train loss: 4.13362647, valid loss: 1594.58569792
Iter-125, train loss: 4.11829973, valid loss: 1581.28756670
Iter-126, train loss: 4.10297173, valid loss: 1575.31542823
Iter-127, train loss: 4.08765517, valid loss: 1485.79637572
Iter-128, train loss: 4.07236214, valid loss: 1466.28108432
Iter-129, train loss: 4.05710406, valid loss: 1487.82239528
Iter-130, train loss: 4.04189170, valid loss: 1439.18344200
Iter-131, train loss: 4.02673520, valid loss: 1481.54400814
Iter-132, train loss: 4.01164414, valid loss: 1425.61739739
Iter-133, train loss: 3.99662756, valid loss: 1388.98524155
Iter-134, train loss: 3.98169405, valid loss: 1457.30477925
Iter-135, train loss: 3.96685182, valid loss: 1498.61082378
Iter-136, train loss: 3.95210882, valid loss: 1461.76762970
Iter-137, train loss: 3.93747276, valid loss: 1512.65424306
Iter-138, train loss: 3.92295121, valid loss: 1516.74453417
Iter-139, train loss: 3.90855171, valid loss: 1503.19349364
Iter-140, train loss: 3.89428175, valid loss: 1474.63910137
Iter-141, train loss: 3.88014887, valid loss: 1406.91866162
Iter-142, train loss: 3.86616070, valid loss: 1368.47960712
Iter-143, train loss: 3.85232494, valid loss: 1412.18268435
Iter-144, train loss: 3.83864939, valid loss: 1474.93217899
Iter-145, train loss: 3.82514192, valid loss: 1445.41500296
Iter-146, train loss: 3.81181045, valid loss: 1503.73472403
Iter-147, train loss: 3.79866290, valid loss: 1524.27639883
Iter-148, train loss: 3.78570712, valid loss: 1510.97912079
Iter-149, train loss: 3.77295083, valid loss: 1486.24979330
Iter-150, train loss: 3.76040155, valid loss: 1442.24008714
Iter-151, train loss: 3.74806647, valid loss: 1398.43737222
Iter-152, train loss: 3.73595240, valid loss: 1362.03087416
Iter-153, train loss: 3.72406570, valid loss: 1328.18837284
Iter-154, train loss: 3.71241211, valid loss: 1360.78354135
Iter-155, train loss: 3.70099678, valid loss: 1292.09513229
Iter-156, train loss: 3.68982411, valid loss: 1121.59173625
Iter-157, train loss: 3.67889777, valid loss: 1119.64725018
Iter-158, train loss: 3.66822060, valid loss: 1116.59087893
Iter-159, train loss: 3.65779463, valid loss: 1113.06303033
Iter-160, train loss: 3.64762105, valid loss: 1109.67008176
Iter-161, train loss: 3.63770020, valid loss: 1106.75684483
Iter-162, train loss: 3.62803164, valid loss: 1423.73208599
Iter-163, train loss: 3.61861414, valid loss: 1431.78911181
Iter-164, train loss: 3.60944573, valid loss: 1419.53144930
Iter-165, train loss: 3.60052378, valid loss: 1294.59020924
Iter-166, train loss: 3.59184503, valid loss: 1288.17809542
Iter-167, train loss: 3.58340570, valid loss: 1282.54477094
Iter-168, train loss: 3.57520148, valid loss: 1277.16934161
Iter-169, train loss: 3.56722771, valid loss: 1271.95645356
Iter-170, train loss: 3.55947933, valid loss: 1266.92232833
Iter-171, train loss: 3.55195105, valid loss: 1262.20482260
Iter-172, train loss: 3.54463732, valid loss: 1258.33056879
Iter-173, train loss: 3.53753247, valid loss: 1827.34872933
Iter-174, train loss: 3.53063069, valid loss: 1886.09358392
Iter-175, train loss: 3.52392614, valid loss: 1907.02136246
Iter-176, train loss: 3.51741293, valid loss: 1901.24370326
Iter-177, train loss: 3.51108521, valid loss: 1888.09687560
Iter-178, train loss: 3.50493716, valid loss: 1896.56497312
Iter-179, train loss: 3.49896304, valid loss: 1884.77287589
Iter-180, train loss: 3.49315718, valid loss: 1888.26862934
Iter-181, train loss: 3.48751405, valid loss: 1895.15593153
Iter-182, train loss: 3.48202820, valid loss: 1921.96335114
Iter-183, train loss: 3.47669435, valid loss: 1922.81490514
Iter-184, train loss: 3.47150733, valid loss: 1901.96577899
Iter-185, train loss: 3.46646212, valid loss: 1926.10944976
Iter-186, train loss: 3.46155384, valid loss: 1900.58328929
Iter-187, train loss: 3.45677777, valid loss: 1943.98274564
Iter-188, train loss: 3.45212932, valid loss: 1910.52233892
Iter-189, train loss: 3.44760405, valid loss: 1954.52089706
Iter-190, train loss: 3.44319767, valid loss: 1910.52007449
Iter-191, train loss: 3.43890603, valid loss: 1938.52056160
Iter-192, train loss: 3.43472513, valid loss: 1904.06565088
Iter-193, train loss: 3.43065110, valid loss: 1945.40115346
Iter-194, train loss: 3.42668021, valid loss: 1936.04411872
Iter-195, train loss: 3.42280887, valid loss: 1916.34122658
Iter-196, train loss: 3.41903362, valid loss: 1923.54389858
Iter-197, train loss: 3.41535113, valid loss: 1873.46349305
Iter-198, train loss: 3.41175820, valid loss: 1934.40566560
Iter-199, train loss: 3.40825176, valid loss: 1898.81085557
Iter-200, train loss: 3.40482886, valid loss: 1960.76691190
Iter-201, train loss: 3.40148669, valid loss: 1936.33730608
Iter-202, train loss: 3.39822252, valid loss: 1967.16323995
Iter-203, train loss: 3.39503379, valid loss: 1930.49139473
Iter-204, train loss: 3.39191802, valid loss: 2022.50141759
Iter-205, train loss: 3.38887286, valid loss: 1954.73462201
Iter-206, train loss: 3.38589606, valid loss: 1938.24659008
Iter-207, train loss: 3.38298550, valid loss: 2016.06563180
Iter-208, train loss: 3.38013914, valid loss: 1971.46020607
Iter-209, train loss: 3.37735506, valid loss: 1992.33356569
Iter-210, train loss: 3.37463143, valid loss: 2019.02302536
Iter-211, train loss: 3.37196654, valid loss: 2033.36034441
Iter-212, train loss: 3.36935874, valid loss: 2016.74048472
Iter-213, train loss: 3.36680649, valid loss: 2005.39279232
Iter-214, train loss: 3.36430834, valid loss: 2051.06909276
Iter-215, train loss: 3.36186290, valid loss: 2030.47054100
Iter-216, train loss: 3.35946887, valid loss: 1973.14961836
Iter-217, train loss: 3.35712504, valid loss: 1987.45460956
Iter-218, train loss: 3.35483023, valid loss: 1774.79858369
Iter-219, train loss: 3.35258336, valid loss: 1866.41083896
Iter-220, train loss: 3.35038340, valid loss: 1884.07718207
Iter-221, train loss: 3.34822935, valid loss: 2021.14082183
Iter-222, train loss: 3.34612030, valid loss: 2139.04076446
Iter-223, train loss: 3.34405536, valid loss: 1789.55853235
Iter-224, train loss: 3.34203368, valid loss: 2156.47729181
Iter-225, train loss: 3.34005445, valid loss: 2123.27492064
Iter-226, train loss: 3.33811690, valid loss: 2120.51217500
Iter-227, train loss: 3.33622028, valid loss: 2102.69957289
Iter-228, train loss: 3.33436387, valid loss: 2071.89857886
Iter-229, train loss: 3.33254696, valid loss: 2133.35417314
Iter-230, train loss: 3.33076885, valid loss: 2147.56025494
Iter-231, train loss: 3.32902887, valid loss: 2063.82971785
Iter-232, train loss: 3.32732634, valid loss: 1728.29889327
Iter-233, train loss: 3.32566061, valid loss: 2053.44556489
Iter-234, train loss: 3.32403101, valid loss: 1855.70033440
Iter-235, train loss: 3.32243687, valid loss: 1762.77855514
Iter-236, train loss: 3.32087752, valid loss: 1879.78044459
Iter-237, train loss: 3.31935231, valid loss: 2021.37748370
Iter-238, train loss: 3.31786055, valid loss: 2177.02910173
Iter-239, train loss: 3.31640155, valid loss: 1723.91156087
Iter-240, train loss: 3.31497463, valid loss: 1973.80783738
Iter-241, train loss: 3.31357908, valid loss: 2056.11887496
Iter-242, train loss: 3.31221418, valid loss: 2238.31132214
Iter-243, train loss: 3.31087921, valid loss: 1774.22429759
Iter-244, train loss: 3.30957342, valid loss: 1776.17212663
Iter-245, train loss: 3.30829607, valid loss: 2248.63712728
Iter-246, train loss: 3.30704639, valid loss: 2055.99494911
Iter-247, train loss: 3.30582361, valid loss: 2365.02904920
Iter-248, train loss: 3.30462694, valid loss: 2297.63197616
Iter-249, train loss: 3.30345558, valid loss: 1852.06043863
Iter-250, train loss: 3.30230872, valid loss: 2058.05619479
Iter-251, train loss: 3.30118554, valid loss: 1779.70074449
Iter-252, train loss: 3.30008523, valid loss: 1848.89168347
Iter-253, train loss: 3.29900693, valid loss: 1793.34104132
Iter-254, train loss: 3.29794982, valid loss: 2106.21619290
Iter-255, train loss: 3.29691304, valid loss: 1550.28004196
Iter-256, train loss: 3.29589574, valid loss: 2308.27487447
Iter-257, train loss: 3.29489707, valid loss: 2264.01225321
Iter-258, train loss: 3.29391616, valid loss: 2073.58734866
Iter-259, train loss: 3.29295216, valid loss: 1793.59120820
Iter-260, train loss: 3.29200421, valid loss: 1732.37876029
Iter-261, train loss: 3.29107145, valid loss: 1974.94790779
Iter-262, train loss: 3.29015303, valid loss: 1397.69262287
Iter-263, train loss: 3.28924810, valid loss: 1508.52840978
Iter-264, train loss: 3.28835580, valid loss: 1687.45562418
Iter-265, train loss: 3.28747530, valid loss: 1369.07385692
Iter-266, train loss: 3.28660576, valid loss: 1423.80686093
Iter-267, train loss: 3.28574636, valid loss: 2156.25595456
Iter-268, train loss: 3.28489628, valid loss: 1808.87638664
Iter-269, train loss: 3.28405471, valid loss: 1368.64346702
Iter-270, train loss: 3.28322084, valid loss: 1351.80159420
Iter-271, train loss: 3.28239391, valid loss: 1586.62967551
Iter-272, train loss: 3.28157313, valid loss: 1377.79707222
Iter-273, train loss: 3.28075773, valid loss: 1264.83377731
Iter-274, train loss: 3.27994698, valid loss: 2118.50971448
Iter-275, train loss: 3.27914012, valid loss: 1453.12170882
Iter-276, train loss: 3.27833645, valid loss: 1608.00452705
Iter-277, train loss: 3.27753526, valid loss: 1381.45037364
Iter-278, train loss: 3.27673585, valid loss: 1423.04176891
Iter-279, train loss: 3.27593754, valid loss: 2169.45987411
Iter-280, train loss: 3.27513968, valid loss: 1755.19431933
Iter-281, train loss: 3.27434161, valid loss: 2152.85699880
Iter-282, train loss: 3.27354271, valid loss: 1455.05746056
Iter-283, train loss: 3.27274237, valid loss: 1639.55309212
Iter-284, train loss: 3.27193997, valid loss: 2031.56219354
Iter-285, train loss: 3.27113495, valid loss: 1864.88848013
Iter-286, train loss: 3.27032673, valid loss: 1849.94633043
Iter-287, train loss: 3.26951476, valid loss: 1559.12604607
Iter-288, train loss: 3.26869850, valid loss: 1691.78103173
Iter-289, train loss: 3.26787744, valid loss: 1546.63200994
Iter-290, train loss: 3.26705106, valid loss: 2138.93700757
Iter-291, train loss: 3.26621888, valid loss: 2013.45744126
Iter-292, train loss: 3.26538042, valid loss: 1870.96798221
Iter-293, train loss: 3.26453522, valid loss: 1886.25723779
Iter-294, train loss: 3.26368284, valid loss: 2187.33451456
Iter-295, train loss: 3.26282284, valid loss: 1958.91107035
Iter-296, train loss: 3.26195480, valid loss: 2382.55806142
Iter-297, train loss: 3.26107833, valid loss: 2239.63653122
Iter-298, train loss: 3.26019302, valid loss: 2011.90790362
Iter-299, train loss: 3.25929850, valid loss: 2065.78516913
Iter-300, train loss: 3.25839442, valid loss: 1852.02724590
Out[12]:
<__main__.GRU at 0x7f7cc19b8240>

In [13]:
import matplotlib.pyplot as plt

plt.plot(net.losses['train'], label='Train loss')
plt.plot(net.losses['smooth train'], label='Train smooth loss')
plt.legend()
plt.show()



In [14]:
import matplotlib.pyplot as plt

plt.plot(net.losses['valid'], label='Validation loss')
plt.legend()
plt.show()



In [22]:
import matplotlib.pyplot as plt

y_pred = net.test(X_seed=X_valid[0], h=net.initial_state(), size=X_valid.shape[0]) # ys_tx1xn
y_pred.shape, Y_valid.shape

# Column 2 is cnt, the total hourly rental count
plt.plot(y_pred[0:200, 2], label='y_pred')
plt.plot(Y_valid[0:200, 2], label='Y_valid')
# plt.plot(X_valid[:100], label='X_valid')
plt.legend()
plt.show()
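
Note that test() runs the network free-running: after the seed, each prediction is fed back as the next input, so errors can compound over the full 1378-step horizon, which helps explain why the validation loss above is much larger than the training loss. For comparison, a one-step-ahead (teacher-forced) evaluation that feeds the true previous values can be sketched as follows (not part of the original notebook):

# Sketch: one-step-ahead predictions on the validation set
h = net.initial_state()
y_step = []
for x in X_valid:
    y, h, _ = net.forward(x.reshape(1, -1), h, net.model)
    y_step.append(y)
y_step = np.array(y_step, dtype=float).reshape(len(y_step), -1)

plt.plot(y_step[0:200, 2], label='y_pred (one-step)')
plt.plot(Y_valid[0:200, 2], label='Y_valid')
plt.legend()
plt.show()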


