In [43]:
# Data: time-series data (bike-sharing hourly data; smartwatch and financial datasets are alternative options below)
# %matplotlib inline # for plt.show()
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Data reading
# The historical/time-series data to visualize (the bike-sharing data is active; the others are commented out)
# data_path = 'data/smartwatch_data/experimental_data_analysis/Basis_Watch_Data.csv'
# data_path = 'data/financial_data/USD_INR.csv'
data_path = 'data/bike_data/hour.csv'
data = pd.read_csv(data_path)

# Data cleaning
# Replace NaN values with 0.0
data = data.fillna(value=0.0)

# Show the first 10 rows of the CSV (comma-separated values) data
data[:10]


Out[43]:
instant dteday season yr mnth hr holiday weekday workingday weathersit temp atemp hum windspeed casual registered cnt
0 1 2011-01-01 1 0 1 0 0 6 0 1 0.24 0.2879 0.81 0.0000 3 13 16
1 2 2011-01-01 1 0 1 1 0 6 0 1 0.22 0.2727 0.80 0.0000 8 32 40
2 3 2011-01-01 1 0 1 2 0 6 0 1 0.22 0.2727 0.80 0.0000 5 27 32
3 4 2011-01-01 1 0 1 3 0 6 0 1 0.24 0.2879 0.75 0.0000 3 10 13
4 5 2011-01-01 1 0 1 4 0 6 0 1 0.24 0.2879 0.75 0.0000 0 1 1
5 6 2011-01-01 1 0 1 5 0 6 0 2 0.24 0.2576 0.75 0.0896 0 1 1
6 7 2011-01-01 1 0 1 6 0 6 0 1 0.22 0.2727 0.80 0.0000 2 0 2
7 8 2011-01-01 1 0 1 7 0 6 0 1 0.20 0.2576 0.86 0.0000 1 2 3
8 9 2011-01-01 1 0 1 8 0 6 0 1 0.24 0.2879 0.75 0.0000 1 7 8
9 10 2011-01-01 1 0 1 9 0 6 0 1 0.32 0.3485 0.76 0.0000 8 6 14

In [44]:
# Plotting the raw data before scaling/standardization
# data[:10000]['Price'].plot()  # (for the financial 'Price' column)
data[:10].plot()
plt.legend()
plt.show()



In [45]:
data_array = np.array(data)
data_array.shape, data_array.dtype

# Keep only the last two columns ('registered' and 'cnt') as the series to model
data_main = np.array(data_array[:, -2:], dtype=float)
data_main.shape, data_main.dtype

plt.plot(data_main[:100])
plt.show()



In [46]:
# Standardize each feature to zero mean and unit variance (z-score normalization)
mean = np.mean(data_main, axis=0)
std = np.std(data_main, axis=0)
std.shape, mean.shape, std.dtype, mean.dtype

data_norm = (data_main - mean) / std
plt.plot(data_norm[:100])
plt.show()
data_norm.mean(), data_norm.std(), data_norm.var(), data_norm.shape, data_norm.dtype


Out[46]:
(-5.5603781611930274e-17, 1.0, 1.0, (17379, 2), dtype('float64'))

In [47]:
train_data = data_norm[:16000]  # first 16000 time steps for training
test_data = data_norm[16000:]   # remaining 1379 time steps for validation
train_data.shape, test_data.shape

# Next-step prediction: the target Y is the input X shifted forward by one time step
X_train = train_data[0:15999]
Y_train = train_data[1:16000]
X_train.shape, Y_train.shape

plt.plot(X_train[:100])
plt.plot(Y_train[:100])
plt.show()



In [48]:
# Same next-step setup for the validation split
X_valid = test_data[0:1378]
Y_valid = test_data[1:1379]
X_valid.shape, Y_valid.shape
plt.plot(X_valid[:100])
plt.plot(Y_valid[:100])
plt.show()



In [49]:
# Model or Network
import impl.layer as l
from impl.loss import *

class GRU:
    def __init__(self, D, H):
        self.D = D
        self.H = H
        self.losses = {'train':[], 'smooth train':[], 'valid': []}
        
        # Model params
        Z = H + D
        low, high = (-1.0), (+1.0)
        low_Z, high_Z = (low / np.sqrt(Z / 2.)), (high / np.sqrt(Z / 2.))
        low_H, high_H = (low / np.sqrt(H / 2.)), (high / np.sqrt(H / 2.))
        m = dict(
            #             Wz=np.random.randn(Z, H) / np.sqrt(Z / 2.),
            #             Wr=np.random.randn(Z, H) / np.sqrt(Z / 2.),
            #             Wh=np.random.randn(Z, H) / np.sqrt(Z / 2.),
            #             Wy=np.random.randn(H, D) / np.sqrt(H / 2.),
            Wz=np.random.uniform(size=(Z, H), low=low_Z, high=high_Z),
            Wr=np.random.uniform(size=(Z, H), low=low_Z, high=high_Z),
            Wh=np.random.uniform(size=(Z, H), low=low_Z, high=high_Z),
            Wy=np.random.uniform(size=(H, D), low=low_H, high=high_H),
            bz=np.zeros((1, H)),
            br=np.zeros((1, H)),
            bh=np.zeros((1, H)),
            by=np.zeros((1, D))
        )
        self.model = m
        
    def initial_state(self):
        return np.zeros((1, self.H))

    def forward(self, X, h, m):
        Wz, Wr, Wh, Wy = m['Wz'], m['Wr'], m['Wh'], m['Wy']
        bz, br, bh, by = m['bz'], m['br'], m['bh'], m['by']
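        # GRU equations implemented below:
        #   z_t  = sigmoid([h_{t-1}, x_t] . Wz + bz)      update gate     (hz)
        #   r_t  = sigmoid([h_{t-1}, x_t] . Wr + br)      reset gate      (hr)
        #   h~_t = tanh([r_t * h_{t-1}, x_t] . Wh + bh)   candidate state (hh)
        #   h_t  = (1 - z_t) * h_{t-1} + z_t * h~_t       new hidden state
        #   y_t  = h_t . Wy + by                          linear readout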

        X_in = X.copy()
        h_in = h.copy()

        X = np.column_stack((h_in, X_in))

        hz, hz_cache = l.fc_forward(X, Wz, bz)
        hz, hz_sigm_cache = l.sigmoid_forward(hz)

        hr, hr_cache = l.fc_forward(X, Wr, br)
        hr, hr_sigm_cache = l.sigmoid_forward(hr)

        X = np.column_stack((hr * h_in, X_in))
        
        hh, hh_cache = l.fc_forward(X, Wh, bh)
        hh, hh_tanh_cache = l.tanh_forward(hh)

        # h = (1. - hz) * h_in + hz * hh
        # or equivalently: h = h_in + hz * (hh - h_in)
        h = ((1. - hz) * h_in) + (hz * hh)

        y, y_cache = l.fc_forward(h, Wy, by)
        
        cache = (h_in, hz, hz_cache, hz_sigm_cache, hr, hr_cache, hr_sigm_cache, hh, hh_cache, hh_tanh_cache, 
                 y_cache)

        return y, h, cache

    def backward(self, dy, dh, cache):
        h_in, hz, hz_cache, hz_sigm_cache, hr, hr_cache, hr_sigm_cache, hh, hh_cache, hh_tanh_cache, y_cache = cache
        
        dh_out = dh.copy()

        dh, dWy, dby = l.fc_backward(dy, y_cache)
        dh += dh_out

        # Backprop through h = (1. - hz) * h_in + hz * hh
        dh_in1 = (1. - hz) * dh
        dhh = hz * dh
        dhz = (hh * dh) - (h_in * dh)
        # or equivalently: dhz = (hh - h_in) * dh

        dhh = l.tanh_backward(dhh, hh_tanh_cache)
        dXh, dWh, dbh = l.fc_backward(dhh, hh_cache)

        dh = dXh[:, :self.H]
        dX_in2 = dXh[:, self.H:]
        dh_in2 = hr * dh

        dhr = h_in * dh
        dhr = l.sigmoid_backward(dhr, hr_sigm_cache)
        dXr, dWr, dbr = l.fc_backward(dhr, hr_cache)

        dhz = l.sigmoid_backward(dhz, hz_sigm_cache)
        dXz, dWz, dbz = l.fc_backward(dhz, hz_cache)

        dX = dXr + dXz
        dh_in3 = dX[:, :self.H]
        dX_in1 = dX[:, self.H:]

        dh = dh_in1 + dh_in2 + dh_in3
        dX = dX_in1 + dX_in2

        grad = dict(Wz=dWz, Wr=dWr, Wh=dWh, Wy=dWy, bz=dbz, br=dbr, bh=dbh, by=dby)
        
        return dX, dh, grad

    def train_forward(self, X_train, h):
        ys, fc_caches = [], []

        for X in X_train:
            X = X.reshape(1, -1) # X_1xn
            y, h, fc_cache = self.forward(X, h, self.model)
            fc_caches.append(fc_cache)
            ys.append(y)
        
        ys = np.array(ys, dtype=float).reshape(len(ys), -1) # ys_txn instead of ys_tx1xn
        caches = fc_caches
        
        return ys, caches
                                
    def loss_function(self, y_pred, y_train):
        loss, dys = 0.0, []

        for y, Y in zip(y_pred, y_train):
            loss += l2_regression(y_pred=y, y_train=Y)
            dy = dl2_regression(y_pred=y, y_train=Y)
            dys.append(dy)
            
        return loss, dys
    
    def train_backward(self, dys, caches):
        fc_caches = caches
        
        dh = np.zeros((1, self.H)) 
        grad = {key: np.zeros_like(val) for key, val in self.model.items()}
        grads = {key: np.zeros_like(val) for key, val in self.model.items()}

        for t in reversed(range(len(dys))):
            dy = dys[t].reshape(1, -1) # dy_1xn
            dX, dh, grad = self.backward(dy, dh, fc_caches[t])
            for key in grad.keys():
                grads[key] += grad[key]
                
        return dX, grads # TODO: dX is not used but this is a REMINDER that it exists!
    
    def test(self, X_seed, h, size):
        ys = []
        X = X_seed.reshape(1, -1)
        for _ in range(size):
            y, h, _ = self.forward(X, h, self.model)
            X = y.copy() # previous out for the next input for prediction
            ys.append(y) # list array
        
        ys = np.array(ys, dtype=float).reshape(len(ys), -1) # ys_txn instead of ys_tx1xn
        return ys
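
The GRU above relies on helper functions imported from impl.layer (as l) and impl.loss, which are not shown in this notebook. The sketch below is an assumption about their shapes, written to match how they are called above (fc_forward/fc_backward for affine layers, sigmoid/tanh with cached activations, and an L2 regression loss); the actual impl package may differ.

# Hypothetical sketch of the impl.layer / impl.loss primitives assumed above
import numpy as np

def fc_forward(X, W, b):
    # Affine layer: out = X W + b; cache the inputs for the backward pass
    out = X @ W + b
    return out, (X, W, b)

def fc_backward(dout, cache):
    X, W, b = cache
    dX = dout @ W.T
    dW = X.T @ dout
    db = np.sum(dout, axis=0, keepdims=True)
    return dX, dW, db

def sigmoid_forward(X):
    out = 1. / (1. + np.exp(-X))
    return out, out  # cache the activation itself

def sigmoid_backward(dout, cache):
    return dout * cache * (1. - cache)

def tanh_forward(X):
    out = np.tanh(X)
    return out, out

def tanh_backward(dout, cache):
    return dout * (1. - cache ** 2)

def l2_regression(y_pred, y_train):
    # Squared-error loss for a single time step
    return 0.5 * np.sum((y_pred - y_train) ** 2)

def dl2_regression(y_pred, y_train):
    return y_pred - y_train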

In [50]:
def get_minibatch(X, y, minibatch_size, shuffle):
    minibatches = []

    # for i in range(0, X.shape[0] - minibatch_size + 1, 1):
    for i in range(0, X.shape[0], minibatch_size):
        X_mini = X[i:(i + minibatch_size)]
        y_mini = y[i:(i + minibatch_size)]
        minibatches.append((X_mini, y_mini))

    return minibatches

def adam_rnn(nn, XY_train, XY_valid, alpha, mb_size, n_iter, print_after):
    X_train, y_train = XY_train
    X_valid, y_valid = XY_valid

    # Adam moment estimates (running averages of the gradient and squared gradient)
    M = {key: np.zeros_like(val) for key, val in nn.model.items()}
    R = {key: np.zeros_like(val) for key, val in nn.model.items()}
    
    # Adam exponential decay rates (values suggested by Justin Johnson at Stanford)
    beta1 = .9
    beta2 = .99
    state = nn.initial_state()
    smooth_loss = 1.
    minibatches = get_minibatch(X_train, y_train, mb_size, shuffle=False)
    
    # Epochs: iterating through the whole data
    for iter in range(1, n_iter + 1):
        
        # Minibatches
        for idx in range(len(minibatches)):
            
            # Train the model
            X_mini, y_mini = minibatches[idx]
            ys, caches = nn.train_forward(X_mini, state)
            loss, dys = nn.loss_function(y_pred=ys, y_train=y_mini) #, alpha=alpha
            _, grads = nn.train_backward(dys, caches)
            nn.losses['train'].append(loss)
            smooth_loss = (0.999 * smooth_loss) + (0.001 * loss)
            nn.losses['smooth train'].append(smooth_loss)
            
            # Update the model
            for key in grads.keys():
                M[key] = l.exp_running_avg(M[key], grads[key], beta1)
                R[key] = l.exp_running_avg(R[key], grads[key]**2, beta2)
                m_k_hat = M[key] / (1. - (beta1 ** iter))
                r_k_hat = R[key] / (1. - (beta2 ** iter))
                nn.model[key] -= alpha * m_k_hat / (np.sqrt(r_k_hat) + l.eps)

            # Validate the model (by testing)
            ys = nn.test(X_seed=X_valid[0], h=state, size=X_valid.shape[0]) # ys_tx1xn
            valid_loss, _ = nn.loss_function(y_pred=ys, y_train=y_valid) #, alpha=alpha
            nn.losses['valid'].append(valid_loss)

        # Print the training and validation losses
        if iter % print_after == 0:
            print('Iter-{}, train loss: {:.8f}, valid loss: {:.8f}'.format(iter, loss, valid_loss))

    return nn
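
The Adam update above also uses l.exp_running_avg and l.eps from impl.layer. A minimal sketch of what they are assumed to be (a standard exponential moving average plus a small numerical-stability constant); the real module may differ:

eps = 1e-8  # assumed numerical-stability constant (impl.layer.eps)

def exp_running_avg(running, new, gamma=.9):
    # Exponentially weighted moving average used for Adam's moment estimates
    return gamma * running + (1. - gamma) * new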

In [51]:
# Hyper-parameters
time_step = 128 # minibatch/sequence length; common choices: 32, 64, 128, or 256
n_iter = 200 # epochs
alpha = 1e-4 # learning rate: 1e-3, 5e-4, 1e-4 are common default choices
print_after = 1 # print training and validation loss every print_after epochs
num_hidden_units = 64 # number of units in the hidden layer
num_input_units = X_train.shape[1] # number of input features (all channels of X_train are used)

# Build the network, then train/optimize it
# def adam_rnn(nn, X_train, y_train, alpha=0.001, mb_size=256, n_iter=2000, print_after=100):
net = GRU(D=num_input_units, H=num_hidden_units) 

# Start training with backpropagation through time and the Adam optimizer
adam_rnn(nn=net, XY_train=(X_train, Y_train), XY_valid=(X_valid, Y_valid), alpha=alpha, mb_size=time_step,
         n_iter=n_iter, print_after=print_after)


Iter-1, train loss: 52.48679206, valid loss: 687.64467528
Iter-2, train loss: 47.12981756, valid loss: 687.53371487
Iter-3, train loss: 43.28666262, valid loss: 687.19000465
Iter-4, train loss: 40.10471976, valid loss: 686.89622298
Iter-5, train loss: 37.45230600, valid loss: 686.76546199
Iter-6, train loss: 35.22044607, valid loss: 686.91449931
Iter-7, train loss: 33.26902265, valid loss: 687.52648750
Iter-8, train loss: 31.50173104, valid loss: 688.83683485
Iter-9, train loss: 29.86023016, valid loss: 691.14340395
Iter-10, train loss: 28.30119011, valid loss: 694.87638208
Iter-11, train loss: 26.78799346, valid loss: 700.74888783
Iter-12, train loss: 25.29076298, valid loss: 709.98401037
Iter-13, train loss: 23.79029250, valid loss: 724.83141468
Iter-14, train loss: 22.28328086, valid loss: 749.58901274
Iter-15, train loss: 20.78590738, valid loss: 792.01424047
Iter-16, train loss: 19.33333990, valid loss: 862.00852086
Iter-17, train loss: 17.97096054, valid loss: 958.76187693
Iter-18, train loss: 16.73704808, valid loss: 1059.44863584
Iter-19, train loss: 15.64971051, valid loss: 1141.27772711
Iter-20, train loss: 14.70940757, valid loss: 1197.98837692
Iter-21, train loss: 13.90620485, valid loss: 1231.54166111
Iter-22, train loss: 13.22223629, valid loss: 1244.46514701
Iter-23, train loss: 12.63423284, valid loss: 1238.33319653
Iter-24, train loss: 12.11942944, valid loss: 1214.61656438
Iter-25, train loss: 11.66047155, valid loss: 1194.41204696
Iter-26, train loss: 11.24614476, valid loss: 1194.29231082
Iter-27, train loss: 10.86959291, valid loss: 1245.48044211
Iter-28, train loss: 10.52631637, valid loss: 1304.80334436
Iter-29, train loss: 10.21275791, valid loss: 1416.01236948
Iter-30, train loss: 9.92551659, valid loss: 1526.72688256
Iter-31, train loss: 9.66110724, valid loss: 1262.40765459
Iter-32, train loss: 9.41609003, valid loss: 1549.64562459
Iter-33, train loss: 9.18732433, valid loss: 1366.34643677
Iter-34, train loss: 8.97214210, valid loss: 775.95718263
Iter-35, train loss: 8.76836872, valid loss: 758.97934855
Iter-36, train loss: 8.57425029, valid loss: 750.42061287
Iter-37, train loss: 8.38837088, valid loss: 745.38980528
Iter-38, train loss: 8.20959434, valid loss: 742.67353582
Iter-39, train loss: 8.03703130, valid loss: 741.46426766
Iter-40, train loss: 7.87002225, valid loss: 741.25718845
Iter-41, train loss: 7.70812567, valid loss: 741.90948338
Iter-42, train loss: 7.55110296, valid loss: 744.24687661
Iter-43, train loss: 7.39889622, valid loss: 752.35853701
Iter-44, train loss: 7.25159904, valid loss: 770.74350821
Iter-45, train loss: 7.10942240, valid loss: 1094.45027048
Iter-46, train loss: 6.97265865, valid loss: 1163.60902111
Iter-47, train loss: 6.84164627, valid loss: 1422.85500415
Iter-48, train loss: 6.71673733, valid loss: 1623.22967379
Iter-49, train loss: 6.59826905, valid loss: 1598.01315358
Iter-50, train loss: 6.48654036, valid loss: 1453.31025896
Iter-51, train loss: 6.38179391, valid loss: 1318.52523744
Iter-52, train loss: 6.28420402, valid loss: 1362.00331257
Iter-53, train loss: 6.19387011, valid loss: 1358.11226392
Iter-54, train loss: 6.11081522, valid loss: 1384.20423248
Iter-55, train loss: 6.03498819, valid loss: 1440.24715754
Iter-56, train loss: 5.96626805, valid loss: 1473.05423640
Iter-57, train loss: 5.90446932, valid loss: 1334.23582349
Iter-58, train loss: 5.84934721, valid loss: 1538.93312335
Iter-59, train loss: 5.80060235, valid loss: 1554.39684763
Iter-60, train loss: 5.75788525, valid loss: 1559.11569087
Iter-61, train loss: 5.72080081, valid loss: 1584.35446504
Iter-62, train loss: 5.68891372, valid loss: 1541.33165487
Iter-63, train loss: 5.66175520, valid loss: 1482.68752175
Iter-64, train loss: 5.63883165, valid loss: 1534.51077201
Iter-65, train loss: 5.61963531, valid loss: 1546.11052554
Iter-66, train loss: 5.60365658, valid loss: 1531.57233765
Iter-67, train loss: 5.59039727, valid loss: 1551.17724987
Iter-68, train loss: 5.57938371, valid loss: 1570.42884252
Iter-69, train loss: 5.57017855, valid loss: 1576.79732564
Iter-70, train loss: 5.56239008, valid loss: 1571.15308266
Iter-71, train loss: 5.55567832, valid loss: 1574.98949670
Iter-72, train loss: 5.54975755, valid loss: 1579.06148160
Iter-73, train loss: 5.54439545, valid loss: 1582.91286991
Iter-74, train loss: 5.53940943, valid loss: 1583.57940565
Iter-75, train loss: 5.53466077, valid loss: 1587.95849378
Iter-76, train loss: 5.53004761, valid loss: 1599.12131556
Iter-77, train loss: 5.52549753, valid loss: 1576.30917005
Iter-78, train loss: 5.52096028, valid loss: 1570.56960090
Iter-79, train loss: 5.51640128, valid loss: 1557.95511360
Iter-80, train loss: 5.51179604, valid loss: 1559.32492728
Iter-81, train loss: 5.50712570, valid loss: 1547.82770817
Iter-82, train loss: 5.50237360, valid loss: 1533.37385385
Iter-83, train loss: 5.49752293, valid loss: 1494.03643894
Iter-84, train loss: 5.49255507, valid loss: 1482.57166969
Iter-85, train loss: 5.48744885, valid loss: 1494.99711636
Iter-86, train loss: 5.48218013, valid loss: 1499.11183227
Iter-87, train loss: 5.47672189, valid loss: 1494.60028230
Iter-88, train loss: 5.47104458, valid loss: 1483.85216494
Iter-89, train loss: 5.46511659, valid loss: 1481.53821470
Iter-90, train loss: 5.45890477, valid loss: 1486.57936083
Iter-91, train loss: 5.45237504, valid loss: 1493.08444251
Iter-92, train loss: 5.44549282, valid loss: 1511.73063151
Iter-93, train loss: 5.43822354, valid loss: 1545.20454745
Iter-94, train loss: 5.43053296, valid loss: 1572.23164938
Iter-95, train loss: 5.42238738, valid loss: 1539.80057585
Iter-96, train loss: 5.41375388, valid loss: 1399.24896901
Iter-97, train loss: 5.40460032, valid loss: 1473.16610515
Iter-98, train loss: 5.39489543, valid loss: 1501.55652557
Iter-99, train loss: 5.38460874, valid loss: 1539.79366282
Iter-100, train loss: 5.37371056, valid loss: 1580.22016464
Iter-101, train loss: 5.36217196, valid loss: 1592.26383944
Iter-102, train loss: 5.34996479, valid loss: 1580.46473800
Iter-103, train loss: 5.33706173, valid loss: 1568.39080767
Iter-104, train loss: 5.32343652, valid loss: 1565.11505426
Iter-105, train loss: 5.30906424, valid loss: 1580.21617613
Iter-106, train loss: 5.29392175, valid loss: 1601.86384894
Iter-107, train loss: 5.27798838, valid loss: 1631.54714998
Iter-108, train loss: 5.26124669, valid loss: 1668.32555940
Iter-109, train loss: 5.24368346, valid loss: 1702.20660945
Iter-110, train loss: 5.22529090, valid loss: 1731.12530116
Iter-111, train loss: 5.20606781, valid loss: 1754.81034376
Iter-112, train loss: 5.18602101, valid loss: 1767.52316542
Iter-113, train loss: 5.16516655, valid loss: 1777.80767616
Iter-114, train loss: 5.14353088, valid loss: 1794.84886756
Iter-115, train loss: 5.12115175, valid loss: 1821.89029657
Iter-116, train loss: 5.09807868, valid loss: 1833.39142114
Iter-117, train loss: 5.07437293, valid loss: 1836.44931176
Iter-118, train loss: 5.05010689, valid loss: 1821.11993615
Iter-119, train loss: 5.02536280, valid loss: 1811.07224912
Iter-120, train loss: 5.00023088, valid loss: 1794.00269315
Iter-121, train loss: 4.97480690, valid loss: 1809.42538259
Iter-122, train loss: 4.94918938, valid loss: 1848.55727193
Iter-123, train loss: 4.92347658, valid loss: 1859.44002970
Iter-124, train loss: 4.89776361, valid loss: 1629.20810339
Iter-125, train loss: 4.87213967, valid loss: 1675.59382227
Iter-126, train loss: 4.84668582, valid loss: 1702.06838968
Iter-127, train loss: 4.82147329, valid loss: 1023.52341272
Iter-128, train loss: 4.79656242, valid loss: 1893.95377120
Iter-129, train loss: 4.77200222, valid loss: 1918.23877653
Iter-130, train loss: 4.74783054, valid loss: 1847.30601617
Iter-131, train loss: 4.72407470, valid loss: 1828.70483594
Iter-132, train loss: 4.70075249, valid loss: 1842.87676538
Iter-133, train loss: 4.67787341, valid loss: 1863.83689325
Iter-134, train loss: 4.65544000, valid loss: 1878.96406976
Iter-135, train loss: 4.63344931, valid loss: 1901.21466732
Iter-136, train loss: 4.61189414, valid loss: 1915.95667197
Iter-137, train loss: 4.59076430, valid loss: 1918.81008799
Iter-138, train loss: 4.57004764, valid loss: 1918.78576311
Iter-139, train loss: 4.54973096, valid loss: 1912.62023293
Iter-140, train loss: 4.52980069, valid loss: 1896.07116924
Iter-141, train loss: 4.51024346, valid loss: 1878.12355235
Iter-142, train loss: 4.49104643, valid loss: 1862.00994607
Iter-143, train loss: 4.47219759, valid loss: 1838.54389327
Iter-144, train loss: 4.45368590, valid loss: 1859.83186685
Iter-145, train loss: 4.43550128, valid loss: 1880.21145458
Iter-146, train loss: 4.41763465, valid loss: 1911.64296152
Iter-147, train loss: 4.40007786, valid loss: 1944.90776676
Iter-148, train loss: 4.38282355, valid loss: 1973.02146365
Iter-149, train loss: 4.36586507, valid loss: 1993.51643594
Iter-150, train loss: 4.34919632, valid loss: 2006.83239887
Iter-151, train loss: 4.33281169, valid loss: 2014.40525574
Iter-152, train loss: 4.31670588, valid loss: 2018.21133459
Iter-153, train loss: 4.30087385, valid loss: 2020.09577350
Iter-154, train loss: 4.28531069, valid loss: 2045.71806888
Iter-155, train loss: 4.27001157, valid loss: 2041.63508405
Iter-156, train loss: 4.25497171, valid loss: 2047.37541421
Iter-157, train loss: 4.24018630, valid loss: 2038.91366001
Iter-158, train loss: 4.22565047, valid loss: 2032.07706462
Iter-159, train loss: 4.21135931, valid loss: 2078.47504820
Iter-160, train loss: 4.19730785, valid loss: 2067.63963750
Iter-161, train loss: 4.18349105, valid loss: 2049.64242751
Iter-162, train loss: 4.16990382, valid loss: 2034.53789507
Iter-163, train loss: 4.15654103, valid loss: 2015.91318868
Iter-164, train loss: 4.14339755, valid loss: 2000.97728337
Iter-165, train loss: 4.13046824, valid loss: 2013.86011747
Iter-166, train loss: 4.11774797, valid loss: 2020.49106891
Iter-167, train loss: 4.10523166, valid loss: 2033.16892731
Iter-168, train loss: 4.09291430, valid loss: 2034.52812047
Iter-169, train loss: 4.08079096, valid loss: 2044.46056095
Iter-170, train loss: 4.06885680, valid loss: 2032.25253239
Iter-171, train loss: 4.05710709, valid loss: 2034.44297749
Iter-172, train loss: 4.04553722, valid loss: 2037.33305356
Iter-173, train loss: 4.03414274, valid loss: 2038.31105883
Iter-174, train loss: 4.02291931, valid loss: 1954.60592717
Iter-175, train loss: 4.01186274, valid loss: 1972.99782575
Iter-176, train loss: 4.00096901, valid loss: 1981.57583642
Iter-177, train loss: 3.99023423, valid loss: 2060.06924131
Iter-178, train loss: 3.97965466, valid loss: 2070.50126682
Iter-179, train loss: 3.96922670, valid loss: 2076.91573277
Iter-180, train loss: 3.95894691, valid loss: 2079.02604757
Iter-181, train loss: 3.94881196, valid loss: 2079.19837855
Iter-182, train loss: 3.93881867, valid loss: 2046.96288053
Iter-183, train loss: 3.92896397, valid loss: 2039.39354421
Iter-184, train loss: 3.91924490, valid loss: 2043.76277471
Iter-185, train loss: 3.90965860, valid loss: 2051.45986774
Iter-186, train loss: 3.90020233, valid loss: 2065.73900202
Iter-187, train loss: 3.89087342, valid loss: 2066.19300016
Iter-188, train loss: 3.88166927, valid loss: 2065.03136860
Iter-189, train loss: 3.87258736, valid loss: 2070.55273007
Iter-190, train loss: 3.86362524, valid loss: 1984.56688421
Iter-191, train loss: 3.85478051, valid loss: 2043.61643010
Iter-192, train loss: 3.84605080, valid loss: 1994.56493832
Iter-193, train loss: 3.83743380, valid loss: 1959.57281729
Iter-194, train loss: 3.82892724, valid loss: 2051.83180454
Iter-195, train loss: 3.82052886, valid loss: 1984.29151126
Iter-196, train loss: 3.81223644, valid loss: 1961.70680184
Iter-197, train loss: 3.80404778, valid loss: 1966.49744102
Iter-198, train loss: 3.79596069, valid loss: 1962.48346042
Iter-199, train loss: 3.78797300, valid loss: 1946.03115462
Iter-200, train loss: 3.78008256, valid loss: 1937.62792952
Out[51]:
<__main__.GRU at 0x7f38d22009e8>

In [52]:
import matplotlib.pyplot as plt

plt.plot(net.losses['train'], label='Train loss')
plt.plot(net.losses['smooth train'], label='Train smooth loss')
plt.legend()
plt.show()



In [53]:
import matplotlib.pyplot as plt

plt.plot(net.losses['valid'], label='Validation loss')
plt.legend()
plt.show()



In [56]:
import matplotlib.pyplot as plt

y_pred = net.test(X_seed=X_valid[0], h=net.initial_state(), size=X_valid.shape[0]) # ys_tx1xn
y_pred.shape, Y_valid.shape

plt.plot(y_pred[0:100, 1], label='y_pred')
plt.plot(Y_valid[0:100, 1], label='Y_valid')
# plt.plot(X_valid[:100], label='X_valid')
plt.legend()
plt.show()
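
The curves above are in standardized (z-score) units. To read the predictions in the original ride-count scale, the normalization from the earlier cell can be inverted with the stored mean and std; this small sketch is not part of the original notebook:

# Undo the standardization per feature: x = x_norm * std + mean
y_pred_counts = (y_pred * std) + mean
Y_valid_counts = (Y_valid * std) + mean

plt.plot(y_pred_counts[0:100, 1], label='y_pred (cnt)')
plt.plot(Y_valid_counts[0:100, 1], label='Y_valid (cnt)')
plt.legend()
plt.show()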


