In [1]:
# Data: time-series data from a smartwatch / wristband (smartband)
# %matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
import scipy.io as spio
# Data reading
# # Linux-Ubuntu
# data_path = '/home/arasdar/data/Training_data/DATA_01_TYPE01.mat'
# Macbook
data_path = '/Users/arasdar/data/Training_data/DATA_01_TYPE01.mat'
watch = spio.loadmat(data_path)
data = watch['sig']
data = np.array(data)
data.shape, watch['sig'].size/6
# Normalize each batch of the data (each batch = one file)
# Open question: could all files be normalized jointly instead?
mean = np.mean(data, axis=1)
var = np.var(data, axis=1)
std = np.sqrt(var)
mean.shape, var.shape, std.shape
mean = mean.reshape(-1, 1)
std = std.reshape(-1, 1)
mean.shape, std.shape
data_norm = (data - mean)/std
data.shape, data_norm.shape
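# Optional sanity check, a minimal sketch: after z-scoring, each channel of
# data_norm should have mean ~ 0 and standard deviation ~ 1.
np.mean(data_norm, axis=1).round(6), np.std(data_norm, axis=1).round(6)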
# The features/channels of the data = ['ECG', 'PPG-1', 'PPG-2', 'ACC-X', 'ACC-Y', 'ACC-Z']
plt.plot(data[0, :1000], label='ECG')
plt.plot(data[1, :1000], label='PPG-1')
plt.plot(data[2, :1000], label='PPG-2')
# plt.plot(data[3, :1000], label='ACC-X')
# plt.plot(data[4, :1000], label='ACC-Y')
# plt.plot(data[5, :1000], label='ACC-Z')
plt.legend()
plt.show()
# The features/channels of the data = ['ECG', 'PPG-1', 'PPG-2', 'ACC-X', 'ACC-Y', 'ACC-Z']
plt.plot(data_norm[0, :1000], label='ECG')
plt.plot(data_norm[1, :1000], label='PPG-1')
plt.plot(data_norm[2, :1000], label='PPG-2')
# plt.plot(data_norm[3, :1000], label='ACC-X')
# plt.plot(data_norm[4, :1000], label='ACC-Y')
# plt.plot(data_norm[5, :1000], label='ACC-Z')
plt.legend()
plt.show()
# ECG
Y = data_norm[0]
X = data_norm[1:]
Y.shape, X.shape
Y = Y.reshape(1, -1)
Y.shape, X.shape
plt.plot(Y[0, :1000], label='HR-ECG-1: Output signal')
plt.plot(X[0, :1000], label='HR-PPG-1: Input signal')
plt.plot(X[1, :1000], label='HR-PPG-2: Input signal')
plt.plot(X[2, :1000], label='ACC-X-3: Input signal')
plt.plot(X[3, :1000], label='ACC-Y-4: Input signal')
plt.plot(X[4, :1000], label='ACC-Z-5: Input signal')
plt.legend()
plt.show()
X_train = X.T
Y_train = Y.T
X_train.shape, Y_train.shape
# Preparing the training data for seq2seq learning
Y_train_in = Y_train[:-2]
Y_train_out = Y_train[:-1]
# Y_train_in.shape, Y_train.shape
firstrow = np.zeros([1, 1])
# firstrow.shape, Y_train_in.shape
Y_train_in = np.row_stack((firstrow, Y_train_in))
# Y_train_in.shape, Y_train_in[:5], Y_train[:5]
# Y_train_out = Y_train.copy()
X_train.shape, Y_train_in.shape, Y_train_out.shape
XY_train = (X_train, Y_train)
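# Minimal sketch of the teacher-forcing shift above, on a toy target column
# (toy_Y, toy_in, toy_out are illustrative names only): the decoder input at
# step t is the target from step t-1, with a zero start token.
toy_Y = np.arange(5.0).reshape(-1, 1)                   # targets [0, 1, 2, 3, 4]^T
toy_in = np.row_stack((np.zeros((1, 1)), toy_Y[:-2]))   # decoder inputs  [0, 0, 1, 2]^T
toy_out = toy_Y[:-1]                                    # decoder targets [0, 1, 2, 3]^T
toy_in.shape, toy_out.shape                             # both (4, 1), aligned step by step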
# Read and normalize one batch/file of data
def read_data(data_path):
band = spio.loadmat(data_path)
data = band['sig']
data = np.array(data)
# data.shape, band['sig'].size/6
    # Normalize each batch of the data (each batch = one file)
    # Open question: could all files be normalized jointly instead?
mean = np.mean(data, axis=1)
var = np.var(data, axis=1)
std = np.sqrt(var)
mean.shape, var.shape, std.shape
mean = mean.reshape(-1, 1)
std = std.reshape(-1, 1)
mean.shape, std.shape
data_norm = (data - mean)/std
# print(data.shape, data_norm.shape)
# ECG
Y = data_norm[0]
# PPG+ACC
X = data_norm[1:]
# Y.shape, X.shape
Y = Y.reshape(1, -1)
# Y.shape, X.shape
X_train = X.T
Y_train = Y.T
# print(X_train.shape, Y_train.shape)
return X_train, Y_train
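# Example usage, a minimal sketch (X_chk / Y_chk are illustrative names): re-read
# the same file through the helper and confirm the shapes prepared above.
X_chk, Y_chk = read_data(data_path=data_path)
X_chk.shape, Y_chk.shape   # expected: (T, 5) inputs and (T, 1) ECG target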
In [4]:
# Model or Network
import impl.layer as l
class GRU:
def __init__(self, D, C, H, L, p_dropout):
self.D = D # number of input dimensions
self.H = H # number of hidden units
self.L = L # number of hidden layers
self.C = C # number of output classes/dimensions
self.losses = {'train':[], 'smooth train':[]}
self.p_dropout = p_dropout
# Input sequence model parameters
Z = H + D
params_in_seq = dict(
Wz=np.random.randn(Z, H) / np.sqrt(Z / 2.),
Wr=np.random.randn(Z, H) / np.sqrt(Z / 2.),
Wh=np.random.randn(Z, H) / np.sqrt(Z / 2.),
Wy=np.random.randn(H, D) / np.sqrt(H / 2.),
bz=np.zeros((1, H)),
br=np.zeros((1, H)),
bh=np.zeros((1, H)),
by=np.zeros((1, D))
)
        # The last (top) layer of the input mode: no output projection (Wy, by)
params_in_seq_ = dict(
Wz=np.random.randn(Z, H) / np.sqrt(Z / 2.),
Wr=np.random.randn(Z, H) / np.sqrt(Z / 2.),
Wh=np.random.randn(Z, H) / np.sqrt(Z / 2.),
bz=np.zeros((1, H)),
br=np.zeros((1, H)),
bh=np.zeros((1, H))
)
# Output sequence model parameters
Z = H + C
params_out_seq = dict(
Wz=np.random.randn(Z, H) / np.sqrt(Z / 2.),
Wr=np.random.randn(Z, H) / np.sqrt(Z / 2.),
Wh=np.random.randn(Z, H) / np.sqrt(Z / 2.),
Wy=np.random.randn(H, C) / np.sqrt(H / 2.),
bz=np.zeros((1, H)),
br=np.zeros((1, H)),
bh=np.zeros((1, H)),
by=np.zeros((1, C))
)
# Model parameters
self.model = []
num_modes = 2 # num of modality as the source of sequences
for _ in range(num_modes):
self.model.append([])
for _ in range(self.L-1):
self.model[0].append(params_in_seq)
# The last layer: self.L-1
self.model[0].append(params_in_seq_)
# Number of layers for each mode
for _ in range(self.L):
self.model[1].append(params_out_seq)
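        # Note (hedged): the appends above reuse the same parameter dicts, so when a
        # mode stacks more than one copy of a dict (e.g. L > 1 for the output mode),
        # those layers share a single set of weight arrays.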
def initial_state(self):
return np.zeros((1, self.H))
    # Inverted dropout: p_dropout is used here as the keep probability
def dropout_forward(self, X, p_dropout):
u = np.random.binomial(1, p_dropout, size=X.shape) / p_dropout
# q = 1- p_dropout # q: keep_prob
# u = np.random.binomial(1, q, size=X.shape)
out = X * u
cache = u
return out, cache
def dropout_backward(self, dout, cache):
u = cache
dX = dout * u
return dX
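    # A note on the inverted dropout above (the forward pass below uses
    # l.dropout_forward from impl.layer, presumably the same scheme): with keep
    # probability p, surviving activations are scaled by 1/p so the expected
    # value is preserved, E[x * Bernoulli(p) / p] = x. For example, with
    # p = 0.95 a kept unit is scaled by 1/0.95 ~= 1.053 and a dropped unit by 0.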
def forward(self, X, h, m):
Wz, Wr, Wh, Wy = m['Wz'], m['Wr'], m['Wh'], m['Wy']
bz, br, bh, by = m['bz'], m['br'], m['bh'], m['by']
X_one_hot = X.copy()
h_old = h.copy()
X = np.column_stack((h_old, X_one_hot))
hz, hz_cache = l.fc_forward(X, Wz, bz)
hz, hz_sigm_cache = l.sigmoid_forward(hz)
hr, hr_cache = l.fc_forward(X, Wr, br)
hr, hr_sigm_cache = l.sigmoid_forward(hr)
X_prime = np.column_stack((hr * h_old, X_one_hot))
hh, hh_cache = l.fc_forward(X_prime, Wh, bh)
hh, hh_tanh_cache = l.tanh_forward(hh)
# The final output
h = (1. - hz) * h_old + hz * hh
y, y_cache = l.fc_forward(h, Wy, by)
y, do_cache = l.dropout_forward(y, p_dropout=self.p_dropout)
cache = (X, X_prime, h_old, hz, hz_cache, hz_sigm_cache, hr, hr_cache, hr_sigm_cache,
hh, hh_cache, hh_tanh_cache, h, y_cache, do_cache)
return y, h, cache
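    # Gate equations implemented by forward() above, in this code's convention
    # (here z gates the candidate h_hat; some write-ups use the opposite
    # convention, which is equivalent as long as forward and backward agree):
    #   z     = sigmoid([h_old, x] Wz + bz)
    #   r     = sigmoid([h_old, x] Wr + br)
    #   h_hat = tanh([r * h_old, x] Wh + bh)
    #   h_new = (1 - z) * h_old + z * h_hat
    #   y     = dropout(h_new Wy + by)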
def backward(self, dy, dh, cache):
X, X_prime, h_old, hz, hz_cache, hz_sigm_cache, hr, hr_cache, hr_sigm_cache, hh, hh_cache, hh_tanh_cache, h, y_cache, do_cache = cache
dy = l.dropout_backward(dy, do_cache)
dh_next = dh.copy()
dh, dWy, dby = l.fc_backward(dy, y_cache)
dh += dh_next
dhh = hz * dh
dh_old1 = (1. - hz) * dh
dhz = hh * dh - h_old * dh
dhh = l.tanh_backward(dhh, hh_tanh_cache)
dX_prime, dWh, dbh = l.fc_backward(dhh, hh_cache)
dh_prime = dX_prime[:, :self.H]
dh_old2 = hr * dh_prime
dhr = h_old * dh_prime
dhr = l.sigmoid_backward(dhr, hr_sigm_cache)
dXr, dWr, dbr = l.fc_backward(dhr, hr_cache)
dhz = l.sigmoid_backward(dhz, hz_sigm_cache)
dXz, dWz, dbz = l.fc_backward(dhz, hz_cache)
dX = dXr + dXz
dh_old3 = dX[:, :self.H]
# The final output
dh = dh_old1 + dh_old2 + dh_old3
dX = dX[:, self.H:]
grad = dict(Wz=dWz, Wr=dWr, Wh=dWh, Wy=dWy, bz=dbz, br=dbr, bh=dbh, by=dby)
return dX, dh, grad
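    # Backward chain rule through h = (1 - z) * h_old + z * h_hat, as used above:
    #   dh_hat = z * dh
    #   dh_old = (1 - z) * dh              (first of three h_old paths)
    #   dz     = (h_hat - h_old) * dh
    # The other two h_old contributions arrive through the reset gate
    # (hr * dh_prime) and through the [h_old, x] slice of dX.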
def forward_(self, X, h, m):
Wz, Wr, Wh = m['Wz'], m['Wr'], m['Wh']
bz, br, bh = m['bz'], m['br'], m['bh']
X_one_hot = X.copy()
h_old = h.copy()
X = np.column_stack((h_old, X_one_hot))
hz, hz_cache = l.fc_forward(X, Wz, bz)
hz, hz_sigm_cache = l.sigmoid_forward(hz)
hr, hr_cache = l.fc_forward(X, Wr, br)
hr, hr_sigm_cache = l.sigmoid_forward(hr)
X_prime = np.column_stack((hr * h_old, X_one_hot))
hh, hh_cache = l.fc_forward(X_prime, Wh, bh)
hh, hh_tanh_cache = l.tanh_forward(hh)
# The output
h = (1. - hz) * h_old + hz * hh
cache = (X, X_prime, h_old, hz, hz_cache, hz_sigm_cache, hr, hr_cache, hr_sigm_cache, hh, hh_cache,
hh_tanh_cache)
return h, cache
def backward_(self, dh, cache):
X, X_prime, h_old, hz, hz_cache, hz_sigm_cache, hr, hr_cache, hr_sigm_cache, hh, hh_cache, hh_tanh_cache = cache
dhh = hz * dh
dh_old1 = (1. - hz) * dh
dhz = hh * dh - h_old * dh
dhh = l.tanh_backward(dhh, hh_tanh_cache)
dX_prime, dWh, dbh = l.fc_backward(dhh, hh_cache)
dh_prime = dX_prime[:, :self.H]
dh_old2 = hr * dh_prime
dhr = h_old * dh_prime
dhr = l.sigmoid_backward(dhr, hr_sigm_cache)
dXr, dWr, dbr = l.fc_backward(dhr, hr_cache)
dhz = l.sigmoid_backward(dhz, hz_sigm_cache)
dXz, dWz, dbz = l.fc_backward(dhz, hz_cache)
dX = dXr + dXz
dh_old3 = dX[:, :self.H]
# The final output
dh = dh_old1 + dh_old2 + dh_old3
dX = dX[:, self.H:]
grad = dict(Wz=dWz, Wr=dWr, Wh=dWh, bz=dbz, br=dbr, bh=dbh)
return dX, dh, grad
def train_forward(self, XY_train, h):
        # One cache list per mode, and one per layer within each mode
caches = []
num_modes = 2
for _ in range(num_modes):
caches.append([])
h_init = h.copy()
h = []
for _ in range(self.L):
h.append(h_init.copy())
caches[0].append([])
caches[1].append([])
ys = []
X, Y = XY_train
# Input sequence
for x in X:
# print(x.shape)
x= x.reshape(1, -1) # mat_1xn
# print(x.shape)
for layer in range(self.L-1):
x, h[layer], cache = self.forward(x, h[layer], self.model[0][layer])
caches[0][layer].append(cache)
# The last layer without output
layer = self.L - 1
h[layer], cache = self.forward_(x, h[layer], self.model[0][layer])
caches[0][layer].append(cache)
# Output sequence
for y in Y:
# print(y.shape)
y= y.reshape(1, -1) # mat_1xn
# print(y.shape)
for layer in range(self.L):
y, h[layer], cache = self.forward(y, h[layer], self.model[1][layer])
caches[1][layer].append(cache)
# Output list
ys.append(y)
# Testing how to convert list to np.array
# print(np.array(ys).reshape(1, -1).shape, y.shape)
# ys = np.array(ys).reshape(1, -1)
return ys, caches
def l2_regression(self, y_pred, y_train):
m = y_pred.shape[0]
# print('y_pred.shape[0]', m)
# (F(x)-y)^2: convex as X^2 or (aX-b)^2
data_loss = 0.5 * np.sum((y_pred - y_train)**2) / m # number of dimensions
return data_loss
def dl2_regression(self, y_pred, y_train):
m = y_pred.shape[0]
# print('y_pred.shape[0]', m)
# (F(x)-y)^2: convex as X^2 or (aX-b)^2
dy = (y_pred - y_train)/ m # number of dimensions
return dy
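    # Worked check with illustrative numbers: for m = 1, y_pred = 1.5 and
    # y_train = 1.0 give loss = 0.5 * (0.5)**2 = 0.125 and dy = 0.5, which is
    # indeed d(loss)/d(y_pred) = (y_pred - y_train) / m.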
def loss_function(self, y_pred, y_train):
loss, dys = 0.0, []
# Trying to convert list to np.array
# print(np.array(y_pred).reshape(-1, 1).shape, y_train.shape) # (n, 1)=nx1
# y_pred = np.array(y_pred).reshape(-1, 1) # from (n, 1, 1) to (n, 1)
for y, Y in zip(y_pred, y_train):
loss += self.l2_regression(y_pred=y, y_train=Y)
dy = self.dl2_regression(y_pred=y, y_train=Y)
dys.append(dy)
# print(np.array(dys).reshape(-1, 1).shape)
# dys = np.array(dys).reshape(-1, 1)
return loss, dys
def train_backward(self, dys, caches):
dh, grad, grads = [], [], []
num_modes = 2
for _ in range(num_modes):
grad.append([])
grads.append([])
for _ in range(self.L):
dh.append(np.zeros((1, self.H)))
for mode in range(num_modes):
for layer in range(self.L):
grad[mode].append({key: np.zeros_like(val) for key, val in self.model[mode][layer].items()})
grads[mode].append({key: np.zeros_like(val) for key, val in self.model[mode][layer].items()})
# Output sequence
mode = 1
for t in reversed(range(len(dys))):
dX = dys[t].copy()
for layer in reversed(range(self.L)):
dX, dh[layer], grad[mode][layer] = self.backward(dX, dh[layer], caches[mode][layer][t])
for key in grad[mode][layer].keys():
grads[mode][layer][key] += grad[mode][layer][key]
# Input sequence
mode = 0
for t in reversed(range(len(dys))):
            # Top (last) layer of the input mode: no output projection, so the gradient enters only through dh
layer = self.L-1
dX, dh[layer], grad[mode][layer] = self.backward_(dh[layer], caches[mode][layer][t])
for key in grad[mode][layer].keys():
grads[mode][layer][key] += grad[mode][layer][key]
            # Remaining (lower) layers of the input mode
for layer in reversed(range(self.L-1)):
dX, dh[layer], grad[mode][layer] = self.backward(dX, dh[layer], caches[mode][layer][t])
for key in grad[mode][layer].keys():
grads[mode][layer][key] += grad[mode][layer][key]
return dX, grads
In [5]:
def get_minibatch(X, Y, minibatch_size): # no shuffling: shuffling suits static (i.i.d.) data, not sequential data
minibatches = []
# for i in range(start=0, stop=X.shape[0], step=minibatch_size):
# for i in range(0, X.shape[0], minibatch_size):
for i in range(0, X.shape[0] - minibatch_size + 1, 1):
X_mini = X[i:i + minibatch_size]
Y_mini = Y[i:i + minibatch_size]
minibatches.append((X_mini, Y_mini))
return minibatches
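# Minimal usage sketch (_X_demo and _Y_demo are illustrative arrays): the stride-1
# sliding window above yields N - minibatch_size + 1 overlapping minibatches.
_X_demo = np.arange(10).reshape(-1, 1)
_Y_demo = np.arange(10).reshape(-1, 1)
len(get_minibatch(_X_demo, _Y_demo, minibatch_size=4))   # 10 - 4 + 1 = 7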
def adam_rnn(nn, alpha, mb_size, n_iter, print_after, n_files):
M, R = [], []
num_modes = 2
for _ in range(num_modes):
M.append([])
R.append([])
for mode in range(num_modes):
for layer in range(nn.L):
M[mode].append({key: np.zeros_like(val) for key, val in nn.model[mode][layer].items()})
R[mode].append({key: np.zeros_like(val) for key, val in nn.model[mode][layer].items()})
beta1 = .99
beta2 = .999
state = nn.initial_state()
eps = 1e-8 # constant
smooth_loss = 1.0 #np.log(len(X_train))
# Epochs
for iter in range(1, n_iter + 1):
# Fullbatch: Read the new file, normalize it, and separate it into input and target
for file_num in range(1, n_files + 1):
if file_num == 1: data_path = '/Users/arasdar/data/Training_data/DATA_{:02}_TYPE01.mat'.format(file_num)
else: data_path = '/Users/arasdar/data/Training_data/DATA_{:02}_TYPE02.mat'.format(file_num)
# if file_num == 1: data_path = '/home/arasdar/data/Training_data/DATA_{:02}_TYPE01.mat'.format(file_num)
# else: data_path = '/home/arasdar/data/Training_data/DATA_{:02}_TYPE02.mat'.format(file_num)
print(data_path)
            # Read the .mat file, normalize it, and separate the output and input channels
X_train, Y_train = read_data(data_path=data_path)
# X_train, Y_train_in = XY_train
# print(X_train.shape, Y_train_in.shape, Y_train_out.shape)
# Minibatches/ Stochasticity
# minibatches = get_minibatch(X_train, Y_train, mb_size, shuffle=False)
minibatches = get_minibatch(X=X_train, Y=Y_train, minibatch_size=mb_size) #, shuffle=False
# print('The number of minibatches in a sequence for each epoch iteration: {}'.format (len(minibatches)))
for idx in range(len(minibatches)):
X_mini, Y_mini = minibatches[idx]
Y_mini_out = Y_mini[:-1] # mat[t, n]== mat_txn
Y_mini_in = np.row_stack((np.array([0.0]), Y_mini[:-2]))
# print(X_mini.shape, Y_mini.shape, Y_mini_out.shape)
# print(X_mini[:2], Y_mini_in[:2], Y_mini_out[:2])
# print(Y_mini_in[:4], Y_mini_out[:4])
# print(nn.model[0][0]['Wz'].shape, nn.model[1][0]['Wz'].shape, nn.C, nn.D)
# print(nn.model[0][0]['Wr'].shape, nn.model[1][0]['Wh'].shape, nn.C, nn.D)
XY_mini = (X_mini, Y_mini_in)
ys, caches = nn.train_forward(XY_mini, state)
loss, dys = nn.loss_function(y_train=Y_mini_out, y_pred=ys)
_, grads = nn.train_backward(dys, caches)
nn.losses['train'].append(loss)
smooth_loss = (0.999 * smooth_loss) + (0.001 * loss)
nn.losses['smooth train'].append(smooth_loss)
# Descend
for mode in range(num_modes):
for layer in range(nn.L):
for key in grads[mode][layer].keys(): #key, value: items
M[mode][layer][key] = l.exp_running_avg(M[mode][layer][key], grads[mode][layer][key], beta1)
R[mode][layer][key] = l.exp_running_avg(R[mode][layer][key], grads[mode][layer][key]**2, beta2)
m_k_hat = M[mode][layer][key] / (1. - (beta1**(iter)))
r_k_hat = R[mode][layer][key] / (1. - (beta2**(iter)))
nn.model[mode][layer][key] -= alpha * m_k_hat / (np.sqrt(r_k_hat) + eps)
# Print training loss
if iter % print_after == 0:
print('Iter-{} training loss: {:.4f}'.format(iter, loss))
return nn
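# The Adam update applied above, bias-corrected with the epoch counter `iter`
# (assuming l.exp_running_avg(avg, g, beta) returns beta * avg + (1 - beta) * g):
#   M = beta1 * M + (1 - beta1) * g,    R = beta2 * R + (1 - beta2) * g**2
#   m_hat = M / (1 - beta1**iter),      r_hat = R / (1 - beta2**iter)
#   theta -= alpha * m_hat / (sqrt(r_hat) + eps)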
In [6]:
# hyper parameters
n_iter = 13 # number of epochs (controls total training time)
n_files = 1 # number of training files to use (the dataset covers 12 subjects)
print_after = n_iter//10 # print the training loss every `print_after` epochs
time_step = 100 # minibatch/window size: number of time steps per training window
alpha = 1e-3 # learning rate
num_hidden_units = 64 # number of hidden units per layer (network width)
num_hidden_layers = 1 # number of hidden layers (network depth)
num_input_units = X_train.shape[1] # number of input features/dimensions
num_output_units = Y_train.shape[1] # number of output dimensions (here 1: the ECG target)
# X_train.shape, Y_train.shape
p_dropout = 0.95 # passed to dropout_forward as the keep probability: 0.95 keeps 95% of units
net = GRU(D=num_input_units, H=num_hidden_units, L=num_hidden_layers, C=num_output_units, p_dropout=p_dropout)
adam_rnn(nn=net, alpha=alpha, mb_size=time_step, n_iter=n_iter, n_files=n_files, print_after=print_after)
Out[6]:
In [9]:
# Display the learning curve (smoothed training loss)
# %matplotlib inline
import matplotlib.pyplot as plt
# plt.plot(net.losses['train'], label='Train loss')
plt.plot(net.losses['smooth train'], label='Smooth train loss')
plt.legend()
plt.show()
In [ ]: