In [51]:
import pandas as pd # to read CSV files (Comma Separated Values)

train_x = pd.read_csv(filepath_or_buffer='data/kaggle-music-genre/train.x.csv')
train_x.head()


Out[51]:
Id att1 att2 att3 att4 att5 att6 att7 att8 att9 ... att18 att19 att20 att21 att22 att23 att24 att25 att26 msd_track_id
0 1 41.08 6.579 4.307 3.421 3.192 2.076 2.179 2.052 1.794 ... 1.3470 -0.2463 -1.5470 0.17920 -1.1530 -0.7370 0.40750 -0.67190 -0.05147 TRPLTEM128F92E1389
1 2 60.80 5.973 4.344 3.261 2.835 2.725 2.446 1.884 1.962 ... -0.3316 0.3519 -1.4760 0.52700 -2.1960 1.5990 -1.39000 0.22560 -0.72080 TRJWMBQ128F424155E
2 3 51.47 4.971 4.316 2.916 3.112 2.290 2.053 1.934 1.878 ... -0.2803 -0.1603 -0.1355 1.03500 0.2370 1.4890 0.02959 -0.13670 0.10820 TRRZWMO12903CCFCC2
3 4 41.28 6.610 4.411 2.602 2.822 2.126 1.984 1.973 1.945 ... -1.6930 1.0040 -0.3953 0.26710 -1.0450 0.4974 0.03724 1.04500 -0.20000 TRBZRUT12903CE6C04
4 5 54.17 8.945 4.685 4.208 3.154 3.527 2.733 2.202 2.686 ... 2.4690 -0.5449 -0.5622 -0.08968 -0.9823 -0.2445 -1.65800 -0.04825 -0.70950 TRLUJQF128F42AF5BF

5 rows × 28 columns


In [52]:
train_y = pd.read_csv(filepath_or_buffer='data/kaggle-music-genre/train.y.csv')
train_y.head()


Out[52]:
Id class_label
0 1 International
1 2 Vocal
2 3 Latin
3 4 Blues
4 5 Vocal
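
The feature rows in train.x.csv and the label rows in train.y.csv are assumed to correspond via the Id column. A quick sanity check (an added sketch, not part of the original run) would be:

# Hypothetical check that the Id columns of features and labels line up row-for-row
assert (train_x['Id'].values == train_y['Id'].values).all()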

In [53]:
test_x = pd.read_csv(filepath_or_buffer='data/kaggle-music-genre/test.x.csv')
test_x.head()


Out[53]:
Id att1 att2 att3 att4 att5 att6 att7 att8 att9 ... att17 att18 att19 att20 att21 att22 att23 att24 att25 att26
0 1 38.22 8.076 6.935 4.696 3.856 3.465 2.922 2.568 2.070 ... 3.988 0.4957 0.1836 -2.2210 0.6453 -0.2923 1.2000 -0.09179 0.4674 0.2158
1 2 36.42 6.131 5.364 4.292 3.968 2.937 2.872 2.142 2.050 ... 7.098 1.2290 0.5971 -1.0670 0.9569 -1.8240 2.3130 -0.80890 0.5612 -0.6225
2 3 70.01 5.496 4.698 3.699 3.258 2.293 2.680 2.226 2.034 ... 4.449 0.4773 1.6370 -1.0690 2.4160 -0.6299 1.4190 -0.81960 0.9151 -0.5948
3 4 40.64 7.281 6.702 4.043 3.729 3.043 2.644 2.366 1.940 ... 2.785 1.9000 -1.1370 1.2750 1.7920 -2.1250 1.6090 -0.83230 -0.1998 -0.1218
4 5 38.85 7.118 5.703 4.825 4.088 3.823 3.254 2.551 2.193 ... 4.536 2.1470 1.0200 -0.2656 2.8050 0.2762 0.2504 1.04900 0.3447 -0.7689

5 rows × 27 columns
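
Note that the test features have 27 columns versus 28 in the training features; the difference is the msd_track_id column, which only appears in train.x.csv. A quick check (an added sketch):

# The test set is expected to lack only the msd_track_id column
set(train_x.columns) - set(test_x.columns)   # -> {'msd_track_id'}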


In [54]:
test_y_sample = pd.read_csv(filepath_or_buffer='data/kaggle-music-genre/submission-random.csv')
test_y_sample.head()


Out[54]:
Id Blues Country Electronic Folk International Jazz Latin New_Age Pop_Rock Rap Reggae RnB Vocal
0 1 0.0964 0.0884 0.0121 0.1004 0.0137 0.1214 0.0883 0.0765 0.0332 0.0445 0.1193 0.1019 0.1038
1 2 0.0121 0.0804 0.0376 0.0289 0.1310 0.0684 0.1044 0.0118 0.1562 0.0585 0.1633 0.1400 0.0073
2 3 0.1291 0.0985 0.0691 0.0356 0.0788 0.0529 0.1185 0.1057 0.1041 0.0075 0.0481 0.1283 0.0238
3 4 0.0453 0.1234 0.0931 0.0126 0.1224 0.0627 0.0269 0.0764 0.0812 0.1337 0.0357 0.0937 0.0930
4 5 0.0600 0.0915 0.0667 0.0947 0.0509 0.0335 0.1251 0.0202 0.1012 0.0365 0.1310 0.0898 0.0991

In [55]:
test_y_sample[:0]


Out[55]:
Id Blues Country Electronic Folk International Jazz Latin New_Age Pop_Rock Rap Reggae RnB Vocal

In [56]:
import numpy as np

train_X = np.array(train_x)
train_Y = np.array(train_y[:]['class_label'])
test_X = np.array(test_x)

# Drop the Id column from both sets, and the trailing msd_track_id column from the training features
X_train_val = np.array(train_X[:, 1:-1], dtype=float)
X_test = np.array(test_X[:, 1:], dtype=float)

train_Y.shape


Out[56]:
(13000,)

In [57]:
from collections import Counter

# Count how often each genre label occurs in the training set
counted_labels = Counter(train_Y)
labels_keys = counted_labels.keys()
labels_keys


Out[57]:
dict_keys(['Folk', 'Electronic', 'Vocal', 'Blues', 'International', 'Rap', 'Pop_Rock', 'Reggae', 'RnB', 'Jazz', 'New_Age', 'Country', 'Latin'])
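
Beyond the distinct keys, the same Counter gives the number of examples per genre, which is handy for spotting class imbalance (an added sketch):

# Genres ordered from most to least frequent in the training labels
counted_labels.most_common()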

In [58]:
labels_keys_sorted = sorted(labels_keys)
labels_keys_sorted


Out[58]:
['Blues',
 'Country',
 'Electronic',
 'Folk',
 'International',
 'Jazz',
 'Latin',
 'New_Age',
 'Pop_Rock',
 'Rap',
 'Reggae',
 'RnB',
 'Vocal']

In [59]:
# Dictionary comprehension: map each sorted label to an integer index (label vocabulary)
key_to_val = {key: val for val, key in enumerate(labels_keys_sorted)}
key_to_val['Country']
key_to_val


Out[59]:
{'Blues': 0,
 'Country': 1,
 'Electronic': 2,
 'Folk': 3,
 'International': 4,
 'Jazz': 5,
 'Latin': 6,
 'New_Age': 7,
 'Pop_Rock': 8,
 'Rap': 9,
 'Reggae': 10,
 'RnB': 11,
 'Vocal': 12}

In [60]:
val_to_key = {val: key for val, key in enumerate(labels_keys_sorted)}
val_to_key[1]
val_to_key


Out[60]:
{0: 'Blues',
 1: 'Country',
 2: 'Electronic',
 3: 'Folk',
 4: 'International',
 5: 'Jazz',
 6: 'Latin',
 7: 'New_Age',
 8: 'Pop_Rock',
 9: 'Rap',
 10: 'Reggae',
 11: 'RnB',
 12: 'Vocal'}

In [61]:
Y_train_vec = []
for each in train_y[:]['class_label']:
#     print(each, key_to_val[each])
    Y_train_vec.append(key_to_val[each])

Y_train_val = np.array(Y_train_vec)
Y_train_val.shape


Out[61]:
(13000,)
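
The loop above maps each string label to its integer index. A vectorized equivalent using pandas (a sketch built on the same key_to_val mapping) would be:

# Map each class_label string to its integer code, then compare with the loop's result
Y_train_val_alt = train_y['class_label'].map(key_to_val).to_numpy()
assert (Y_train_val_alt == Y_train_val).all()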

In [62]:
# # Pre-processing: normalizing
# def normalize(X):
#     # max scale for images 255= 2**8= 8 bit grayscale for each channel
#     return (X - X.mean(axis=0)) #/ X.std(axis=0)
# X_train, X_val, X_test = normalize(X=X_train), normalize(X=X_val), normalize(X=X_test)

# Preprocessing: standardize both feature matrices with the mean/std of the labeled (train + validation) rows
mean = X_train_val.mean(axis=0)
std = X_train_val.std(axis=0)

X_train_val, X_test = (X_train_val - mean)/ std, (X_test - mean)/ std
X_train_val.shape, X_test.shape, X_train_val.dtype, X_test.dtype


Out[62]:
((13000, 26), (10400, 26), dtype('float64'), dtype('float64'))

In [63]:
# Create a validation set: hold out the last 10% of the labeled training data
valid_size = X_train_val.shape[0]//10
valid_size
X_val = X_train_val[-valid_size:]
Y_val = Y_train_val[-valid_size:]
X_train = X_train_val[: -valid_size]
Y_train = Y_train_val[: -valid_size]
X_train.shape, X_val.shape, X_test.shape, Y_val.shape, Y_train.shape
# X_train.dtype, X_val.dtype
# Y_train.dtype, Y_val


Out[63]:
((11700, 26), (1300, 26), (10400, 26), (1300,), (11700,))
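
One caveat: the mean and std above were computed over all 13,000 labeled rows before the last 1,300 were held out, so the validation rows contribute to the normalization statistics. A leak-free variant (a sketch, not what was run here) would split first and standardize with training-only statistics:

# Hypothetical leak-free preprocessing: split first, then standardize with train-only stats
X_tr_raw = np.array(train_X[:-valid_size, 1:-1], dtype=float)
X_va_raw = np.array(train_X[-valid_size:, 1:-1], dtype=float)
mu, sigma = X_tr_raw.mean(axis=0), X_tr_raw.std(axis=0)
X_tr, X_va = (X_tr_raw - mu) / sigma, (X_va_raw - mu) / sigma
# X_test would then be standardized with the same mu and sigma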

In [64]:
# Model
import impl.layer as l # or from impl.layer import *
from impl.loss import * # import all functions from impl.loss file # import impl.loss as loss_func
from sklearn.utils import shuffle as skshuffle

class FFNN:

    def __init__(self, D, C, H, L, keep_prob):
        self.L = L # number of layers or depth
        self.losses = {'train':[], 'train_acc':[], 'valid':[], 'valid_acc':[]}
        self.keep_prob = keep_prob # 1 - p_dropout (stored, but dropout is not applied in the forward pass below)
        
        # Randomly initialized, learnable parameters (weights and biases) of the feedforward network
        self.model = []
        self.grads = []
        low, high = -1, 1
        
        # Input layer: weights/ biases
        m = dict(W=np.random.uniform(size=(D, H), low=low, high=high) / np.sqrt(D / 2.), 
                 b=np.zeros((1, H)))
        self.model.append(m)
        # Input layer: gradients
        self.grads.append({key: np.zeros_like(val) for key, val in self.model[0].items()})

        # Hidden layers: weights/ biases
        m_L = []
        for _ in range(L):
            m = dict(W=np.random.uniform(size=(H, H), low=low, high=high) / np.sqrt(H / 2.), 
                     b=np.zeros((1, H)))
            m_L.append(m)
        self.model.append(m_L)
        # Hidden layer: gradients
        grad_L = []
        for _ in range(L):
            grad_L.append({key: np.zeros_like(val) for key, val in self.model[1][0].items()})
        self.grads.append(grad_L)
        
        # Output layer: weights/ biases
        m = dict(W=np.random.uniform(size=(H, C), low=low, high=high) / np.sqrt(H / 2.), 
                 b=np.zeros((1, C)))
        self.model.append(m)
        # Output layer: gradients
        self.grads.append({key: np.zeros_like(val) for key, val in self.model[2].items()})
        
    def fc_forward(self, X, W, b):
        out = (X @ W) + b
        cache = (W, X)
        return out, cache

    def fc_backward(self, dout, cache):
        W, X = cache

        dW = X.T @ dout
        db = np.sum(dout, axis=0).reshape(1, -1) # sum over the batch; shape (1, n)
        dX = dout @ W.T # Backprop

        return dX, dW, db

    def train_forward(self, X, train):
        caches = []
        
        # Input layer
        y, fc_cache = self.fc_forward(X=X, W=self.model[0]['W'], b=self.model[0]['b']) # X_1xD, y_1xc
        y, nl_cache = l.selu_forward(X=y)
        if train:
            caches.append((fc_cache, nl_cache))
        X = y.copy() # pass to the next layer
        
        # Hidden layers
        fc_caches, nl_caches = [], []
        for layer in range(self.L):
            y, fc_cache = self.fc_forward(X=X, W=self.model[1][layer]['W'], b=self.model[1][layer]['b'])
            y, nl_cache = l.selu_forward(X=y)
            if train:
                fc_caches.append(fc_cache)
                nl_caches.append(nl_cache)
            X = y.copy() # pass to next layer
        if train:
            caches.append((fc_caches, nl_caches)) # caches[1]            
        
        # Output layer
        y, fc_cache = self.fc_forward(X=X, W=self.model[2]['W'], b=self.model[2]['b'])
        # Softmax is included in loss function
        if train:
            caches.append(fc_cache)

        return y, caches # caches are kept for backpropagating the error

    def loss_function(self, y, y_train):
        
        loss = cross_entropy(y, y_train) # softmax is included
        dy = dcross_entropy(y, y_train) # dsoftmax is included
        
        return loss, dy
        
    def train_backward(self, dy, caches):
        grads = self.grads # reuse the pre-allocated gradient buffers; entries are overwritten each iteration
        
        # Output layer
        fc_cache = caches[2]
        # dSoftmax is included in loss function
        dX, dW, db = self.fc_backward(dout=dy, cache=fc_cache)
        dy = dX.copy()
        grads[2]['W'] = dW
        grads[2]['b'] = db

        # Hidden layers
        fc_caches, nl_caches = caches[1]
        for layer in reversed(range(self.L)):
            dy = l.selu_backward(cache=nl_caches[layer], dout=dy) # diffable function
            dX, dW, db = self.fc_backward(dout=dy, cache=fc_caches[layer])
            dy = dX.copy()
            grads[1][layer]['W'] = dW
            grads[1][layer]['b'] = db
        
        # Input layer
        fc_cache, nl_cache = caches[0]
        dy = l.selu_backward(cache=nl_cache, dout=dy) # diffable function
        _, dW, db = self.fc_backward(dout=dy, cache=fc_cache)
        grads[0]['W'] = dW
        grads[0]['b'] = db

        return grads
    
    def test(self, X):
        y_logit, _ = self.train_forward(X, train=False)
        
        # Classification: turn logits into class probabilities, then pick the most likely class
        y_prob = l.softmax(y_logit)        # probabilities over the C classes
        y_pred = np.argmax(y_prob, axis=1) # predicted class index (used for accuracy)
        
        return y_pred, y_logit
        
    def get_minibatch(self, X, y, minibatch_size, shuffle):
        minibatches = []

        if shuffle:
            X, y = skshuffle(X, y)

        for i in range(0, X.shape[0], minibatch_size):
            X_mini = X[i:i + minibatch_size]
            y_mini = y[i:i + minibatch_size]
            minibatches.append((X_mini, y_mini))

        return minibatches

    def sgd(self, train_set, val_set, alpha, mb_size, n_iter, print_after):
        X_train, y_train = train_set
        X_val, y_val = val_set

        # Adam moment estimates (first and second moments) for every parameter group
        M, R = [], []
        M.append({key: np.zeros_like(val) for key, val in self.model[0].items()})
        R.append({key: np.zeros_like(val) for key, val in self.model[0].items()})

        M_, R_ = [], []
        for layer in range(self.L):
            M_.append({key: np.zeros_like(val) for key, val in self.model[1][layer].items()})
            R_.append({key: np.zeros_like(val) for key, val in self.model[1][layer].items()})
        M.append(M_)
        R.append(R_)

        M.append({key: np.zeros_like(val) for key, val in self.model[2].items()})
        R.append({key: np.zeros_like(val) for key, val in self.model[2].items()})

        # Adam exponential-decay rates for the moment estimates
        beta1 = .9
        beta2 = .99

        # Training iterations: each one draws a single random minibatch (not a full pass over the data)
        for iter in range(1, n_iter + 1):

            # Reshuffle the training set into minibatches and pick one at random
            minibatches = self.get_minibatch(X_train, y_train, mb_size, shuffle=True)
            idx = np.random.randint(0, len(minibatches))
            X_mini, y_mini = minibatches[idx]
            
            # Train the model
            y, caches = self.train_forward(X_mini, train=True)
            _, dy = self.loss_function(y, y_mini)
            grad = self.train_backward(dy, caches) 
            
            # Update the model
            for key in grad[0].keys():
                M[0][key] = l.exp_running_avg(M[0][key], grad[0][key], beta1)
                R[0][key] = l.exp_running_avg(R[0][key], grad[0][key]**2, beta2)
                m_k_hat = M[0][key] / (1. - (beta1**(iter)))
                r_k_hat = R[0][key] / (1. - (beta2**(iter)))
                self.model[0][key] -= alpha * m_k_hat / (np.sqrt(r_k_hat) + l.eps)

            for layer in range(self.L):
                for key in grad[1][layer].keys():
                    M[1][layer][key] = l.exp_running_avg(M[1][layer][key], grad[1][layer][key], beta1)
                    R[1][layer][key] = l.exp_running_avg(R[1][layer][key], grad[1][layer][key]**2, beta2)
                    m_k_hat = M[1][layer][key] / (1. - (beta1**(iter)))
                    r_k_hat = R[1][layer][key] / (1. - (beta2**(iter)))
                    self.model[1][layer][key] -= alpha * m_k_hat / (np.sqrt(r_k_hat) + l.eps)

            for key in grad[2].keys():
                M[2][key] = l.exp_running_avg(M[2][key], grad[2][key], beta1)
                R[2][key] = l.exp_running_avg(R[2][key], grad[2][key]**2, beta2)
                m_k_hat = M[2][key] / (1. - (beta1**(iter)))
                r_k_hat = R[2][key] / (1. - (beta2**(iter)))
                self.model[2][key] -= alpha * m_k_hat / (np.sqrt(r_k_hat) + l.eps)
                
            # Training metrics on the current minibatch
            y_pred, y_logit = self.test(X_mini)
            loss, _ = self.loss_function(y_logit, y_mini) # softmax is applied inside the cross-entropy loss
            self.losses['train'].append(loss)
            acc = np.mean(y_pred == y_mini) # minibatch accuracy
            self.losses['train_acc'].append(acc)

            # Validation metrics on the full held-out set
            y_pred, y_logit = self.test(X_val)
            valid_loss, _ = self.loss_function(y_logit, y_val) # softmax is applied inside the cross-entropy loss
            self.losses['valid'].append(valid_loss)
            valid_acc = np.mean(y_pred == y_val) # validation accuracy
            self.losses['valid_acc'].append(valid_acc)
            
            # Periodically print training and validation loss/accuracy
            if iter % print_after == 0:
                print('Iter: {}, train loss: {:.4f}, train acc: {:.4f}, valid loss: {:.4f}, valid acc: {:.4f}'.format(
                    iter, loss, acc, valid_loss, valid_acc))

            #         # Test the final model
            #         y_pred, y_logit = nn.test(X_test)
            #         loss, _ = self.loss_function(y_logit, y_test) # softmax is included in entropy loss function
            #         acc = np.mean(y_pred == y_test)
            #         print('Last iteration - Test accuracy mean: {:.4f}, std: {:.4f}, loss: {:.4f}'.format(
            #             acc.mean(), acc.std(), loss))
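
The impl.layer and impl.loss modules are not shown in this notebook. For reference, a minimal NumPy sketch of what selu_forward and the softmax cross-entropy functions are assumed to compute (standard definitions, not necessarily the exact impl.* code) looks like this:

# Assumed SELU activation (Klambauer et al., 2017): scale * (x if x > 0 else alpha * (exp(x) - 1))
def selu_forward_sketch(X, alpha=1.6732632423543772, scale=1.0507009873554805):
    out = scale * np.where(X > 0, X, alpha * (np.exp(X) - 1))
    return out, X  # cache the pre-activation input for the backward pass

# Assumed softmax cross-entropy: softmax over the logits, mean negative log-likelihood,
# and the combined softmax + cross-entropy gradient with respect to the logits
def cross_entropy_sketch(logits, y_true):
    shifted = logits - logits.max(axis=1, keepdims=True)   # numerical stability
    probs = np.exp(shifted) / np.exp(shifted).sum(axis=1, keepdims=True)
    n = logits.shape[0]
    loss = -np.log(probs[np.arange(n), y_true] + 1e-12).mean()
    dlogits = probs.copy()
    dlogits[np.arange(n), y_true] -= 1.
    return loss, dlogits / n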

In [65]:
Y_train.shape, X_train.shape, X_val.shape, Y_val.shape


Out[65]:
((11700,), (11700, 26), (1300, 26), (1300,))
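
The sgd method above performs Adam-style updates with bias-corrected first and second moment estimates (beta1=0.9, beta2=0.99), where exp_running_avg and eps from impl.layer are assumed to be the usual exponential moving average and a small constant. A self-contained sketch of the per-parameter update it applies each iteration:

import numpy as np

def adam_step(W, dW, m, r, t, alpha=1e-3, beta1=0.9, beta2=0.99, eps=1e-8):
    """One Adam-style parameter update, mirroring the loops in FFNN.sgd (a sketch)."""
    m = beta1 * m + (1 - beta1) * dW        # running average of the gradient
    r = beta2 * r + (1 - beta2) * dW ** 2   # running average of the squared gradient
    m_hat = m / (1 - beta1 ** t)            # bias correction; t is the 1-based iteration count
    r_hat = r / (1 - beta2 ** t)
    W = W - alpha * m_hat / (np.sqrt(r_hat) + eps)
    return W, m, r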

In [66]:
# Hyper-parameters
n_iter = 10000 # number of training iterations (one random minibatch per iteration)
alpha = 1e-3 # learning rate
mb_size = 64 # minibatch size
print_after = 100 # print train/valid loss and accuracy every 100 iterations
num_hidden_units = 32 # number of hidden units per layer
num_input_units = X_train.shape[1] # number of input features (26 attributes)
num_output_units = Y_train.max() + 1 # number of classes in this classification problem
# num_output_units = Y_train.shape[1] # alternative when labels are one-hot encoded
num_layers = 2 # depth: number of hidden layers
keep_prob = 0.95 # SELU/alpha-dropout keep probability (not used by this implementation)

# Build the network and train it
nn = FFNN(C=num_output_units, D=num_input_units, H=num_hidden_units, L=num_layers, keep_prob=keep_prob)

nn.sgd(train_set=(X_train, Y_train), val_set=(X_val, Y_val), mb_size=mb_size, alpha=alpha, 
           n_iter=n_iter, print_after=print_after)


Iter: 100, train loss: 2.0799, train acc: 0.3125, valid loss: 2.1988, valid acc: 0.2669
Iter: 200, train loss: 2.1253, train acc: 0.3594, valid loss: 2.1337, valid acc: 0.3054
Iter: 300, train loss: 2.1808, train acc: 0.3125, valid loss: 2.0928, valid acc: 0.3177
Iter: 400, train loss: 1.8112, train acc: 0.3125, valid loss: 2.0758, valid acc: 0.3277
Iter: 500, train loss: 1.9613, train acc: 0.3438, valid loss: 2.0609, valid acc: 0.3300
Iter: 600, train loss: 2.0173, train acc: 0.3594, valid loss: 2.0526, valid acc: 0.3331
Iter: 700, train loss: 1.7603, train acc: 0.3594, valid loss: 2.0387, valid acc: 0.3292
Iter: 800, train loss: 1.7857, train acc: 0.3750, valid loss: 2.0305, valid acc: 0.3323
Iter: 900, train loss: 2.1262, train acc: 0.3438, valid loss: 2.0212, valid acc: 0.3438
Iter: 1000, train loss: 1.7583, train acc: 0.4375, valid loss: 2.0149, valid acc: 0.3508
Iter: 1100, train loss: 1.8847, train acc: 0.3594, valid loss: 2.0148, valid acc: 0.3408
Iter: 1200, train loss: 2.2867, train acc: 0.3438, valid loss: 2.0039, valid acc: 0.3431
Iter: 1300, train loss: 1.9065, train acc: 0.3594, valid loss: 2.0051, valid acc: 0.3400
Iter: 1400, train loss: 2.0015, train acc: 0.3750, valid loss: 2.0107, valid acc: 0.3462
Iter: 1500, train loss: 1.7813, train acc: 0.4062, valid loss: 1.9916, valid acc: 0.3569
Iter: 1600, train loss: 1.6513, train acc: 0.5156, valid loss: 1.9890, valid acc: 0.3485
Iter: 1700, train loss: 1.9226, train acc: 0.2969, valid loss: 1.9881, valid acc: 0.3515
Iter: 1800, train loss: 1.9271, train acc: 0.3750, valid loss: 1.9960, valid acc: 0.3585
Iter: 1900, train loss: 1.9561, train acc: 0.3125, valid loss: 1.9849, valid acc: 0.3577
Iter: 2000, train loss: 1.7468, train acc: 0.4062, valid loss: 1.9981, valid acc: 0.3469
Iter: 2100, train loss: 1.7485, train acc: 0.4219, valid loss: 1.9874, valid acc: 0.3438
Iter: 2200, train loss: 1.7120, train acc: 0.4219, valid loss: 1.9907, valid acc: 0.3485
Iter: 2300, train loss: 1.7211, train acc: 0.4844, valid loss: 1.9931, valid acc: 0.3638
Iter: 2400, train loss: 1.8597, train acc: 0.3750, valid loss: 1.9874, valid acc: 0.3492
Iter: 2500, train loss: 1.6148, train acc: 0.4375, valid loss: 1.9869, valid acc: 0.3546
Iter: 2600, train loss: 1.9247, train acc: 0.4062, valid loss: 1.9793, valid acc: 0.3485
Iter: 2700, train loss: 2.1673, train acc: 0.2812, valid loss: 1.9829, valid acc: 0.3531
Iter: 2800, train loss: 1.6941, train acc: 0.4688, valid loss: 1.9825, valid acc: 0.3446
Iter: 2900, train loss: 2.0092, train acc: 0.3594, valid loss: 1.9934, valid acc: 0.3500
Iter: 3000, train loss: 1.9561, train acc: 0.3594, valid loss: 1.9914, valid acc: 0.3492
Iter: 3100, train loss: 1.8354, train acc: 0.3438, valid loss: 1.9808, valid acc: 0.3508
Iter: 3200, train loss: 1.5549, train acc: 0.4844, valid loss: 1.9824, valid acc: 0.3523
Iter: 3300, train loss: 1.5683, train acc: 0.5000, valid loss: 1.9815, valid acc: 0.3669
Iter: 3400, train loss: 1.7742, train acc: 0.4219, valid loss: 1.9765, valid acc: 0.3554
Iter: 3500, train loss: 2.1070, train acc: 0.3594, valid loss: 1.9874, valid acc: 0.3523
Iter: 3600, train loss: 1.8632, train acc: 0.3438, valid loss: 1.9800, valid acc: 0.3469
Iter: 3700, train loss: 1.7199, train acc: 0.5312, valid loss: 1.9746, valid acc: 0.3538
Iter: 3800, train loss: 1.9062, train acc: 0.4062, valid loss: 1.9660, valid acc: 0.3562
Iter: 3900, train loss: 2.0902, train acc: 0.2344, valid loss: 1.9737, valid acc: 0.3515
Iter: 4000, train loss: 1.8538, train acc: 0.4688, valid loss: 1.9742, valid acc: 0.3554
Iter: 4100, train loss: 2.0096, train acc: 0.3750, valid loss: 1.9799, valid acc: 0.3538
Iter: 4200, train loss: 1.9119, train acc: 0.4062, valid loss: 1.9731, valid acc: 0.3685
Iter: 4300, train loss: 1.7919, train acc: 0.4219, valid loss: 1.9649, valid acc: 0.3608
Iter: 4400, train loss: 1.7566, train acc: 0.4219, valid loss: 1.9690, valid acc: 0.3592
Iter: 4500, train loss: 1.8899, train acc: 0.3750, valid loss: 1.9734, valid acc: 0.3569
Iter: 4600, train loss: 1.8028, train acc: 0.4531, valid loss: 1.9863, valid acc: 0.3523
Iter: 4700, train loss: 1.9527, train acc: 0.3281, valid loss: 1.9781, valid acc: 0.3477
Iter: 4800, train loss: 1.8451, train acc: 0.3906, valid loss: 1.9819, valid acc: 0.3569
Iter: 4900, train loss: 1.6553, train acc: 0.4375, valid loss: 1.9925, valid acc: 0.3423
Iter: 5000, train loss: 1.9356, train acc: 0.3125, valid loss: 1.9750, valid acc: 0.3592
Iter: 5100, train loss: 1.5883, train acc: 0.4375, valid loss: 1.9780, valid acc: 0.3508
Iter: 5200, train loss: 1.7207, train acc: 0.4219, valid loss: 1.9682, valid acc: 0.3662
Iter: 5300, train loss: 1.5718, train acc: 0.5312, valid loss: 1.9742, valid acc: 0.3623
Iter: 5400, train loss: 1.7518, train acc: 0.4219, valid loss: 1.9807, valid acc: 0.3400
Iter: 5500, train loss: 1.9872, train acc: 0.3281, valid loss: 1.9834, valid acc: 0.3638
Iter: 5600, train loss: 2.0309, train acc: 0.3594, valid loss: 1.9717, valid acc: 0.3623
Iter: 5700, train loss: 1.9841, train acc: 0.3906, valid loss: 1.9714, valid acc: 0.3600
Iter: 5800, train loss: 1.6851, train acc: 0.4375, valid loss: 1.9723, valid acc: 0.3569
Iter: 5900, train loss: 2.1889, train acc: 0.2500, valid loss: 1.9704, valid acc: 0.3631
Iter: 6000, train loss: 1.7258, train acc: 0.4531, valid loss: 1.9848, valid acc: 0.3446
Iter: 6100, train loss: 1.6587, train acc: 0.4844, valid loss: 1.9714, valid acc: 0.3562
Iter: 6200, train loss: 1.7574, train acc: 0.4531, valid loss: 1.9778, valid acc: 0.3554
Iter: 6300, train loss: 1.8034, train acc: 0.4219, valid loss: 1.9726, valid acc: 0.3631
Iter: 6400, train loss: 1.8055, train acc: 0.3906, valid loss: 1.9836, valid acc: 0.3515
Iter: 6500, train loss: 1.6675, train acc: 0.4844, valid loss: 1.9811, valid acc: 0.3585
Iter: 6600, train loss: 1.8209, train acc: 0.3750, valid loss: 1.9779, valid acc: 0.3608
Iter: 6700, train loss: 1.9507, train acc: 0.3125, valid loss: 1.9808, valid acc: 0.3531
Iter: 6800, train loss: 1.7994, train acc: 0.4062, valid loss: 1.9788, valid acc: 0.3600
Iter: 6900, train loss: 1.7434, train acc: 0.5000, valid loss: 1.9787, valid acc: 0.3662
Iter: 7000, train loss: 1.7142, train acc: 0.4375, valid loss: 1.9818, valid acc: 0.3615
Iter: 7100, train loss: 1.6337, train acc: 0.4688, valid loss: 1.9897, valid acc: 0.3554
Iter: 7200, train loss: 1.7296, train acc: 0.4688, valid loss: 1.9809, valid acc: 0.3646
Iter: 7300, train loss: 1.6340, train acc: 0.4531, valid loss: 1.9768, valid acc: 0.3692
Iter: 7400, train loss: 1.9278, train acc: 0.3125, valid loss: 1.9830, valid acc: 0.3562
Iter: 7500, train loss: 1.8554, train acc: 0.2969, valid loss: 1.9756, valid acc: 0.3762
Iter: 7600, train loss: 1.6829, train acc: 0.3906, valid loss: 1.9732, valid acc: 0.3723
Iter: 7700, train loss: 2.0392, train acc: 0.3125, valid loss: 1.9774, valid acc: 0.3685
Iter: 7800, train loss: 1.6671, train acc: 0.4219, valid loss: 1.9828, valid acc: 0.3631
Iter: 7900, train loss: 1.6543, train acc: 0.3438, valid loss: 1.9695, valid acc: 0.3646
Iter: 8000, train loss: 1.9609, train acc: 0.3750, valid loss: 1.9670, valid acc: 0.3715
Iter: 8100, train loss: 1.4597, train acc: 0.5312, valid loss: 1.9773, valid acc: 0.3562
Iter: 8200, train loss: 1.5098, train acc: 0.4844, valid loss: 1.9702, valid acc: 0.3746
Iter: 8300, train loss: 1.7411, train acc: 0.4219, valid loss: 1.9707, valid acc: 0.3669
Iter: 8400, train loss: 1.7277, train acc: 0.4062, valid loss: 1.9730, valid acc: 0.3646
Iter: 8500, train loss: 1.6993, train acc: 0.3906, valid loss: 1.9950, valid acc: 0.3569
Iter: 8600, train loss: 1.5676, train acc: 0.4688, valid loss: 1.9772, valid acc: 0.3708
Iter: 8700, train loss: 2.0279, train acc: 0.4219, valid loss: 1.9867, valid acc: 0.3654
Iter: 8800, train loss: 1.8400, train acc: 0.4062, valid loss: 1.9894, valid acc: 0.3685
Iter: 8900, train loss: 1.6425, train acc: 0.4688, valid loss: 1.9814, valid acc: 0.3623
Iter: 9000, train loss: 1.6891, train acc: 0.3438, valid loss: 1.9758, valid acc: 0.3723
Iter: 9100, train loss: 1.6954, train acc: 0.4219, valid loss: 1.9882, valid acc: 0.3662
Iter: 9200, train loss: 1.6832, train acc: 0.4219, valid loss: 1.9766, valid acc: 0.3715
Iter: 9300, train loss: 1.6038, train acc: 0.5000, valid loss: 1.9912, valid acc: 0.3585
Iter: 9400, train loss: 1.7378, train acc: 0.3750, valid loss: 1.9940, valid acc: 0.3623
Iter: 9500, train loss: 1.8862, train acc: 0.3906, valid loss: 1.9849, valid acc: 0.3708
Iter: 9600, train loss: 1.6841, train acc: 0.4219, valid loss: 1.9936, valid acc: 0.3669
Iter: 9700, train loss: 1.7176, train acc: 0.3750, valid loss: 1.9882, valid acc: 0.3638
Iter: 9800, train loss: 1.7337, train acc: 0.3906, valid loss: 1.9838, valid acc: 0.3615
Iter: 9900, train loss: 1.5996, train acc: 0.4844, valid loss: 1.9896, valid acc: 0.3669
Iter: 10000, train loss: 1.8500, train acc: 0.3438, valid loss: 1.9904, valid acc: 0.3646

In [79]:
# Display the learning curves (losses) for training and validation
# %matplotlib inline
# %config InlineBackend.figure_format = 'retina'
import matplotlib.pyplot as plt

plt.plot(nn.losses['train'], label='Train loss')
plt.plot(nn.losses['valid'], label='Valid loss')
plt.legend()
plt.show()



In [80]:
loss_train = np.array(nn.losses['train'], dtype=float)
loss_valid = np.array(nn.losses['valid'], dtype=float)
loss_train.shape, loss_valid.shape


Out[80]:
((10000,), (10000,))
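
The training curve is noisy because each point is the loss on a single 64-example minibatch, while the validation loss is computed on the full 1,300-example set. A simple moving average (an added sketch) makes the training trend easier to read:

# Smooth the per-minibatch training loss with a 100-iteration moving average
window = 100
loss_train_smooth = np.convolve(loss_train, np.ones(window) / window, mode='valid')
plt.plot(loss_train_smooth, label='Train loss (smoothed)')
plt.plot(loss_valid, label='Valid loss')
plt.legend()
plt.show()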

In [81]:
loss_train_norm = (loss_train - loss_train.mean(axis=0))/ loss_train.std(axis=0)
loss_valid_norm = (loss_valid - loss_valid.mean(axis=0))/ loss_valid.std(axis=0)

In [82]:
plt.plot(loss_train_norm, label='Normalized train loss')
plt.plot(loss_valid_norm, label='Normalized valid loss')
plt.legend()
plt.show()



In [83]:
plt.plot(nn.losses['train_acc'], label='Train accuracy')
plt.plot(nn.losses['valid_acc'], label='Valid accuracy')
plt.legend()
plt.show()



In [84]:
heading = labels_keys_sorted.copy()
heading.insert(0, 'Id')
heading


Out[84]:
['Id',
 'Blues',
 'Country',
 'Electronic',
 'Folk',
 'International',
 'Jazz',
 'Latin',
 'New_Age',
 'Pop_Rock',
 'Rap',
 'Reggae',
 'RnB',
 'Vocal']

In [85]:
y_pred, y_logits = nn.test(X_test)
y_prob = l.softmax(y_logits)
y_prob.shape, X_test.shape, y_logits.shape, test_y_sample.shape, test_y_sample[:1]


Out[85]:
((10400, 13),
 (10400, 26),
 (10400, 13),
 (10400, 14),
    Id   Blues  Country  Electronic    Folk  International    Jazz   Latin  \
 0   1  0.0964   0.0884      0.0121  0.1004         0.0137  0.1214  0.0883   
 
    New_Age  Pop_Rock     Rap  Reggae     RnB   Vocal  
 0   0.0765    0.0332  0.0445  0.1193  0.1019  0.1038  )
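
Since each row of y_prob comes from a softmax over the 13 genres, every row should sum to 1. A quick sanity check (an added sketch):

# Each row of y_prob should be a valid probability distribution
np.allclose(y_prob.sum(axis=1), 1.0)   # expected: True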

In [86]:
pred_list = []
for Id, pred in enumerate(y_prob):
#     print(Id+1, *pred)
    pred_list.append([Id+1, *pred])

In [94]:
# Write the submission file: a header row followed by one probability row per test example
pred_file = open(file='prediction.csv', mode='w')

# Header row
for idx in range(len(heading)):
    if idx < len(heading) - 1:
        pred_file.write(heading[idx] + ',')
    else:
        pred_file.write(heading[idx] + '\n')

# Data rows
for i in range(len(pred_list)): # rows
    for j in range(len(pred_list[i])): # cols
        if j < (len(pred_list[i]) - 1):
            pred_file.write(str(pred_list[i][j]))
            pred_file.write(',')
        else: # last item in the row, so end the line
            pred_file.write(str(pred_list[i][j]) + '\n')

pred_file.close()
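
The manual writing above works, but the same submission can be produced more directly with pandas from the y_prob matrix and the heading list (a sketch; written to a hypothetical filename so it does not clobber prediction.csv):

# Equivalent submission built with a DataFrame
submission = pd.DataFrame(y_prob, columns=heading[1:])
submission.insert(0, 'Id', np.arange(1, len(y_prob) + 1))
submission.to_csv('prediction_pandas.csv', index=False)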

In [95]:
pd.read_csv(filepath_or_buffer='prediction.csv').head()


Out[95]:
Id Blues Country Electronic Folk International Jazz Latin New_Age Pop_Rock Rap Reggae RnB Vocal
0 1 0.005662 0.000523 0.024175 0.003227 0.010947 0.006867 0.009251 0.001542 0.011825 0.518482 0.354914 0.034134 0.018449
1 2 0.044319 0.009130 0.003746 0.040108 0.060068 0.015706 0.190782 0.006279 0.007161 0.058666 0.506629 0.010796 0.046609
2 3 0.008886 0.001805 0.020746 0.003116 0.019151 0.000415 0.023228 0.002645 0.046945 0.170752 0.610813 0.090188 0.001309
3 4 0.037067 0.050894 0.042052 0.046739 0.128590 0.008095 0.016393 0.007842 0.022861 0.069318 0.388322 0.115355 0.066472
4 5 0.002078 0.000041 0.027263 0.000310 0.008667 0.000373 0.007880 0.003097 0.004524 0.584146 0.358993 0.002589 0.000039

In [96]:
pd.read_csv(filepath_or_buffer='prediction.csv').shape, test_y_sample.shape


Out[96]:
((10400, 14), (10400, 14))

In [97]:
test_y_sample.head()


Out[97]:
Id Blues Country Electronic Folk International Jazz Latin New_Age Pop_Rock Rap Reggae RnB Vocal
0 1 0.0964 0.0884 0.0121 0.1004 0.0137 0.1214 0.0883 0.0765 0.0332 0.0445 0.1193 0.1019 0.1038
1 2 0.0121 0.0804 0.0376 0.0289 0.1310 0.0684 0.1044 0.0118 0.1562 0.0585 0.1633 0.1400 0.0073
2 3 0.1291 0.0985 0.0691 0.0356 0.0788 0.0529 0.1185 0.1057 0.1041 0.0075 0.0481 0.1283 0.0238
3 4 0.0453 0.1234 0.0931 0.0126 0.1224 0.0627 0.0269 0.0764 0.0812 0.1337 0.0357 0.0937 0.0930
4 5 0.0600 0.0915 0.0667 0.0947 0.0509 0.0335 0.1251 0.0202 0.1012 0.0365 0.1310 0.0898 0.0991

In [ ]: