In [1]:
import pandas as pd # to read CSV files (Comma Separated Values)

train_x = pd.read_csv(filepath_or_buffer='data/kaggle-music-genre/train.x.csv')
train_x.head()


Out[1]:
Id att1 att2 att3 att4 att5 att6 att7 att8 att9 ... att18 att19 att20 att21 att22 att23 att24 att25 att26 msd_track_id
0 1 41.08 6.579 4.307 3.421 3.192 2.076 2.179 2.052 1.794 ... 1.3470 -0.2463 -1.5470 0.17920 -1.1530 -0.7370 0.40750 -0.67190 -0.05147 TRPLTEM128F92E1389
1 2 60.80 5.973 4.344 3.261 2.835 2.725 2.446 1.884 1.962 ... -0.3316 0.3519 -1.4760 0.52700 -2.1960 1.5990 -1.39000 0.22560 -0.72080 TRJWMBQ128F424155E
2 3 51.47 4.971 4.316 2.916 3.112 2.290 2.053 1.934 1.878 ... -0.2803 -0.1603 -0.1355 1.03500 0.2370 1.4890 0.02959 -0.13670 0.10820 TRRZWMO12903CCFCC2
3 4 41.28 6.610 4.411 2.602 2.822 2.126 1.984 1.973 1.945 ... -1.6930 1.0040 -0.3953 0.26710 -1.0450 0.4974 0.03724 1.04500 -0.20000 TRBZRUT12903CE6C04
4 5 54.17 8.945 4.685 4.208 3.154 3.527 2.733 2.202 2.686 ... 2.4690 -0.5449 -0.5622 -0.08968 -0.9823 -0.2445 -1.65800 -0.04825 -0.70950 TRLUJQF128F42AF5BF

5 rows × 28 columns


In [2]:
train_y = pd.read_csv(filepath_or_buffer='data/kaggle-music-genre/train.y.csv')
train_y.head()


Out[2]:
Id class_label
0 1 International
1 2 Vocal
2 3 Latin
3 4 Blues
4 5 Vocal

In [3]:
test_x = pd.read_csv(filepath_or_buffer='data/kaggle-music-genre/test.x.csv')
test_x.head()


Out[3]:
Id att1 att2 att3 att4 att5 att6 att7 att8 att9 ... att17 att18 att19 att20 att21 att22 att23 att24 att25 att26
0 1 38.22 8.076 6.935 4.696 3.856 3.465 2.922 2.568 2.070 ... 3.988 0.4957 0.1836 -2.2210 0.6453 -0.2923 1.2000 -0.09179 0.4674 0.2158
1 2 36.42 6.131 5.364 4.292 3.968 2.937 2.872 2.142 2.050 ... 7.098 1.2290 0.5971 -1.0670 0.9569 -1.8240 2.3130 -0.80890 0.5612 -0.6225
2 3 70.01 5.496 4.698 3.699 3.258 2.293 2.680 2.226 2.034 ... 4.449 0.4773 1.6370 -1.0690 2.4160 -0.6299 1.4190 -0.81960 0.9151 -0.5948
3 4 40.64 7.281 6.702 4.043 3.729 3.043 2.644 2.366 1.940 ... 2.785 1.9000 -1.1370 1.2750 1.7920 -2.1250 1.6090 -0.83230 -0.1998 -0.1218
4 5 38.85 7.118 5.703 4.825 4.088 3.823 3.254 2.551 2.193 ... 4.536 2.1470 1.0200 -0.2656 2.8050 0.2762 0.2504 1.04900 0.3447 -0.7689

5 rows × 27 columns


In [4]:
test_y_sample = pd.read_csv(filepath_or_buffer='data/kaggle-music-genre/submission-random.csv')
test_y_sample.head()


Out[4]:
Id Blues Country Electronic Folk International Jazz Latin New_Age Pop_Rock Rap Reggae RnB Vocal
0 1 0.0964 0.0884 0.0121 0.1004 0.0137 0.1214 0.0883 0.0765 0.0332 0.0445 0.1193 0.1019 0.1038
1 2 0.0121 0.0804 0.0376 0.0289 0.1310 0.0684 0.1044 0.0118 0.1562 0.0585 0.1633 0.1400 0.0073
2 3 0.1291 0.0985 0.0691 0.0356 0.0788 0.0529 0.1185 0.1057 0.1041 0.0075 0.0481 0.1283 0.0238
3 4 0.0453 0.1234 0.0931 0.0126 0.1224 0.0627 0.0269 0.0764 0.0812 0.1337 0.0357 0.0937 0.0930
4 5 0.0600 0.0915 0.0667 0.0947 0.0509 0.0335 0.1251 0.0202 0.1012 0.0365 0.1310 0.0898 0.0991

In [5]:
test_y_sample[:0]


Out[5]:
Id Blues Country Electronic Folk International Jazz Latin New_Age Pop_Rock Rap Reggae RnB Vocal

In [6]:
import numpy as np

train_X = np.array(train_x)
train_Y = np.array(train_y[:]['class_label'])
test_X = np.array(test_x)

# Drop the Id column from both sets, and the trailing msd_track_id column from the training set
X_train_val = np.array(train_X[:, 1:-1], dtype=float)
X_test = np.array(test_X[:, 1:], dtype=float)

train_Y.shape


Out[6]:
(13000,)

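An equivalent, pandas-native way to drop the Id and msd_track_id columns is sketched below; it assumes only the train_x and test_x frames loaded above, and the *_alt names are hypothetical.

# Sketch: pandas-native equivalent of the column slicing above
X_train_val_alt = train_x.drop(columns=['Id', 'msd_track_id']).to_numpy(dtype=float)
X_test_alt = test_x.drop(columns=['Id']).to_numpy(dtype=float)
X_train_val_alt.shape, X_test_alt.shape
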
In [7]:
from collections import Counter

# Count the frequency of each class label in the training set
counted_labels = Counter(train_Y)
labels_keys = counted_labels.keys()
labels_keys


Out[7]:
dict_keys(['Country', 'Blues', 'Folk', 'Reggae', 'Jazz', 'International', 'RnB', 'New_Age', 'Electronic', 'Rap', 'Pop_Rock', 'Vocal', 'Latin'])

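Before encoding the labels it can help to see how balanced the genres are. A minimal sketch, reusing the counted_labels Counter from the cell above:

# Sketch: print each genre with its frequency, most common first
for label, count in counted_labels.most_common():
    print('{:<15s} {}'.format(label, count))
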
In [8]:
labels_keys_sorted = sorted(labels_keys)
labels_keys_sorted


Out[8]:
['Blues',
 'Country',
 'Electronic',
 'Folk',
 'International',
 'Jazz',
 'Latin',
 'New_Age',
 'Pop_Rock',
 'Rap',
 'Reggae',
 'RnB',
 'Vocal']

In [9]:
# Dict comprehension: map each class label to an integer index (a label vocabulary)
key_to_val = {key: val for val, key in enumerate(labels_keys_sorted)}
key_to_val['Country']
key_to_val


Out[9]:
{'Blues': 0,
 'Country': 1,
 'Electronic': 2,
 'Folk': 3,
 'International': 4,
 'Jazz': 5,
 'Latin': 6,
 'New_Age': 7,
 'Pop_Rock': 8,
 'Rap': 9,
 'Reggae': 10,
 'RnB': 11,
 'Vocal': 12}

In [10]:
# Inverse mapping: integer index back to class label
val_to_key = {val: key for val, key in enumerate(labels_keys_sorted)}
val_to_key[1]
val_to_key


Out[10]:
{0: 'Blues',
 1: 'Country',
 2: 'Electronic',
 3: 'Folk',
 4: 'International',
 5: 'Jazz',
 6: 'Latin',
 7: 'New_Age',
 8: 'Pop_Rock',
 9: 'Rap',
 10: 'Reggae',
 11: 'RnB',
 12: 'Vocal'}

In [11]:
# Convert the class labels to integer indices using key_to_val
Y_train_vec = []
for each in train_y['class_label']:
#     print(each, key_to_val[each])
    Y_train_vec.append(key_to_val[each])

Y_train_val = np.array(Y_train_vec)
Y_train_val.shape


Out[11]:
(13000,)

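The loop above works; an equivalent vectorized version, sketched here assuming the same train_y frame and key_to_val dictionary, uses pandas Series.map (the _alt name is hypothetical):

# Sketch: label-to-index mapping without an explicit Python loop
Y_train_val_alt = train_y['class_label'].map(key_to_val).to_numpy()
(Y_train_val_alt == Y_train_val).all()  # should be True
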
In [12]:
# # Earlier attempt: a normalize() helper that only centered the features
# def normalize(X):
#     return (X - X.mean(axis=0)) #/ X.std(axis=0)
# X_train, X_val, X_test = normalize(X=X_train), normalize(X=X_val), normalize(X=X_test)

# Preprocessing: normalizing the data based on the training set
mean = X_train_val.mean(axis=0)
std = X_train_val.std(axis=0)

X_train_val, X_test = (X_train_val - mean)/ std, (X_test - mean)/ std
X_train_val.shape, X_test.shape, X_train_val.dtype, X_test.dtype


Out[12]:
((13000, 26), (10400, 26), dtype('float64'), dtype('float64'))

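Because the mean and standard deviation are computed on the training portion only, the standardized training features should have (near) zero mean and unit variance. A quick check, sketched on the arrays above:

# Sketch: sanity-check the standardization
np.allclose(X_train_val.mean(axis=0), 0.0), np.allclose(X_train_val.std(axis=0), 1.0)
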
In [13]:
# Create a validation set from the last 10% of the labeled training data
valid_size = X_train_val.shape[0]//10
valid_size
X_val = X_train_val[-valid_size:]
Y_val = Y_train_val[-valid_size:]
X_train = X_train_val[: -valid_size]
Y_train = Y_train_val[: -valid_size]
X_train.shape, X_val.shape, X_test.shape, Y_val.shape, Y_train.shape
# X_train.dtype, X_val.dtype
# Y_train.dtype, Y_val


Out[13]:
((11700, 26), (1300, 26), (10400, 26), (1300,), (11700,))

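The split above simply takes the last 10% of rows. If the CSV happened to be ordered by genre, that tail slice would not be representative; a shuffled, stratified alternative is sketched below using scikit-learn (already a dependency via sklearn.utils.shuffle). The *_alt names are hypothetical.

# Sketch: shuffled, stratified 90/10 split as an alternative to the tail slice
from sklearn.model_selection import train_test_split

X_train_alt, X_val_alt, Y_train_alt, Y_val_alt = train_test_split(
    X_train_val, Y_train_val, test_size=0.1, stratify=Y_train_val, random_state=0)
X_train_alt.shape, X_val_alt.shape
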
In [21]:
# "Model output" here refers to the logits of the last layer, before the softmax is applied.
def softmax(X):
    eX = np.exp((X.T - np.max(X, axis=1)).T)
    return (eX.T / eX.sum(axis=1)).T

def cross_entropy(y_pred, y_train):
    m = y_pred.shape[0]

    prob = softmax(y_pred)
#     print(prob.shape, y_train.shape, m)
    log_like = -np.log(prob[range(m), y_train]) # negative log-likelihood of the true class
    data_loss = np.sum(log_like) / m

    return data_loss

def dcross_entropy(y_pred, y_train): # same gradient with or without a regularization term, since that term does not depend on y_pred
    m = y_pred.shape[0]

    grad_y = softmax(y_pred)
    grad_y[range(m), y_train] -= 1.
    grad_y /= m

    return grad_y

# def loss_function(y, y_train):

#     loss = cross_entropy(y, y_train) # softmax is included
#     dy = dcross_entropy(y, y_train) # dsoftmax is included

#     return loss, dy

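A small numeric check of the loss helpers: every softmax row should sum to one, and with uniform logits the cross-entropy equals log(C). A sketch on toy data (shapes chosen arbitrarily):

# Sketch: sanity checks for softmax() and cross_entropy()
toy_logits = np.zeros((4, 13))          # uniform logits over the 13 genres
toy_labels = np.array([0, 5, 7, 12])
print(np.allclose(softmax(toy_logits).sum(axis=1), 1.0))               # rows sum to 1
print(np.isclose(cross_entropy(toy_logits, toy_labels), np.log(13)))  # equals log(C)
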
In [22]:
# Model
import impl.layer as l # local layer implementations (tanh, softmax, ...); or: from impl.layer import *
# from impl.loss import * # alternative: import the loss functions from impl.loss
from sklearn.utils import shuffle as skshuffle

class FFNN:

    def __init__(self, D, C, H, L):
        self.L = L # number of layers or depth
        self.losses = {'train':[], 'train_acc':[], 'valid':[], 'valid_acc':[]}
        
        # Randomly initialized, learnable feedforward weights and their gradient buffers
        self.model = []
        self.grads = []
        low, high = -1, 1
        
        # Input layer: weights/ biases
        m = dict(W=np.random.uniform(size=(D, H), low=low, high=high) / np.sqrt(D / 2.), 
                 b=np.zeros((1, H)))
        self.model.append(m)
        # Input layer: gradients
        self.grads.append({key: np.zeros_like(val) for key, val in self.model[0].items()})

        # Hidden layers: weights/ biases
        m_L = []
        for _ in range(L):
            m = dict(W=np.random.uniform(size=(H, H), low=low, high=high) / np.sqrt(H / 2.), 
                     b=np.zeros((1, H)))
            m_L.append(m)
        self.model.append(m_L)
        # Hidden layer: gradients
        grad_L = []
        for _ in range(L):
            grad_L.append({key: np.zeros_like(val) for key, val in self.model[1][0].items()})
        self.grads.append(grad_L)
        
        # Output layer: weights/ biases
        m = dict(W=np.random.uniform(size=(H, C), low=low, high=high) / np.sqrt(H / 2.), 
                 b=np.zeros((1, C)))
        self.model.append(m)
        # Output layer: gradients
        self.grads.append({key: np.zeros_like(val) for key, val in self.model[2].items()})
        
    def fc_forward(self, X, W, b):
        out = (X @ W) + b
        cache = (W, X)
        return out, cache

    def fc_backward(self, dout, cache):
        W, X = cache

        dW = X.T @ dout
        db = np.sum(dout, axis=0).reshape(1, -1) # db_1xn
        dX = dout @ W.T # Backprop

        return dX, dW, db

    def train_forward(self, X, train):
        caches, ys = [], []
        
        # Input layer
        y, fc_cache = self.fc_forward(X=X, W=self.model[0]['W'], b=self.model[0]['b']) # X_1xD, y_1xc
        y, nl_cache = l.tanh_forward(X=y)
        X = y.copy() # pass to the next layer
        if train:
            caches.append((fc_cache, nl_cache))
        
        # Hidden layers
        fc_caches, nl_caches = [], []
        for layer in range(self.L):
            y, fc_cache = self.fc_forward(X=X, W=self.model[1][layer]['W'], b=self.model[1][layer]['b'])
            y, nl_cache = l.tanh_forward(X=y)
            X = y.copy() # pass to next layer
            if train:
                fc_caches.append(fc_cache)
                nl_caches.append(nl_cache)
        if train:
            caches.append((fc_caches, nl_caches)) # caches[1]            
        
        # Output layer
        y, fc_cache = self.fc_forward(X=X, W=self.model[2]['W'], b=self.model[2]['b'])
        # Softmax is included in loss function
        if train:
            caches.append(fc_cache)

        return y, caches # caches are kept for backpropagating the error

    def loss_function(self, y, y_train):
#         print(y.shape, y_train.shape)
#         print('y[0]', y[0])
#         print('y_train[:3]', y_train[0])
        loss = cross_entropy(y, y_train) # softmax is included
        dy = dcross_entropy(y, y_train) # dsoftmax is included
        
        return loss, dy
        
    def train_backward(self, dy, caches):
        grads = self.grads # gradient buffers; every entry is overwritten below
        
        # Output layer
        fc_cache = caches[2]
        # dSoftmax is included in loss function
        dX, dW, db = self.fc_backward(dout=dy, cache=fc_cache)
        dy = dX.copy()
        grads[2]['W'] = dW
        grads[2]['b'] = db

        # Hidden layer
        fc_caches, nl_caches = caches[1]
        for layer in reversed(range(self.L)):
            dy = l.tanh_backward(cache=nl_caches[layer], dout=dy) # differentiable nonlinearity
            dX, dW, db = self.fc_backward(dout=dy, cache=fc_caches[layer])
            dy = dX.copy()
            grads[1][layer]['W'] = dW
            grads[1][layer]['b'] = db
        
        # Input layer
        fc_cache, nl_cache = caches[0]
        dy = l.tanh_backward(cache=nl_cache, dout=dy) # differentiable nonlinearity
        dX, dW, db = self.fc_backward(dout=dy, cache=fc_cache)
        grads[0]['W'] = dW
        grads[0]['b'] = db

        return dX, grads
    
    def test(self, X):
        y_logit, _ = self.train_forward(X, train=False)
        
        # if self.mode == 'classification':
        y_prob = l.softmax(y_logit) # class probabilities
        y_pred = np.argmax(y_prob, axis=1) # predicted class index (used for accuracy)
        
        return y_pred, y_logit
        
    def get_minibatch(self, X, y, minibatch_size, shuffle):
        minibatches = []

        if shuffle:
            X, y = skshuffle(X, y)

        for i in range(0, X.shape[0], minibatch_size):
            X_mini = X[i:i + minibatch_size]
            y_mini = y[i:i + minibatch_size]
            minibatches.append((X_mini, y_mini))

        return minibatches

    def sgd(self, train_set, val_set, alpha, mb_size, n_iter, print_after):
        X_train, y_train = train_set
        X_val, y_val = val_set

        # Epochs
        for iter in range(1, n_iter + 1):

            # Minibatches
            minibatches = self.get_minibatch(X_train, y_train, mb_size, shuffle=True)
            idx = np.random.randint(0, len(minibatches))
            X_mini, y_mini = minibatches[idx]
            
            # Train the model
            y, caches = self.train_forward(X_mini, train=True)
            _, dy = self.loss_function(y, y_mini)
            _, grads = self.train_backward(dy, caches) 
            
            # Update the model for input layer
            for key in grads[0].keys():
                self.model[0][key] -= alpha * grads[0][key]

            # Update the model for the hidden layers
            for layer in range(self.L):
                for key in grads[1][layer].keys():
                    self.model[1][layer][key] -= alpha * grads[1][layer][key]

            # Update the model for output layer
            for key in grads[2].keys():
                self.model[2][key] -= alpha * grads[2][key]
                
            # Trained model info
            y_pred, y_logit = self.test(X_mini)
            loss, _ = self.loss_function(y_logit, y_mini) # softmax is included in entropy loss function
            self.losses['train'].append(loss)
            acc = np.mean(y_pred == y_mini) # minibatch accuracy
            self.losses['train_acc'].append(acc)

            # Validated model info
            y_pred, y_logit = self.test(X_val)
            valid_loss, _ = self.loss_function(y_logit, y_val) # softmax is included in entropy loss function
            self.losses['valid'].append(valid_loss)
            valid_acc = np.mean(y_pred == y_val) # validation accuracy
            self.losses['valid_acc'].append(valid_acc)
            
            # Print the model info: loss & accuracy or err & acc
            if iter % print_after == 0:
                print('Iter: {}, train loss: {:.4f}, train acc: {:.4f}, valid loss: {:.4f}, valid acc: {:.4f}'.format(
                    iter, loss, acc, valid_loss, valid_acc))

#         # Test the final model
#         y_pred, y_logit = nn.test(X_test)
#         loss, _ = self.loss_function(y_logit, y_test) # softmax is included in entropy loss function
#         acc = np.mean(y_pred == y_test)
#         print('Last iteration - Test accuracy mean: {:.4f}, std: {:.4f}, loss: {:.4f}'.format(
#             acc.mean(), acc.std(), loss))

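One way to gain confidence in fc_forward/fc_backward is a finite-difference check on a single weight of a tiny, throw-away network. This is only a sketch: the toy shapes and the nn_check name are hypothetical, and only one entry of dW is checked.

# Sketch: finite-difference check of fc_backward on one weight entry
nn_check = FFNN(D=4, C=3, H=5, L=1)
rng = np.random.RandomState(0)
X0 = rng.randn(2, 4)
W0, b0 = nn_check.model[0]['W'], nn_check.model[0]['b']

out, cache = nn_check.fc_forward(X0, W0, b0)
dout = rng.randn(*out.shape)            # upstream gradient
_, dW, _ = nn_check.fc_backward(dout, cache)

eps, i, j = 1e-6, 1, 2
W_plus, W_minus = W0.copy(), W0.copy()
W_plus[i, j] += eps
W_minus[i, j] -= eps
f_plus = np.sum(nn_check.fc_forward(X0, W_plus, b0)[0] * dout)
f_minus = np.sum(nn_check.fc_forward(X0, W_minus, b0)[0] * dout)
print(np.isclose((f_plus - f_minus) / (2 * eps), dW[i, j], rtol=1e-4))
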
In [23]:
Y_train.shape, X_train.shape, X_val.shape, Y_val.shape


Out[23]:
((11700,), (11700, 26), (1300, 26), (1300,))

In [25]:
# Hyper-parameters
n_iter = 1000 # number of training iterations (one random minibatch per iteration)
alpha = 1e-2 # learning rate
mb_size = 64 # minibatch size
print_after = 10 # print train/valid loss and accuracy every print_after iterations
num_hidden_units = 32 # hidden units in each layer
num_input_units = X_train.shape[1] # number of input features
num_output_units = Y_train.max() + 1 # number of classes in this classification problem
# num_output_units = Y_train.shape[1] # number of classes in this classification problem
num_layers = 3 # depth: number of hidden layers

# Build the network and train it with SGD
nn = FFNN(C=num_output_units, D=num_input_units, H=num_hidden_units, L=num_layers)

nn.sgd(train_set=(X_train, Y_train), val_set=(X_val, Y_val), mb_size=mb_size, alpha=alpha, 
           n_iter=n_iter, print_after=print_after)


Iter: 10, train loss: 2.5907, train acc: 0.0312, valid loss: 2.5715, valid acc: 0.0992
Iter: 20, train loss: 2.5493, train acc: 0.1250, valid loss: 2.5652, valid acc: 0.1038
Iter: 30, train loss: 2.5430, train acc: 0.1094, valid loss: 2.5586, valid acc: 0.1100
Iter: 40, train loss: 2.5706, train acc: 0.0469, valid loss: 2.5518, valid acc: 0.1231
Iter: 50, train loss: 2.5442, train acc: 0.1719, valid loss: 2.5458, valid acc: 0.1254
Iter: 60, train loss: 2.5603, train acc: 0.0781, valid loss: 2.5399, valid acc: 0.1277
Iter: 70, train loss: 2.5106, train acc: 0.1250, valid loss: 2.5337, valid acc: 0.1346
Iter: 80, train loss: 2.5196, train acc: 0.1562, valid loss: 2.5280, valid acc: 0.1331
Iter: 90, train loss: 2.5141, train acc: 0.1094, valid loss: 2.5220, valid acc: 0.1408
Iter: 100, train loss: 2.5078, train acc: 0.2188, valid loss: 2.5161, valid acc: 0.1477
Iter: 110, train loss: 2.4926, train acc: 0.2188, valid loss: 2.5101, valid acc: 0.1477
Iter: 120, train loss: 2.5231, train acc: 0.1406, valid loss: 2.5051, valid acc: 0.1523
Iter: 130, train loss: 2.5002, train acc: 0.1875, valid loss: 2.4998, valid acc: 0.1508
Iter: 140, train loss: 2.5050, train acc: 0.1094, valid loss: 2.4943, valid acc: 0.1554
Iter: 150, train loss: 2.5320, train acc: 0.0781, valid loss: 2.4894, valid acc: 0.1600
Iter: 160, train loss: 2.5189, train acc: 0.1094, valid loss: 2.4844, valid acc: 0.1662
Iter: 170, train loss: 2.5043, train acc: 0.1250, valid loss: 2.4792, valid acc: 0.1685
Iter: 180, train loss: 2.4755, train acc: 0.1562, valid loss: 2.4737, valid acc: 0.1692
Iter: 190, train loss: 2.4512, train acc: 0.1875, valid loss: 2.4685, valid acc: 0.1646
Iter: 200, train loss: 2.4473, train acc: 0.1719, valid loss: 2.4634, valid acc: 0.1677
Iter: 210, train loss: 2.4430, train acc: 0.1406, valid loss: 2.4585, valid acc: 0.1754
Iter: 220, train loss: 2.4230, train acc: 0.1875, valid loss: 2.4534, valid acc: 0.1746
Iter: 230, train loss: 2.5086, train acc: 0.1406, valid loss: 2.4489, valid acc: 0.1700
Iter: 240, train loss: 2.4409, train acc: 0.0625, valid loss: 2.4449, valid acc: 0.1785
Iter: 250, train loss: 2.5113, train acc: 0.0625, valid loss: 2.4400, valid acc: 0.1800
Iter: 260, train loss: 2.4097, train acc: 0.2344, valid loss: 2.4344, valid acc: 0.1808
Iter: 270, train loss: 2.3959, train acc: 0.1250, valid loss: 2.4292, valid acc: 0.1838
Iter: 280, train loss: 2.4486, train acc: 0.1094, valid loss: 2.4236, valid acc: 0.1831
Iter: 290, train loss: 2.3971, train acc: 0.1875, valid loss: 2.4182, valid acc: 0.1823
Iter: 300, train loss: 2.4313, train acc: 0.1562, valid loss: 2.4133, valid acc: 0.1823
Iter: 310, train loss: 2.3792, train acc: 0.1719, valid loss: 2.4082, valid acc: 0.1792
Iter: 320, train loss: 2.3941, train acc: 0.2031, valid loss: 2.4038, valid acc: 0.1800
Iter: 330, train loss: 2.4678, train acc: 0.1406, valid loss: 2.3997, valid acc: 0.1846
Iter: 340, train loss: 2.4761, train acc: 0.1250, valid loss: 2.3953, valid acc: 0.1885
Iter: 350, train loss: 2.3540, train acc: 0.1719, valid loss: 2.3902, valid acc: 0.1885
Iter: 360, train loss: 2.3834, train acc: 0.2344, valid loss: 2.3855, valid acc: 0.1862
Iter: 370, train loss: 2.3411, train acc: 0.2500, valid loss: 2.3809, valid acc: 0.1885
Iter: 380, train loss: 2.4164, train acc: 0.1875, valid loss: 2.3767, valid acc: 0.1915
Iter: 390, train loss: 2.3796, train acc: 0.1875, valid loss: 2.3723, valid acc: 0.1946
Iter: 400, train loss: 2.3666, train acc: 0.1719, valid loss: 2.3681, valid acc: 0.1892
Iter: 410, train loss: 2.3503, train acc: 0.1875, valid loss: 2.3641, valid acc: 0.1985
Iter: 420, train loss: 2.3653, train acc: 0.1719, valid loss: 2.3599, valid acc: 0.1977
Iter: 430, train loss: 2.3443, train acc: 0.2188, valid loss: 2.3557, valid acc: 0.2069
Iter: 440, train loss: 2.3326, train acc: 0.1719, valid loss: 2.3514, valid acc: 0.2077
Iter: 450, train loss: 2.3256, train acc: 0.2656, valid loss: 2.3472, valid acc: 0.2115
Iter: 460, train loss: 2.3333, train acc: 0.2031, valid loss: 2.3432, valid acc: 0.2146
Iter: 470, train loss: 2.2365, train acc: 0.3438, valid loss: 2.3392, valid acc: 0.2138
Iter: 480, train loss: 2.3524, train acc: 0.1719, valid loss: 2.3355, valid acc: 0.2146
Iter: 490, train loss: 2.2976, train acc: 0.2500, valid loss: 2.3318, valid acc: 0.2138
Iter: 500, train loss: 2.3990, train acc: 0.1094, valid loss: 2.3285, valid acc: 0.2162
Iter: 510, train loss: 2.3377, train acc: 0.1875, valid loss: 2.3252, valid acc: 0.2146
Iter: 520, train loss: 2.2551, train acc: 0.2656, valid loss: 2.3217, valid acc: 0.2131
Iter: 530, train loss: 2.3109, train acc: 0.1562, valid loss: 2.3183, valid acc: 0.2162
Iter: 540, train loss: 2.2936, train acc: 0.2188, valid loss: 2.3153, valid acc: 0.2169
Iter: 550, train loss: 2.2519, train acc: 0.2812, valid loss: 2.3125, valid acc: 0.2177
Iter: 560, train loss: 2.3035, train acc: 0.2031, valid loss: 2.3093, valid acc: 0.2208
Iter: 570, train loss: 2.2455, train acc: 0.2344, valid loss: 2.3057, valid acc: 0.2231
Iter: 580, train loss: 2.2971, train acc: 0.2188, valid loss: 2.3025, valid acc: 0.2185
Iter: 590, train loss: 2.2579, train acc: 0.2656, valid loss: 2.2990, valid acc: 0.2177
Iter: 600, train loss: 2.2035, train acc: 0.2656, valid loss: 2.2959, valid acc: 0.2200
Iter: 610, train loss: 2.3431, train acc: 0.2031, valid loss: 2.2926, valid acc: 0.2208
Iter: 620, train loss: 2.4083, train acc: 0.2188, valid loss: 2.2901, valid acc: 0.2215
Iter: 630, train loss: 2.2991, train acc: 0.2031, valid loss: 2.2868, valid acc: 0.2223
Iter: 640, train loss: 2.2152, train acc: 0.3438, valid loss: 2.2845, valid acc: 0.2215
Iter: 650, train loss: 2.2793, train acc: 0.1719, valid loss: 2.2817, valid acc: 0.2223
Iter: 660, train loss: 2.2368, train acc: 0.3281, valid loss: 2.2789, valid acc: 0.2238
Iter: 670, train loss: 2.3546, train acc: 0.1719, valid loss: 2.2764, valid acc: 0.2231
Iter: 680, train loss: 2.2970, train acc: 0.1719, valid loss: 2.2736, valid acc: 0.2223
Iter: 690, train loss: 2.2552, train acc: 0.2656, valid loss: 2.2707, valid acc: 0.2246
Iter: 700, train loss: 2.2516, train acc: 0.1562, valid loss: 2.2676, valid acc: 0.2285
Iter: 710, train loss: 2.3488, train acc: 0.1719, valid loss: 2.2657, valid acc: 0.2338
Iter: 720, train loss: 2.2188, train acc: 0.3594, valid loss: 2.2634, valid acc: 0.2323
Iter: 730, train loss: 2.3272, train acc: 0.1250, valid loss: 2.2609, valid acc: 0.2346
Iter: 740, train loss: 2.2616, train acc: 0.3438, valid loss: 2.2583, valid acc: 0.2369
Iter: 750, train loss: 2.1883, train acc: 0.2969, valid loss: 2.2559, valid acc: 0.2346
Iter: 760, train loss: 2.3402, train acc: 0.1562, valid loss: 2.2536, valid acc: 0.2346
Iter: 770, train loss: 2.2177, train acc: 0.2188, valid loss: 2.2520, valid acc: 0.2354
Iter: 780, train loss: 2.3219, train acc: 0.1406, valid loss: 2.2501, valid acc: 0.2331
Iter: 790, train loss: 2.2685, train acc: 0.2500, valid loss: 2.2480, valid acc: 0.2362
Iter: 800, train loss: 2.0864, train acc: 0.2969, valid loss: 2.2456, valid acc: 0.2377
Iter: 810, train loss: 2.3192, train acc: 0.2656, valid loss: 2.2436, valid acc: 0.2385
Iter: 820, train loss: 2.2444, train acc: 0.2656, valid loss: 2.2415, valid acc: 0.2400
Iter: 830, train loss: 2.1735, train acc: 0.2812, valid loss: 2.2391, valid acc: 0.2408
Iter: 840, train loss: 2.1517, train acc: 0.3281, valid loss: 2.2370, valid acc: 0.2431
Iter: 850, train loss: 2.2640, train acc: 0.2188, valid loss: 2.2358, valid acc: 0.2408
Iter: 860, train loss: 2.2336, train acc: 0.2188, valid loss: 2.2343, valid acc: 0.2415
Iter: 870, train loss: 2.1758, train acc: 0.2656, valid loss: 2.2323, valid acc: 0.2392
Iter: 880, train loss: 2.1359, train acc: 0.3438, valid loss: 2.2303, valid acc: 0.2400
Iter: 890, train loss: 2.2167, train acc: 0.3281, valid loss: 2.2288, valid acc: 0.2408
Iter: 900, train loss: 2.2375, train acc: 0.2031, valid loss: 2.2274, valid acc: 0.2462
Iter: 910, train loss: 2.0695, train acc: 0.3438, valid loss: 2.2257, valid acc: 0.2446
Iter: 920, train loss: 2.2029, train acc: 0.1719, valid loss: 2.2238, valid acc: 0.2454
Iter: 930, train loss: 2.1782, train acc: 0.2500, valid loss: 2.2221, valid acc: 0.2454
Iter: 940, train loss: 2.2485, train acc: 0.2188, valid loss: 2.2201, valid acc: 0.2454
Iter: 950, train loss: 2.2405, train acc: 0.2188, valid loss: 2.2186, valid acc: 0.2446
Iter: 960, train loss: 2.1822, train acc: 0.2031, valid loss: 2.2172, valid acc: 0.2431
Iter: 970, train loss: 2.1362, train acc: 0.2656, valid loss: 2.2156, valid acc: 0.2500
Iter: 980, train loss: 2.0514, train acc: 0.3281, valid loss: 2.2141, valid acc: 0.2469
Iter: 990, train loss: 2.2700, train acc: 0.1562, valid loss: 2.2129, valid acc: 0.2431
Iter: 1000, train loss: 2.1257, train acc: 0.2344, valid loss: 2.2118, valid acc: 0.2415

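Since nn.losses stores one value per iteration, the best validation accuracy and the iteration it occurred at can be read off directly; a minimal sketch:

# Sketch: best validation accuracy seen during training
best_it = int(np.argmax(nn.losses['valid_acc']))
print('best valid acc: {:.4f} at iter {}'.format(nn.losses['valid_acc'][best_it], best_it + 1))
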
In [26]:
# Display the learning curves (train/validation loss) over iterations
# %matplotlib inline
# %config InlineBackend.figure_format = 'retina'
import matplotlib.pyplot as plt

plt.plot(nn.losses['train'], label='Train loss')
plt.plot(nn.losses['valid'], label='Valid loss')
plt.legend()
plt.show()



In [27]:
loss_train = np.array(nn.losses['train'], dtype=float)
loss_valid = np.array(nn.losses['valid'], dtype=float)
loss_train.shape, loss_valid.shape


Out[27]:
((1000,), (1000,))

In [28]:
loss_train_norm = (loss_train - loss_train.mean(axis=0))/ loss_train.std(axis=0)
loss_valid_norm = (loss_valid - loss_valid.mean(axis=0))/ loss_valid.std(axis=0)

In [29]:
plt.plot(loss_train_norm, label='Normalized train loss')
plt.plot(loss_valid_norm, label='Normalized valid loss')
plt.legend()
plt.show()



In [30]:
plt.plot(nn.losses['train_acc'], label='Train accuracy')
plt.plot(nn.losses['valid_acc'], label='Valid accuracy')
plt.legend()
plt.show()



In [31]:
heading = labels_keys_sorted.copy()
heading.insert(0, 'Id')
heading


Out[31]:
['Id',
 'Blues',
 'Country',
 'Electronic',
 'Folk',
 'International',
 'Jazz',
 'Latin',
 'New_Age',
 'Pop_Rock',
 'Rap',
 'Reggae',
 'RnB',
 'Vocal']

In [32]:
y_pred, y_logits = nn.test(X_test)
y_prob = l.softmax(y_logits)
y_prob.shape, X_test.shape, y_logits.shape, test_y_sample.shape, test_y_sample[:1]


Out[32]:
((10400, 13),
 (10400, 26),
 (10400, 13),
 (10400, 14),
    Id   Blues  Country  Electronic    Folk  International    Jazz   Latin  \
 0   1  0.0964   0.0884      0.0121  0.1004         0.0137  0.1214  0.0883   
 
    New_Age  Pop_Rock     Rap  Reggae     RnB   Vocal  
 0   0.0765    0.0332  0.0445  0.1193  0.1019  0.1038  )

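Each row of y_prob should be a probability distribution over the 13 genres, so the rows should sum to one; a quick check before writing the submission:

# Sketch: predicted probabilities should sum to 1 per row
np.allclose(y_prob.sum(axis=1), 1.0)
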
In [33]:
pred_list = []
for Id, pred in enumerate(y_prob):
#     print(Id+1, *pred)
    pred_list.append([Id+1, *pred])

In [34]:
# Write the submission file: a header row followed by one probability row per test example
pred_file = open(file='prediction.csv', mode='w')

# Header row
for idx in range(len(heading)):
    if idx < len(heading) - 1:
        pred_file.write(heading[idx] + ',')
    else:
        pred_file.write(heading[idx] + '\n')

# Data rows
for i in range(len(pred_list)): # rows
    for j in range(len(pred_list[i])): # cols
        if j < (len(pred_list[i]) - 1):
            pred_file.write(str(pred_list[i][j]))
            pred_file.write(',')
        else: # last item in the row, end the line
            pred_file.write(str(pred_list[i][j]) + '\n')

pred_file.close()

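The manual writer above works; an equivalent and more compact route, sketched here with a hypothetical output name prediction-pandas.csv, is to build a DataFrame from y_prob and let pandas handle the CSV formatting:

# Sketch: build the submission with pandas instead of manual string writing
submission = pd.DataFrame(y_prob, columns=heading[1:])
submission.insert(0, 'Id', np.arange(1, len(y_prob) + 1))
submission.to_csv('prediction-pandas.csv', index=False)
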
In [35]:
pd.read_csv(filepath_or_buffer='prediction.csv').head()


Out[35]:
Id Blues Country Electronic Folk International Jazz Latin New_Age Pop_Rock Rap Reggae RnB Vocal
0 1 0.024459 0.029869 0.071955 0.018320 0.037965 0.018433 0.058840 0.013269 0.015400 0.271829 0.288743 0.103627 0.047292
1 2 0.037203 0.041213 0.077074 0.023895 0.040305 0.018671 0.068189 0.012533 0.024319 0.264086 0.242233 0.102108 0.048172
2 3 0.029063 0.035670 0.092826 0.017152 0.027159 0.011204 0.063797 0.005012 0.034270 0.317006 0.254042 0.088914 0.023884
3 4 0.042361 0.051137 0.084519 0.027309 0.034017 0.021157 0.077631 0.010954 0.028864 0.230577 0.230361 0.106878 0.054233
4 5 0.020682 0.024635 0.077866 0.011132 0.022983 0.007787 0.049199 0.004276 0.022132 0.345081 0.318951 0.075108 0.020168

In [36]:
pd.read_csv(filepath_or_buffer='prediction.csv').shape, test_y_sample.shape


Out[36]:
((10400, 14), (10400, 14))

In [140]:
test_y_sample.head()


Out[140]:
Id Blues Country Electronic Folk International Jazz Latin New_Age Pop_Rock Rap Reggae RnB Vocal
0 1 0.0964 0.0884 0.0121 0.1004 0.0137 0.1214 0.0883 0.0765 0.0332 0.0445 0.1193 0.1019 0.1038
1 2 0.0121 0.0804 0.0376 0.0289 0.1310 0.0684 0.1044 0.0118 0.1562 0.0585 0.1633 0.1400 0.0073
2 3 0.1291 0.0985 0.0691 0.0356 0.0788 0.0529 0.1185 0.1057 0.1041 0.0075 0.0481 0.1283 0.0238
3 4 0.0453 0.1234 0.0931 0.0126 0.1224 0.0627 0.0269 0.0764 0.0812 0.1337 0.0357 0.0937 0.0930
4 5 0.0600 0.0915 0.0667 0.0947 0.0509 0.0335 0.1251 0.0202 0.1012 0.0365 0.1310 0.0898 0.0991
