In [1]:
import pandas as pd # to read CSV files (Comma Separated Values)

train_x = pd.read_csv(filepath_or_buffer='data/kaggle-music-genre/train.x.csv')
train_x.head()


Out[1]:
Id att1 att2 att3 att4 att5 att6 att7 att8 att9 ... att18 att19 att20 att21 att22 att23 att24 att25 att26 msd_track_id
0 1 41.08 6.579 4.307 3.421 3.192 2.076 2.179 2.052 1.794 ... 1.3470 -0.2463 -1.5470 0.17920 -1.1530 -0.7370 0.40750 -0.67190 -0.05147 TRPLTEM128F92E1389
1 2 60.80 5.973 4.344 3.261 2.835 2.725 2.446 1.884 1.962 ... -0.3316 0.3519 -1.4760 0.52700 -2.1960 1.5990 -1.39000 0.22560 -0.72080 TRJWMBQ128F424155E
2 3 51.47 4.971 4.316 2.916 3.112 2.290 2.053 1.934 1.878 ... -0.2803 -0.1603 -0.1355 1.03500 0.2370 1.4890 0.02959 -0.13670 0.10820 TRRZWMO12903CCFCC2
3 4 41.28 6.610 4.411 2.602 2.822 2.126 1.984 1.973 1.945 ... -1.6930 1.0040 -0.3953 0.26710 -1.0450 0.4974 0.03724 1.04500 -0.20000 TRBZRUT12903CE6C04
4 5 54.17 8.945 4.685 4.208 3.154 3.527 2.733 2.202 2.686 ... 2.4690 -0.5449 -0.5622 -0.08968 -0.9823 -0.2445 -1.65800 -0.04825 -0.70950 TRLUJQF128F42AF5BF

5 rows × 28 columns
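
Before converting anything to NumPy, a quick sanity check on dtypes and missing values can be helpful (a minimal sketch, not part of the original pipeline):

# Optional sanity check on the raw training features
train_x.dtypes.value_counts()   # how many columns of each dtype
train_x.isna().sum().sum()      # total number of missing cells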


In [2]:
train_y = pd.read_csv(filepath_or_buffer='data/kaggle-music-genre/train.y.csv')
train_y.head()


Out[2]:
Id class_label
0 1 International
1 2 Vocal
2 3 Latin
3 4 Blues
4 5 Vocal

In [3]:
test_x = pd.read_csv(filepath_or_buffer='data/kaggle-music-genre/test.x.csv')
test_x.head()


Out[3]:
Id att1 att2 att3 att4 att5 att6 att7 att8 att9 ... att17 att18 att19 att20 att21 att22 att23 att24 att25 att26
0 1 38.22 8.076 6.935 4.696 3.856 3.465 2.922 2.568 2.070 ... 3.988 0.4957 0.1836 -2.2210 0.6453 -0.2923 1.2000 -0.09179 0.4674 0.2158
1 2 36.42 6.131 5.364 4.292 3.968 2.937 2.872 2.142 2.050 ... 7.098 1.2290 0.5971 -1.0670 0.9569 -1.8240 2.3130 -0.80890 0.5612 -0.6225
2 3 70.01 5.496 4.698 3.699 3.258 2.293 2.680 2.226 2.034 ... 4.449 0.4773 1.6370 -1.0690 2.4160 -0.6299 1.4190 -0.81960 0.9151 -0.5948
3 4 40.64 7.281 6.702 4.043 3.729 3.043 2.644 2.366 1.940 ... 2.785 1.9000 -1.1370 1.2750 1.7920 -2.1250 1.6090 -0.83230 -0.1998 -0.1218
4 5 38.85 7.118 5.703 4.825 4.088 3.823 3.254 2.551 2.193 ... 4.536 2.1470 1.0200 -0.2656 2.8050 0.2762 0.2504 1.04900 0.3447 -0.7689

5 rows × 27 columns
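
Since the training frame has 28 columns and the test frame 27, a quick check (sketch) confirms that the only difference is the msd_track_id column:

# Columns present in train but not in test; expected: {'msd_track_id'}
set(train_x.columns) - set(test_x.columns)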


In [4]:
test_y_sample = pd.read_csv(filepath_or_buffer='data/kaggle-music-genre/submission-random.csv')
test_y_sample.head()


Out[4]:
Id Blues Country Electronic Folk International Jazz Latin New_Age Pop_Rock Rap Reggae RnB Vocal
0 1 0.0964 0.0884 0.0121 0.1004 0.0137 0.1214 0.0883 0.0765 0.0332 0.0445 0.1193 0.1019 0.1038
1 2 0.0121 0.0804 0.0376 0.0289 0.1310 0.0684 0.1044 0.0118 0.1562 0.0585 0.1633 0.1400 0.0073
2 3 0.1291 0.0985 0.0691 0.0356 0.0788 0.0529 0.1185 0.1057 0.1041 0.0075 0.0481 0.1283 0.0238
3 4 0.0453 0.1234 0.0931 0.0126 0.1224 0.0627 0.0269 0.0764 0.0812 0.1337 0.0357 0.0937 0.0930
4 5 0.0600 0.0915 0.0667 0.0947 0.0509 0.0335 0.1251 0.0202 0.1012 0.0365 0.1310 0.0898 0.0991

In [5]:
test_y_sample[:0]


Out[5]:
Id Blues Country Electronic Folk International Jazz Latin New_Age Pop_Rock Rap Reggae RnB Vocal
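
The empty slice above just exposes the required submission layout: an Id column plus one probability column per genre. A minimal sketch of assembling such a frame is shown below; probs is a hypothetical (n_test, 13) array of class probabilities that is not produced anywhere in this notebook yet:

# Hypothetical sketch: 'probs' is assumed to be an (n_test, 13) array of class probabilities
# whose columns follow the same genre order as the sample submission.
genre_cols = list(test_y_sample.columns[1:])            # the 13 genre names
submission = pd.DataFrame(probs, columns=genre_cols)
submission.insert(0, 'Id', test_y_sample['Id'].values)  # keep the original Id column
submission.to_csv('submission.csv', index=False)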

In [6]:
import numpy as np

train_X = np.array(train_x)
train_Y = np.array(train_y[:]['class_label'])
test_X = np.array(test_x)

# Drop the Id and msd_track_id columns from the training features, and the Id column from the test features
X_train_val = np.array(train_X[:, 1:-1], dtype=float)
X_test = np.array(test_X[:, 1:], dtype=float)

train_Y.shape


Out[6]:
(13000,)

In [7]:
from collections import Counter

# Count the frequency of each label in the training set
counted_labels = Counter(train_Y)
labels_keys = counted_labels.keys()
labels_keys


Out[7]:
dict_keys(['Rap', 'Electronic', 'Vocal', 'Blues', 'International', 'Reggae', 'RnB', 'Country', 'Jazz', 'Pop_Rock', 'Latin', 'New_Age', 'Folk'])

In [8]:
labels_keys_sorted = sorted(labels_keys)
labels_keys_sorted


Out[8]:
['Blues',
 'Country',
 'Electronic',
 'Folk',
 'International',
 'Jazz',
 'Latin',
 'New_Age',
 'Pop_Rock',
 'Rap',
 'Reggae',
 'RnB',
 'Vocal']

In [9]:
# Dict comprehension building the label -> index vocabulary
key_to_val = {key: val for val, key in enumerate(labels_keys_sorted)}
key_to_val['Country']
key_to_val


Out[9]:
{'Blues': 0,
 'Country': 1,
 'Electronic': 2,
 'Folk': 3,
 'International': 4,
 'Jazz': 5,
 'Latin': 6,
 'New_Age': 7,
 'Pop_Rock': 8,
 'Rap': 9,
 'Reggae': 10,
 'RnB': 11,
 'Vocal': 12}

In [10]:
val_to_key = {val: key for val, key in enumerate(labels_keys_sorted)}
val_to_key[1]
val_to_key


Out[10]:
{0: 'Blues',
 1: 'Country',
 2: 'Electronic',
 3: 'Folk',
 4: 'International',
 5: 'Jazz',
 6: 'Latin',
 7: 'New_Age',
 8: 'Pop_Rock',
 9: 'Rap',
 10: 'Reggae',
 11: 'RnB',
 12: 'Vocal'}

In [11]:
Y_train_vec = []
for each in train_y[:]['class_label']:
#     print(each, key_to_val[each])
    Y_train_vec.append(key_to_val[each])

Y_train_val = np.array(Y_train_vec)
Y_train_val.shape


Out[11]:
(13000,)
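
The same encoding can be obtained without an explicit loop; a minimal equivalent sketch using pandas' map (assuming key_to_val from above):

# Vectorized alternative to the loop above; gives the same array as Y_train_val
Y_train_val_alt = train_y['class_label'].map(key_to_val).to_numpy()
assert (Y_train_val_alt == Y_train_val).all()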

In [12]:
# # Pre-processing (earlier attempt, kept for reference): mean-centering only
# def normalize(X):
#     return (X - X.mean(axis=0)) #/ X.std(axis=0)
# X_train, X_val, X_test = normalize(X=X_train), normalize(X=X_val), normalize(X=X_test)

# Preprocessing: normalizing the data based on the training set
mean = X_train_val.mean(axis=0)
std = X_train_val.std(axis=0)

X_train_val, X_test = (X_train_val - mean)/ std, (X_test - mean)/ std
X_train_val.shape, X_test.shape, X_train_val.dtype, X_test.dtype


Out[12]:
((13000, 26), (10400, 26), dtype('float64'), dtype('float64'))
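
As a quick check (sketch), the standardized training features should now have roughly zero mean and unit standard deviation per column, while the test features are only approximately standardized because the training statistics were reused:

# Sanity check on the standardized features
X_train_val.mean(axis=0).round(6)   # ~0 for every column
X_train_val.std(axis=0).round(6)    # ~1 for every column
X_test.mean(axis=0).round(2)        # close to, but not exactly, 0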

In [13]:
# Create a validation set from the last 10% of the labelled training data
valid_size = X_train_val.shape[0]//10
X_val = X_train_val[-valid_size:]
Y_val = Y_train_val[-valid_size:]
X_train = X_train_val[: -valid_size]
Y_train = Y_train_val[: -valid_size]
X_train.shape, X_val.shape, X_test.shape, Y_val.shape, Y_train.shape 
# X_train.dtype, X_val.dtype
# Y_train.dtype, Y_val


Out[13]:
((11700, 26), (1300, 26), (10400, 26), (1300,), (11700,))
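
Note that this split simply takes the last 10% of rows as validation, without shuffling, which is fine if the rows are already in random order. A shuffled, stratified alternative (a sketch using scikit-learn, not what this notebook does) would be:

# Sketch of a shuffled, stratified split; variable names X_tr/X_va/Y_tr/Y_va are illustrative only
from sklearn.model_selection import train_test_split
X_tr, X_va, Y_tr, Y_va = train_test_split(
    X_train_val, Y_train_val, test_size=0.1, random_state=0, stratify=Y_train_val)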

In [14]:
# Model
import impl.layer as l # or from impl.layer import *
from impl.loss import * # provides cross_entropy and dcross_entropy
from sklearn.utils import shuffle as skshuffle

class FFNN:

    def __init__(self, D, C, H, L, keep_prob):
        self.L = L # number of hidden layers (depth)
        self.losses = {'train':[], 'train_acc':[], 'valid':[], 'valid_acc':[]}
        self.keep_prob = keep_prob # 1 - p_dropout
        
        # Randomly initialized, learnable weights and biases for each layer
        self.model = []
        self.grads = []
        low, high = -1, 1
        
        # Input layer: weights/ biases
        m = dict(W=np.random.uniform(size=(D, H), low=low, high=high) / np.sqrt(D / 2.), 
                 b=np.zeros((1, H)))
        self.model.append(m)
        # Input layer: gradients
        self.grads.append({key: np.zeros_like(val) for key, val in self.model[0].items()})

        # Hidden layers: weights/ biases
        m_L = []
        for _ in range(L):
            m = dict(W=np.random.uniform(size=(H, H), low=low, high=high) / np.sqrt(H / 2.), 
                     b=np.zeros((1, H)))
            m_L.append(m)
        self.model.append(m_L)
        # Hidden layer: gradients
        grad_L = []
        for _ in range(L):
            grad_L.append({key: np.zeros_like(val) for key, val in self.model[1][0].items()})
        self.grads.append(grad_L)
        
        # Output layer: weights/ biases
        m = dict(W=np.random.uniform(size=(H, C), low=low, high=high) / np.sqrt(H / 2.), 
                 b=np.zeros((1, C)))
        self.model.append(m)
        # Output layer: gradients
        self.grads.append({key: np.zeros_like(val) for key, val in self.model[2].items()})
        
    def fc_forward(self, X, W, b):
        out = (X @ W) + b
        cache = (W, X)
        return out, cache

    def fc_backward(self, dout, cache):
        W, X = cache

        dW = X.T @ dout
        db = np.sum(dout, axis=0).reshape(1, -1) # db_1xn
        dX = dout @ W.T # Backprop

        return dX, dW, db

    def train_forward(self, X, train):
        caches, ys = [], []
        
        # Input layer
        y, fc_cache = self.fc_forward(X=X, W=self.model[0]['W'], b=self.model[0]['b']) # X_1xD, y_1xc
#         y, nl_cache = l.tanh_forward(X=y)
#         y, nl_cache = l.relu_forward(X=y)
        y, nl_cache = l.selu_forward(X=y)
        if train:
            y, do_cache = l.selu_dropout_forward(h=y, q=self.keep_prob)
            caches.append((fc_cache, nl_cache, do_cache))
        X = y.copy() # pass to the next layer
        
        # Hidden layers
        fc_caches, nl_caches, do_caches = [], [], []
        for layer in range(self.L):
            y, fc_cache = self.fc_forward(X=X, W=self.model[1][layer]['W'], b=self.model[1][layer]['b'])
#             y, nl_cache = l.tanh_forward(X=y)
#             y, nl_cache = l.relu_forward(X=y)
            y, nl_cache = l.selu_forward(X=y)
            if train:
                y, do_cache = l.selu_dropout_forward(h=y, q=self.keep_prob)
                fc_caches.append(fc_cache)
                nl_caches.append(nl_cache)
                do_caches.append(do_cache)
            X = y.copy() # pass to next layer
        if train:
            caches.append((fc_caches, nl_caches, do_caches)) # caches[1]            
        
        # Output layer
        y, fc_cache = self.fc_forward(X=X, W=self.model[2]['W'], b=self.model[2]['b'])
        # Softmax is included in loss function
        if train:
            caches.append(fc_cache)

        return y, caches # caches are needed for backpropagating the error

    def loss_function(self, y, y_train):
        
        loss = cross_entropy(y, y_train) # softmax is included
        dy = dcross_entropy(y, y_train) # dsoftmax is included
        
        return loss, dy
        
    def train_backward(self, dy, caches):
        grads = self.grads # gradient buffers, overwritten on every iteration
        
        # Output layer
        fc_cache = caches[2]
        # dSoftmax is included in loss function
        dX, dW, db = self.fc_backward(dout=dy, cache=fc_cache)
        dy = dX.copy()
        grads[2]['W'] = dW
        grads[2]['b'] = db

        # Hidden layer
        fc_caches, nl_caches, do_caches = caches[1]
        for layer in reversed(range(self.L)):
            dy = l.selu_dropout_backward(cache=do_caches[layer], dout=dy)
#             dy = l.tanh_backward(cache=nl_caches[layer], dout=dy) # diffable function
#             dy = l.relu_backward(cache=nl_caches[layer], dout=dy) # diffable function
            dy = l.selu_backward(cache=nl_caches[layer], dout=dy) # diffable function
            dX, dW, db = self.fc_backward(dout=dy, cache=fc_caches[layer])
            dy = dX.copy()
            grads[1][layer]['W'] = dW
            grads[1][layer]['b'] = db
        
        # Input layer
        fc_cache, nl_cache, do_cache = caches[0]
        dy = l.selu_dropout_backward(cache=do_cache, dout=dy)
#         dy = l.tanh_backward(cache=nl_cache, dout=dy) # diffable function
#         dy = l.relu_backward(cache=nl_cache, dout=dy) # diffable function
        dy = l.selu_backward(cache=nl_cache, dout=dy) # diffable function
        _, dW, db = self.fc_backward(dout=dy, cache=fc_cache)
        grads[0]['W'] = dW
        grads[0]['b'] = db

        return grads
    
    def test(self, X):
        y_logit, _ = self.train_forward(X, train=False)
        
        # if self.mode == 'classification':
        y_prob = l.softmax(y_logit)        # class probabilities
        y_pred = np.argmax(y_prob, axis=1) # predicted class index, used for accuracy
        
        return y_pred, y_logit
        
    def get_minibatch(self, X, y, minibatch_size, shuffle):
        minibatches = []

        if shuffle:
            X, y = skshuffle(X, y)

        for i in range(0, X.shape[0], minibatch_size):
            X_mini = X[i:i + minibatch_size]
            y_mini = y[i:i + minibatch_size]
            minibatches.append((X_mini, y_mini))

        return minibatches

    def sgd(self, train_set, val_set, alpha, mb_size, n_iter, print_after):
        X_train, y_train = train_set
        X_val, y_val = val_set

        # Training iterations (one random minibatch per iteration)
        for iter in range(1, n_iter + 1):

            # Build the shuffled minibatches and pick one at random for this update
            minibatches = self.get_minibatch(X_train, y_train, mb_size, shuffle=True)
            idx = np.random.randint(0, len(minibatches))
            X_mini, y_mini = minibatches[idx]
            
            # Train the model
            y, caches = self.train_forward(X_mini, train=True)
            _, dy = self.loss_function(y, y_mini)
            grads = self.train_backward(dy, caches) 
            
            # Update the model for input layer
            for key in grads[0].keys():
                self.model[0][key] -= alpha * grads[0][key]

            # Update the model for the hidden layers
            for layer in range(self.L):
                for key in grads[1][layer].keys():
                    self.model[1][layer][key] -= alpha * grads[1][layer][key]

            # Update the model for output layer
            for key in grads[2].keys():
                self.model[2][key] -= alpha * grads[2][key]
                
            # Trained model info
            y_pred, y_logit = self.test(X_mini)
            loss, _ = self.loss_function(y_logit, y_mini) # softmax is included in the cross-entropy loss
            self.losses['train'].append(loss)
            acc = np.mean(y_pred == y_mini) # minibatch accuracy
            self.losses['train_acc'].append(acc)

            # Validated model info
            y_pred, y_logit = self.test(X_val)
            valid_loss, _ = self.loss_function(y_logit, y_val) # softmax is included in the cross-entropy loss
            self.losses['valid'].append(valid_loss)
            valid_acc = np.mean(y_pred == y_val) # validation accuracy
            self.losses['valid_acc'].append(valid_acc)
            
            # Print the model info: loss & accuracy or err & acc
            if iter % print_after == 0:
                print('Iter: {}, train loss: {:.4f}, train acc: {:.4f}, valid loss: {:.4f}, valid acc: {:.4f}'.format(
                    iter, loss, acc, valid_loss, valid_acc))

#         # Test the final model
#         y_pred, y_logit = nn.test(X_test)
#         loss, _ = self.loss_function(y_logit, y_test) # softmax is included in entropy loss function
#         acc = np.mean(y_pred == y_test)
#         print('Last iteration - Test accuracy mean: {:.4f}, std: {:.4f}, loss: {:.4f}'.format(
#             acc.mean(), acc.std(), loss))
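
Before training, the fully connected forward/backward math used in fc_forward/fc_backward can be verified numerically. The sketch below repeats the same formulas on a tiny random example and compares the analytic gradient of W with a central finite difference; it is an added check, not part of the original notebook.

# Finite-difference check of the fc_forward/fc_backward formulas (standalone sketch)
rng = np.random.RandomState(0)
X_c, W_c, b_c = rng.randn(4, 5), rng.randn(5, 3), rng.randn(1, 3)
dout_c = rng.randn(4, 3)

# Analytic gradient, same formula as fc_backward: dW = X.T @ dout
dW_analytic = X_c.T @ dout_c

# Numerical gradient of f(W) = sum((X @ W + b) * dout) w.r.t. each entry of W
eps = 1e-6
dW_numeric = np.zeros_like(W_c)
for i in range(W_c.shape[0]):
    for j in range(W_c.shape[1]):
        W_plus, W_minus = W_c.copy(), W_c.copy()
        W_plus[i, j] += eps
        W_minus[i, j] -= eps
        f_plus = np.sum((X_c @ W_plus + b_c) * dout_c)
        f_minus = np.sum((X_c @ W_minus + b_c) * dout_c)
        dW_numeric[i, j] = (f_plus - f_minus) / (2 * eps)

np.max(np.abs(dW_analytic - dW_numeric))  # should be on the order of 1e-9 or smaller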

In [15]:
Y_train.shape, X_train.shape, X_val.shape, Y_val.shape


Out[15]:
((11700,), (11700, 26), (1300, 26), (1300,))
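
To put the validation accuracies printed below into context, a majority-class baseline (sketch) gives the accuracy obtained by always predicting the most frequent training genre:

# Majority-class baseline on the validation split, for comparison with valid acc below
majority_label = Counter(Y_train).most_common(1)[0][0]
baseline_acc = np.mean(Y_val == majority_label)
baseline_acc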

In [16]:
# Hyper-parameters
n_iter = 100000 # number of SGD iterations (one random minibatch per iteration)
alpha = 1e-3 # learning rate
mb_size = 64 # minibatch size
print_after = 100 # print train/valid loss and accuracy every 100 iterations
num_hidden_units = 32 # hidden units per layer
num_input_units = X_train.shape[1] # number of input features
num_output_units = Y_train.max() + 1 # number of classes in this classification problem
# num_output_units = Y_train.shape[1] # (alternative if the labels were one-hot encoded)
num_layers = 2 # depth: number of hidden layers
keep_prob = 0.95 # keep probability for SELU dropout

# Build the network and train it
nn = FFNN(C=num_output_units, D=num_input_units, H=num_hidden_units, L=num_layers, keep_prob=keep_prob)

nn.sgd(train_set=(X_train, Y_train), val_set=(X_val, Y_val), mb_size=mb_size, alpha=alpha, 
           n_iter=n_iter, print_after=print_after)


Iter: 100, train loss: 2.9465, train acc: 0.0781, valid loss: 2.7803, valid acc: 0.0731
Iter: 200, train loss: 2.6232, train acc: 0.1250, valid loss: 2.7368, valid acc: 0.0785
Iter: 300, train loss: 2.6348, train acc: 0.1094, valid loss: 2.7010, valid acc: 0.0831
Iter: 400, train loss: 2.5715, train acc: 0.1094, valid loss: 2.6715, valid acc: 0.0869
Iter: 500, train loss: 2.6407, train acc: 0.1250, valid loss: 2.6448, valid acc: 0.0938
Iter: 600, train loss: 2.6377, train acc: 0.0469, valid loss: 2.6218, valid acc: 0.1031
Iter: 700, train loss: 2.5308, train acc: 0.1406, valid loss: 2.6003, valid acc: 0.1115
Iter: 800, train loss: 2.5808, train acc: 0.1250, valid loss: 2.5809, valid acc: 0.1185
Iter: 900, train loss: 2.4426, train acc: 0.0781, valid loss: 2.5630, valid acc: 0.1246
Iter: 1000, train loss: 2.5300, train acc: 0.1094, valid loss: 2.5470, valid acc: 0.1346
Iter: 1100, train loss: 2.6057, train acc: 0.1406, valid loss: 2.5313, valid acc: 0.1385
Iter: 1200, train loss: 2.4836, train acc: 0.0938, valid loss: 2.5176, valid acc: 0.1454
Iter: 1300, train loss: 2.5015, train acc: 0.1094, valid loss: 2.5041, valid acc: 0.1554
Iter: 1400, train loss: 2.3553, train acc: 0.2656, valid loss: 2.4919, valid acc: 0.1592
Iter: 1500, train loss: 2.4618, train acc: 0.1094, valid loss: 2.4800, valid acc: 0.1677
Iter: 1600, train loss: 2.4168, train acc: 0.1406, valid loss: 2.4685, valid acc: 0.1723
Iter: 1700, train loss: 2.3751, train acc: 0.1719, valid loss: 2.4578, valid acc: 0.1800
Iter: 1800, train loss: 2.4650, train acc: 0.1250, valid loss: 2.4473, valid acc: 0.1831
Iter: 1900, train loss: 2.4609, train acc: 0.1562, valid loss: 2.4372, valid acc: 0.1831
Iter: 2000, train loss: 2.4866, train acc: 0.0938, valid loss: 2.4275, valid acc: 0.1862
Iter: 2100, train loss: 2.2460, train acc: 0.2500, valid loss: 2.4180, valid acc: 0.1923
Iter: 2200, train loss: 2.4460, train acc: 0.1719, valid loss: 2.4090, valid acc: 0.1938
Iter: 2300, train loss: 2.2398, train acc: 0.2344, valid loss: 2.4003, valid acc: 0.1985
Iter: 2400, train loss: 2.4666, train acc: 0.1406, valid loss: 2.3919, valid acc: 0.2015
Iter: 2500, train loss: 2.3554, train acc: 0.2656, valid loss: 2.3840, valid acc: 0.2046
Iter: 2600, train loss: 2.2294, train acc: 0.2031, valid loss: 2.3763, valid acc: 0.2069
Iter: 2700, train loss: 2.4061, train acc: 0.1875, valid loss: 2.3689, valid acc: 0.2115
Iter: 2800, train loss: 2.3525, train acc: 0.1719, valid loss: 2.3615, valid acc: 0.2131
Iter: 2900, train loss: 2.3511, train acc: 0.1719, valid loss: 2.3546, valid acc: 0.2131
Iter: 3000, train loss: 2.3228, train acc: 0.2188, valid loss: 2.3478, valid acc: 0.2108
Iter: 3100, train loss: 2.3546, train acc: 0.1875, valid loss: 2.3415, valid acc: 0.2085
Iter: 3200, train loss: 2.3257, train acc: 0.2500, valid loss: 2.3353, valid acc: 0.2138
Iter: 3300, train loss: 2.3255, train acc: 0.1250, valid loss: 2.3292, valid acc: 0.2154
Iter: 3400, train loss: 2.3150, train acc: 0.2812, valid loss: 2.3236, valid acc: 0.2131
Iter: 3500, train loss: 2.2916, train acc: 0.2656, valid loss: 2.3185, valid acc: 0.2146
Iter: 3600, train loss: 2.3468, train acc: 0.1562, valid loss: 2.3130, valid acc: 0.2146
Iter: 3700, train loss: 2.3134, train acc: 0.1250, valid loss: 2.3082, valid acc: 0.2138
Iter: 3800, train loss: 2.1415, train acc: 0.2656, valid loss: 2.3036, valid acc: 0.2162
Iter: 3900, train loss: 2.2950, train acc: 0.2500, valid loss: 2.2991, valid acc: 0.2185
Iter: 4000, train loss: 2.2473, train acc: 0.2031, valid loss: 2.2950, valid acc: 0.2246
Iter: 4100, train loss: 2.2382, train acc: 0.2969, valid loss: 2.2906, valid acc: 0.2254
Iter: 4200, train loss: 2.3832, train acc: 0.1719, valid loss: 2.2866, valid acc: 0.2238
Iter: 4300, train loss: 2.3745, train acc: 0.1719, valid loss: 2.2828, valid acc: 0.2246
Iter: 4400, train loss: 2.2166, train acc: 0.2656, valid loss: 2.2791, valid acc: 0.2246
Iter: 4500, train loss: 2.2700, train acc: 0.2344, valid loss: 2.2754, valid acc: 0.2246
Iter: 4600, train loss: 2.2604, train acc: 0.1719, valid loss: 2.2720, valid acc: 0.2262
Iter: 4700, train loss: 2.3448, train acc: 0.1562, valid loss: 2.2687, valid acc: 0.2285
Iter: 4800, train loss: 2.2301, train acc: 0.2344, valid loss: 2.2660, valid acc: 0.2315
Iter: 4900, train loss: 2.1775, train acc: 0.3438, valid loss: 2.2625, valid acc: 0.2323
Iter: 5000, train loss: 2.1571, train acc: 0.2656, valid loss: 2.2594, valid acc: 0.2308
Iter: 5100, train loss: 2.3008, train acc: 0.1875, valid loss: 2.2564, valid acc: 0.2323
Iter: 5200, train loss: 2.1284, train acc: 0.2969, valid loss: 2.2536, valid acc: 0.2338
Iter: 5300, train loss: 2.1153, train acc: 0.3125, valid loss: 2.2511, valid acc: 0.2346
Iter: 5400, train loss: 2.1963, train acc: 0.2031, valid loss: 2.2485, valid acc: 0.2346
Iter: 5500, train loss: 2.2427, train acc: 0.2500, valid loss: 2.2461, valid acc: 0.2362
Iter: 5600, train loss: 2.2995, train acc: 0.2344, valid loss: 2.2439, valid acc: 0.2377
Iter: 5700, train loss: 2.3494, train acc: 0.2188, valid loss: 2.2417, valid acc: 0.2377
Iter: 5800, train loss: 2.3243, train acc: 0.1875, valid loss: 2.2396, valid acc: 0.2377
Iter: 5900, train loss: 2.2660, train acc: 0.2500, valid loss: 2.2377, valid acc: 0.2392
Iter: 6000, train loss: 2.1531, train acc: 0.3125, valid loss: 2.2352, valid acc: 0.2400
Iter: 6100, train loss: 2.1566, train acc: 0.1875, valid loss: 2.2330, valid acc: 0.2392
Iter: 6200, train loss: 2.2044, train acc: 0.2344, valid loss: 2.2308, valid acc: 0.2431
Iter: 6300, train loss: 2.2248, train acc: 0.2344, valid loss: 2.2289, valid acc: 0.2431
Iter: 6400, train loss: 2.2122, train acc: 0.2188, valid loss: 2.2271, valid acc: 0.2423
Iter: 6500, train loss: 2.0855, train acc: 0.2656, valid loss: 2.2255, valid acc: 0.2415
Iter: 6600, train loss: 2.1644, train acc: 0.2500, valid loss: 2.2236, valid acc: 0.2438
Iter: 6700, train loss: 2.1315, train acc: 0.2812, valid loss: 2.2223, valid acc: 0.2431
Iter: 6800, train loss: 2.0545, train acc: 0.3125, valid loss: 2.2202, valid acc: 0.2446
Iter: 6900, train loss: 2.1258, train acc: 0.3281, valid loss: 2.2187, valid acc: 0.2462
Iter: 7000, train loss: 2.2310, train acc: 0.2344, valid loss: 2.2171, valid acc: 0.2454
Iter: 7100, train loss: 2.2959, train acc: 0.2656, valid loss: 2.2155, valid acc: 0.2446
Iter: 7200, train loss: 2.2689, train acc: 0.2656, valid loss: 2.2143, valid acc: 0.2485
Iter: 7300, train loss: 2.1945, train acc: 0.2031, valid loss: 2.2126, valid acc: 0.2485
Iter: 7400, train loss: 2.1084, train acc: 0.2969, valid loss: 2.2111, valid acc: 0.2492
Iter: 7500, train loss: 2.1082, train acc: 0.2969, valid loss: 2.2093, valid acc: 0.2492
Iter: 7600, train loss: 2.1131, train acc: 0.2812, valid loss: 2.2082, valid acc: 0.2492
Iter: 7700, train loss: 2.1070, train acc: 0.2656, valid loss: 2.2062, valid acc: 0.2523
Iter: 7800, train loss: 2.3934, train acc: 0.2031, valid loss: 2.2050, valid acc: 0.2554
Iter: 7900, train loss: 2.1290, train acc: 0.3281, valid loss: 2.2038, valid acc: 0.2538
Iter: 8000, train loss: 2.1808, train acc: 0.2188, valid loss: 2.2027, valid acc: 0.2554
Iter: 8100, train loss: 2.1915, train acc: 0.2344, valid loss: 2.2013, valid acc: 0.2523
Iter: 8200, train loss: 2.3379, train acc: 0.1875, valid loss: 2.1997, valid acc: 0.2523
Iter: 8300, train loss: 2.0765, train acc: 0.3125, valid loss: 2.1982, valid acc: 0.2546
Iter: 8400, train loss: 2.1533, train acc: 0.2969, valid loss: 2.1974, valid acc: 0.2538
Iter: 8500, train loss: 2.2552, train acc: 0.2969, valid loss: 2.1962, valid acc: 0.2523
Iter: 8600, train loss: 1.9552, train acc: 0.3281, valid loss: 2.1949, valid acc: 0.2531
Iter: 8700, train loss: 2.0836, train acc: 0.3125, valid loss: 2.1941, valid acc: 0.2538
Iter: 8800, train loss: 2.1099, train acc: 0.3281, valid loss: 2.1928, valid acc: 0.2523
Iter: 8900, train loss: 2.3575, train acc: 0.1719, valid loss: 2.1915, valid acc: 0.2531
Iter: 9000, train loss: 2.1541, train acc: 0.2969, valid loss: 2.1909, valid acc: 0.2523
Iter: 9100, train loss: 2.1196, train acc: 0.2812, valid loss: 2.1891, valid acc: 0.2538
Iter: 9200, train loss: 2.2041, train acc: 0.2344, valid loss: 2.1885, valid acc: 0.2546
Iter: 9300, train loss: 2.2274, train acc: 0.2344, valid loss: 2.1877, valid acc: 0.2531
Iter: 9400, train loss: 2.2746, train acc: 0.2969, valid loss: 2.1867, valid acc: 0.2554
Iter: 9500, train loss: 2.0289, train acc: 0.3125, valid loss: 2.1854, valid acc: 0.2562
Iter: 9600, train loss: 2.0079, train acc: 0.2969, valid loss: 2.1845, valid acc: 0.2538
Iter: 9700, train loss: 2.2022, train acc: 0.2656, valid loss: 2.1832, valid acc: 0.2546
Iter: 9800, train loss: 2.3145, train acc: 0.2188, valid loss: 2.1825, valid acc: 0.2538
Iter: 9900, train loss: 2.1580, train acc: 0.3281, valid loss: 2.1813, valid acc: 0.2546
Iter: 10000, train loss: 2.1096, train acc: 0.2812, valid loss: 2.1800, valid acc: 0.2562
Iter: 10100, train loss: 2.1420, train acc: 0.3594, valid loss: 2.1795, valid acc: 0.2569
Iter: 10200, train loss: 2.1198, train acc: 0.3750, valid loss: 2.1786, valid acc: 0.2569
Iter: 10300, train loss: 2.1455, train acc: 0.2656, valid loss: 2.1779, valid acc: 0.2592
Iter: 10400, train loss: 2.1398, train acc: 0.2656, valid loss: 2.1764, valid acc: 0.2569
Iter: 10500, train loss: 2.1856, train acc: 0.2969, valid loss: 2.1757, valid acc: 0.2585
Iter: 10600, train loss: 2.0070, train acc: 0.3125, valid loss: 2.1746, valid acc: 0.2577
Iter: 10700, train loss: 2.0278, train acc: 0.3281, valid loss: 2.1738, valid acc: 0.2585
Iter: 10800, train loss: 2.0659, train acc: 0.3125, valid loss: 2.1726, valid acc: 0.2615
Iter: 10900, train loss: 2.2451, train acc: 0.2656, valid loss: 2.1716, valid acc: 0.2623
Iter: 11000, train loss: 2.4547, train acc: 0.2500, valid loss: 2.1711, valid acc: 0.2631
Iter: 11100, train loss: 2.2456, train acc: 0.2344, valid loss: 2.1712, valid acc: 0.2631
Iter: 11200, train loss: 2.1767, train acc: 0.3438, valid loss: 2.1706, valid acc: 0.2638
Iter: 11300, train loss: 2.0062, train acc: 0.4219, valid loss: 2.1700, valid acc: 0.2631
Iter: 11400, train loss: 2.1934, train acc: 0.2656, valid loss: 2.1696, valid acc: 0.2646
Iter: 11500, train loss: 2.2049, train acc: 0.2656, valid loss: 2.1688, valid acc: 0.2646
Iter: 11600, train loss: 1.8964, train acc: 0.3750, valid loss: 2.1683, valid acc: 0.2631
Iter: 11700, train loss: 2.2586, train acc: 0.2500, valid loss: 2.1672, valid acc: 0.2646
Iter: 11800, train loss: 1.8651, train acc: 0.4844, valid loss: 2.1662, valid acc: 0.2654
Iter: 11900, train loss: 2.0666, train acc: 0.2969, valid loss: 2.1651, valid acc: 0.2654
Iter: 12000, train loss: 2.0265, train acc: 0.3125, valid loss: 2.1645, valid acc: 0.2662
Iter: 12100, train loss: 2.0919, train acc: 0.3438, valid loss: 2.1630, valid acc: 0.2669
Iter: 12200, train loss: 2.1066, train acc: 0.2812, valid loss: 2.1623, valid acc: 0.2685
Iter: 12300, train loss: 1.8678, train acc: 0.4844, valid loss: 2.1619, valid acc: 0.2685
Iter: 12400, train loss: 2.1437, train acc: 0.2969, valid loss: 2.1611, valid acc: 0.2685
Iter: 12500, train loss: 2.0572, train acc: 0.2500, valid loss: 2.1600, valid acc: 0.2685
Iter: 12600, train loss: 2.2442, train acc: 0.2656, valid loss: 2.1599, valid acc: 0.2700
Iter: 12700, train loss: 2.0732, train acc: 0.2656, valid loss: 2.1591, valid acc: 0.2715
Iter: 12800, train loss: 1.9867, train acc: 0.3906, valid loss: 2.1583, valid acc: 0.2723
Iter: 12900, train loss: 1.9355, train acc: 0.2969, valid loss: 2.1573, valid acc: 0.2738
Iter: 13000, train loss: 2.1197, train acc: 0.3125, valid loss: 2.1562, valid acc: 0.2723
Iter: 13100, train loss: 1.9700, train acc: 0.3594, valid loss: 2.1559, valid acc: 0.2754
Iter: 13200, train loss: 2.1468, train acc: 0.3281, valid loss: 2.1553, valid acc: 0.2731
Iter: 13300, train loss: 2.1388, train acc: 0.2969, valid loss: 2.1549, valid acc: 0.2715
Iter: 13400, train loss: 2.2209, train acc: 0.2500, valid loss: 2.1541, valid acc: 0.2700
Iter: 13500, train loss: 2.2199, train acc: 0.3125, valid loss: 2.1541, valid acc: 0.2723
Iter: 13600, train loss: 2.2478, train acc: 0.2500, valid loss: 2.1532, valid acc: 0.2715
Iter: 13700, train loss: 2.1172, train acc: 0.3906, valid loss: 2.1522, valid acc: 0.2715
Iter: 13800, train loss: 1.9540, train acc: 0.3438, valid loss: 2.1512, valid acc: 0.2762
Iter: 13900, train loss: 1.8852, train acc: 0.4062, valid loss: 2.1504, valid acc: 0.2746
Iter: 14000, train loss: 2.0742, train acc: 0.4375, valid loss: 2.1501, valid acc: 0.2731
Iter: 14100, train loss: 2.2126, train acc: 0.4062, valid loss: 2.1493, valid acc: 0.2777
Iter: 14200, train loss: 1.9796, train acc: 0.3281, valid loss: 2.1486, valid acc: 0.2769
Iter: 14300, train loss: 2.0098, train acc: 0.2969, valid loss: 2.1472, valid acc: 0.2754
Iter: 14400, train loss: 2.1634, train acc: 0.2500, valid loss: 2.1465, valid acc: 0.2723
Iter: 14500, train loss: 2.1609, train acc: 0.1719, valid loss: 2.1452, valid acc: 0.2777
Iter: 14600, train loss: 2.1052, train acc: 0.2969, valid loss: 2.1451, valid acc: 0.2754
Iter: 14700, train loss: 2.0199, train acc: 0.2969, valid loss: 2.1445, valid acc: 0.2731
Iter: 14800, train loss: 2.1060, train acc: 0.2969, valid loss: 2.1437, valid acc: 0.2738
Iter: 14900, train loss: 2.0630, train acc: 0.2969, valid loss: 2.1423, valid acc: 0.2777
Iter: 15000, train loss: 2.0435, train acc: 0.3594, valid loss: 2.1420, valid acc: 0.2785
Iter: 15100, train loss: 2.1889, train acc: 0.3281, valid loss: 2.1421, valid acc: 0.2792
Iter: 15200, train loss: 1.9777, train acc: 0.3125, valid loss: 2.1409, valid acc: 0.2785
Iter: 15300, train loss: 1.9020, train acc: 0.2969, valid loss: 2.1403, valid acc: 0.2800
Iter: 15400, train loss: 2.2222, train acc: 0.2812, valid loss: 2.1395, valid acc: 0.2785
Iter: 15500, train loss: 2.0344, train acc: 0.2656, valid loss: 2.1386, valid acc: 0.2769
Iter: 15600, train loss: 2.0819, train acc: 0.3594, valid loss: 2.1381, valid acc: 0.2792
Iter: 15700, train loss: 2.0575, train acc: 0.3438, valid loss: 2.1373, valid acc: 0.2769
Iter: 15800, train loss: 1.9971, train acc: 0.3438, valid loss: 2.1367, valid acc: 0.2769
Iter: 15900, train loss: 2.0265, train acc: 0.3281, valid loss: 2.1362, valid acc: 0.2792
Iter: 16000, train loss: 2.1902, train acc: 0.2500, valid loss: 2.1355, valid acc: 0.2792
Iter: 16100, train loss: 2.3303, train acc: 0.2500, valid loss: 2.1346, valid acc: 0.2800
Iter: 16200, train loss: 2.1863, train acc: 0.2656, valid loss: 2.1336, valid acc: 0.2792
Iter: 16300, train loss: 2.0637, train acc: 0.3594, valid loss: 2.1328, valid acc: 0.2808
Iter: 16400, train loss: 2.3589, train acc: 0.2969, valid loss: 2.1327, valid acc: 0.2800
Iter: 16500, train loss: 1.8036, train acc: 0.3750, valid loss: 2.1323, valid acc: 0.2823
Iter: 16600, train loss: 1.9800, train acc: 0.3269, valid loss: 2.1314, valid acc: 0.2815
Iter: 16700, train loss: 2.1594, train acc: 0.1719, valid loss: 2.1311, valid acc: 0.2838
Iter: 16800, train loss: 2.0424, train acc: 0.2969, valid loss: 2.1312, valid acc: 0.2846
Iter: 16900, train loss: 1.9296, train acc: 0.3438, valid loss: 2.1309, valid acc: 0.2823
Iter: 17000, train loss: 1.9263, train acc: 0.4219, valid loss: 2.1307, valid acc: 0.2838
Iter: 17100, train loss: 2.0511, train acc: 0.2812, valid loss: 2.1294, valid acc: 0.2831
Iter: 17200, train loss: 2.0496, train acc: 0.3281, valid loss: 2.1286, valid acc: 0.2838
Iter: 17300, train loss: 2.2233, train acc: 0.2500, valid loss: 2.1278, valid acc: 0.2823
Iter: 17400, train loss: 1.9002, train acc: 0.3594, valid loss: 2.1271, valid acc: 0.2862
Iter: 17500, train loss: 1.7229, train acc: 0.4844, valid loss: 2.1270, valid acc: 0.2846
Iter: 17600, train loss: 1.9173, train acc: 0.2969, valid loss: 2.1257, valid acc: 0.2854
Iter: 17700, train loss: 2.1851, train acc: 0.3125, valid loss: 2.1242, valid acc: 0.2877
Iter: 17800, train loss: 2.0161, train acc: 0.3125, valid loss: 2.1239, valid acc: 0.2854
Iter: 17900, train loss: 2.1663, train acc: 0.2969, valid loss: 2.1235, valid acc: 0.2877
Iter: 18000, train loss: 1.9980, train acc: 0.2812, valid loss: 2.1228, valid acc: 0.2862
Iter: 18100, train loss: 2.2097, train acc: 0.3125, valid loss: 2.1224, valid acc: 0.2838
Iter: 18200, train loss: 2.0980, train acc: 0.2969, valid loss: 2.1220, valid acc: 0.2885
Iter: 18300, train loss: 1.8596, train acc: 0.3125, valid loss: 2.1213, valid acc: 0.2869
Iter: 18400, train loss: 2.1165, train acc: 0.2500, valid loss: 2.1207, valid acc: 0.2885
Iter: 18500, train loss: 1.8769, train acc: 0.4219, valid loss: 2.1203, valid acc: 0.2846
Iter: 18600, train loss: 2.2151, train acc: 0.2500, valid loss: 2.1194, valid acc: 0.2869
Iter: 18700, train loss: 1.9389, train acc: 0.3281, valid loss: 2.1190, valid acc: 0.2877
Iter: 18800, train loss: 2.0908, train acc: 0.3281, valid loss: 2.1187, valid acc: 0.2885
Iter: 18900, train loss: 1.9623, train acc: 0.4219, valid loss: 2.1182, valid acc: 0.2885
Iter: 19000, train loss: 2.0713, train acc: 0.3125, valid loss: 2.1188, valid acc: 0.2908
Iter: 19100, train loss: 2.0000, train acc: 0.2969, valid loss: 2.1181, valid acc: 0.2931
Iter: 19200, train loss: 2.0367, train acc: 0.2656, valid loss: 2.1173, valid acc: 0.2946
Iter: 19300, train loss: 2.1734, train acc: 0.2500, valid loss: 2.1169, valid acc: 0.2931
Iter: 19400, train loss: 2.0923, train acc: 0.3125, valid loss: 2.1173, valid acc: 0.2892
Iter: 19500, train loss: 2.0152, train acc: 0.3281, valid loss: 2.1175, valid acc: 0.2946
Iter: 19600, train loss: 1.9224, train acc: 0.3438, valid loss: 2.1169, valid acc: 0.2946
Iter: 19700, train loss: 1.9928, train acc: 0.3281, valid loss: 2.1162, valid acc: 0.2923
Iter: 19800, train loss: 2.0822, train acc: 0.2969, valid loss: 2.1158, valid acc: 0.2954
Iter: 19900, train loss: 2.0263, train acc: 0.3438, valid loss: 2.1151, valid acc: 0.2938
Iter: 20000, train loss: 1.9129, train acc: 0.3906, valid loss: 2.1153, valid acc: 0.2938
Iter: 20100, train loss: 2.0481, train acc: 0.2812, valid loss: 2.1146, valid acc: 0.2946
Iter: 20200, train loss: 1.8207, train acc: 0.3906, valid loss: 2.1138, valid acc: 0.2954
Iter: 20300, train loss: 1.9979, train acc: 0.3125, valid loss: 2.1135, valid acc: 0.2977
Iter: 20400, train loss: 2.2494, train acc: 0.2188, valid loss: 2.1126, valid acc: 0.2962
Iter: 20500, train loss: 1.8585, train acc: 0.4219, valid loss: 2.1126, valid acc: 0.3000
Iter: 20600, train loss: 1.9711, train acc: 0.4219, valid loss: 2.1117, valid acc: 0.2985
Iter: 20700, train loss: 1.8490, train acc: 0.3906, valid loss: 2.1116, valid acc: 0.2977
Iter: 20800, train loss: 2.2379, train acc: 0.2812, valid loss: 2.1115, valid acc: 0.2969
Iter: 20900, train loss: 2.0823, train acc: 0.3281, valid loss: 2.1112, valid acc: 0.2977
Iter: 21000, train loss: 2.0497, train acc: 0.2969, valid loss: 2.1114, valid acc: 0.2985
Iter: 21100, train loss: 1.9034, train acc: 0.3438, valid loss: 2.1113, valid acc: 0.2985
Iter: 21200, train loss: 2.0002, train acc: 0.3906, valid loss: 2.1104, valid acc: 0.2985
Iter: 21300, train loss: 2.0900, train acc: 0.2031, valid loss: 2.1108, valid acc: 0.2985
Iter: 21400, train loss: 2.1758, train acc: 0.2812, valid loss: 2.1111, valid acc: 0.2985
Iter: 21500, train loss: 1.7988, train acc: 0.3594, valid loss: 2.1106, valid acc: 0.3015
Iter: 21600, train loss: 1.8888, train acc: 0.3438, valid loss: 2.1097, valid acc: 0.3015
Iter: 21700, train loss: 2.1431, train acc: 0.3281, valid loss: 2.1098, valid acc: 0.3023
Iter: 21800, train loss: 2.0802, train acc: 0.3750, valid loss: 2.1095, valid acc: 0.3008
Iter: 21900, train loss: 2.2211, train acc: 0.2812, valid loss: 2.1086, valid acc: 0.3000
Iter: 22000, train loss: 2.1286, train acc: 0.2812, valid loss: 2.1083, valid acc: 0.3015
Iter: 22100, train loss: 2.0215, train acc: 0.3906, valid loss: 2.1081, valid acc: 0.3015
Iter: 22200, train loss: 1.8577, train acc: 0.3750, valid loss: 2.1080, valid acc: 0.2992
Iter: 22300, train loss: 1.8655, train acc: 0.3906, valid loss: 2.1069, valid acc: 0.3023
Iter: 22400, train loss: 2.1593, train acc: 0.2344, valid loss: 2.1061, valid acc: 0.3023
Iter: 22500, train loss: 1.9987, train acc: 0.3906, valid loss: 2.1057, valid acc: 0.3038
Iter: 22600, train loss: 2.0266, train acc: 0.3438, valid loss: 2.1055, valid acc: 0.3069
Iter: 22700, train loss: 1.8135, train acc: 0.4062, valid loss: 2.1048, valid acc: 0.3062
Iter: 22800, train loss: 1.9791, train acc: 0.3281, valid loss: 2.1047, valid acc: 0.3054
Iter: 22900, train loss: 1.8353, train acc: 0.4219, valid loss: 2.1043, valid acc: 0.3062
Iter: 23000, train loss: 1.8800, train acc: 0.3281, valid loss: 2.1040, valid acc: 0.3062
Iter: 23100, train loss: 1.8570, train acc: 0.3281, valid loss: 2.1034, valid acc: 0.3069
Iter: 23200, train loss: 1.8494, train acc: 0.4219, valid loss: 2.1033, valid acc: 0.3077
Iter: 23300, train loss: 1.8748, train acc: 0.4062, valid loss: 2.1026, valid acc: 0.3085
Iter: 23400, train loss: 1.8440, train acc: 0.3594, valid loss: 2.1021, valid acc: 0.3108
Iter: 23500, train loss: 2.1015, train acc: 0.3438, valid loss: 2.1026, valid acc: 0.3092
Iter: 23600, train loss: 2.1226, train acc: 0.3594, valid loss: 2.1020, valid acc: 0.3085
Iter: 23700, train loss: 2.1038, train acc: 0.3125, valid loss: 2.1016, valid acc: 0.3062
Iter: 23800, train loss: 2.1856, train acc: 0.3125, valid loss: 2.1014, valid acc: 0.3046
Iter: 23900, train loss: 1.9611, train acc: 0.3438, valid loss: 2.1009, valid acc: 0.3085
Iter: 24000, train loss: 2.0908, train acc: 0.3594, valid loss: 2.1003, valid acc: 0.3092
Iter: 24100, train loss: 1.9667, train acc: 0.3281, valid loss: 2.1006, valid acc: 0.3092
Iter: 24200, train loss: 2.0087, train acc: 0.2656, valid loss: 2.1004, valid acc: 0.3092
Iter: 24300, train loss: 1.8131, train acc: 0.3125, valid loss: 2.1001, valid acc: 0.3085
Iter: 24400, train loss: 1.8975, train acc: 0.4375, valid loss: 2.0996, valid acc: 0.3085
Iter: 24500, train loss: 1.9091, train acc: 0.4062, valid loss: 2.0989, valid acc: 0.3085
Iter: 24600, train loss: 2.3475, train acc: 0.2656, valid loss: 2.0987, valid acc: 0.3115
Iter: 24700, train loss: 2.0989, train acc: 0.2969, valid loss: 2.0979, valid acc: 0.3100
Iter: 24800, train loss: 2.3846, train acc: 0.1562, valid loss: 2.0975, valid acc: 0.3100
Iter: 24900, train loss: 1.9721, train acc: 0.3750, valid loss: 2.0966, valid acc: 0.3115
Iter: 25000, train loss: 2.1943, train acc: 0.2031, valid loss: 2.0961, valid acc: 0.3100
Iter: 25100, train loss: 1.8798, train acc: 0.4062, valid loss: 2.0955, valid acc: 0.3131
Iter: 25200, train loss: 1.9212, train acc: 0.3438, valid loss: 2.0951, valid acc: 0.3138
Iter: 25300, train loss: 2.2521, train acc: 0.2500, valid loss: 2.0952, valid acc: 0.3154
Iter: 25400, train loss: 2.0484, train acc: 0.3594, valid loss: 2.0951, valid acc: 0.3123
Iter: 25500, train loss: 1.9813, train acc: 0.4062, valid loss: 2.0949, valid acc: 0.3131
Iter: 25600, train loss: 2.0425, train acc: 0.3125, valid loss: 2.0953, valid acc: 0.3123
Iter: 25700, train loss: 2.0350, train acc: 0.2812, valid loss: 2.0954, valid acc: 0.3108
Iter: 25800, train loss: 2.1842, train acc: 0.2656, valid loss: 2.0953, valid acc: 0.3123
Iter: 25900, train loss: 2.1242, train acc: 0.2812, valid loss: 2.0951, valid acc: 0.3092
Iter: 26000, train loss: 2.1585, train acc: 0.3438, valid loss: 2.0947, valid acc: 0.3115
Iter: 26100, train loss: 2.1771, train acc: 0.2656, valid loss: 2.0940, valid acc: 0.3123
Iter: 26200, train loss: 1.8799, train acc: 0.3125, valid loss: 2.0939, valid acc: 0.3123
Iter: 26300, train loss: 2.0405, train acc: 0.3281, valid loss: 2.0939, valid acc: 0.3123
Iter: 26400, train loss: 2.1335, train acc: 0.2969, valid loss: 2.0934, valid acc: 0.3115
Iter: 26500, train loss: 1.8773, train acc: 0.3125, valid loss: 2.0931, valid acc: 0.3100
Iter: 26600, train loss: 1.9610, train acc: 0.3906, valid loss: 2.0930, valid acc: 0.3108
Iter: 26700, train loss: 2.1639, train acc: 0.2656, valid loss: 2.0930, valid acc: 0.3108
Iter: 26800, train loss: 1.9755, train acc: 0.3750, valid loss: 2.0930, valid acc: 0.3115
Iter: 26900, train loss: 2.0189, train acc: 0.4062, valid loss: 2.0926, valid acc: 0.3115
Iter: 27000, train loss: 1.9838, train acc: 0.3594, valid loss: 2.0919, valid acc: 0.3131
Iter: 27100, train loss: 2.1320, train acc: 0.3281, valid loss: 2.0912, valid acc: 0.3131
Iter: 27200, train loss: 2.0593, train acc: 0.3750, valid loss: 2.0913, valid acc: 0.3131
Iter: 27300, train loss: 2.2315, train acc: 0.2656, valid loss: 2.0911, valid acc: 0.3115
Iter: 27400, train loss: 1.9924, train acc: 0.3438, valid loss: 2.0915, valid acc: 0.3115
Iter: 27500, train loss: 1.9123, train acc: 0.3906, valid loss: 2.0909, valid acc: 0.3108
Iter: 27600, train loss: 2.1935, train acc: 0.3125, valid loss: 2.0906, valid acc: 0.3138
Iter: 27700, train loss: 2.0510, train acc: 0.3594, valid loss: 2.0915, valid acc: 0.3169
Iter: 27800, train loss: 1.8829, train acc: 0.4062, valid loss: 2.0903, valid acc: 0.3154
Iter: 27900, train loss: 2.0625, train acc: 0.3125, valid loss: 2.0899, valid acc: 0.3146
Iter: 28000, train loss: 2.0656, train acc: 0.2812, valid loss: 2.0897, valid acc: 0.3169
Iter: 28100, train loss: 1.9013, train acc: 0.2656, valid loss: 2.0899, valid acc: 0.3154
Iter: 28200, train loss: 2.3960, train acc: 0.1875, valid loss: 2.0895, valid acc: 0.3154
Iter: 28300, train loss: 1.8959, train acc: 0.3750, valid loss: 2.0893, valid acc: 0.3146
Iter: 28400, train loss: 2.0423, train acc: 0.3594, valid loss: 2.0888, valid acc: 0.3123
Iter: 28500, train loss: 2.1221, train acc: 0.2812, valid loss: 2.0890, valid acc: 0.3162
Iter: 28600, train loss: 2.2403, train acc: 0.2812, valid loss: 2.0891, valid acc: 0.3146
Iter: 28700, train loss: 2.1620, train acc: 0.3594, valid loss: 2.0890, valid acc: 0.3169
Iter: 28800, train loss: 2.0554, train acc: 0.3125, valid loss: 2.0886, valid acc: 0.3138
Iter: 28900, train loss: 1.7853, train acc: 0.3594, valid loss: 2.0883, valid acc: 0.3154
Iter: 29000, train loss: 1.9916, train acc: 0.2812, valid loss: 2.0882, valid acc: 0.3146
Iter: 29100, train loss: 2.1244, train acc: 0.2969, valid loss: 2.0872, valid acc: 0.3169
Iter: 29200, train loss: 2.1629, train acc: 0.2812, valid loss: 2.0868, valid acc: 0.3146
Iter: 29300, train loss: 2.0649, train acc: 0.3125, valid loss: 2.0866, valid acc: 0.3146
Iter: 29400, train loss: 2.0768, train acc: 0.3438, valid loss: 2.0865, valid acc: 0.3192
Iter: 29500, train loss: 2.0052, train acc: 0.4219, valid loss: 2.0859, valid acc: 0.3177
Iter: 29600, train loss: 2.1250, train acc: 0.2812, valid loss: 2.0859, valid acc: 0.3154
Iter: 29700, train loss: 1.9466, train acc: 0.3125, valid loss: 2.0861, valid acc: 0.3154
Iter: 29800, train loss: 2.0921, train acc: 0.3125, valid loss: 2.0853, valid acc: 0.3177
Iter: 29900, train loss: 2.2278, train acc: 0.2812, valid loss: 2.0853, valid acc: 0.3177
Iter: 30000, train loss: 2.0943, train acc: 0.2969, valid loss: 2.0850, valid acc: 0.3192
Iter: 30100, train loss: 1.9036, train acc: 0.3750, valid loss: 2.0840, valid acc: 0.3192
Iter: 30200, train loss: 2.0592, train acc: 0.3281, valid loss: 2.0840, valid acc: 0.3231
Iter: 30300, train loss: 1.9588, train acc: 0.3125, valid loss: 2.0832, valid acc: 0.3231
Iter: 30400, train loss: 2.1379, train acc: 0.2969, valid loss: 2.0833, valid acc: 0.3200
Iter: 30500, train loss: 1.9089, train acc: 0.3594, valid loss: 2.0834, valid acc: 0.3215
Iter: 30600, train loss: 2.0606, train acc: 0.3906, valid loss: 2.0830, valid acc: 0.3177
Iter: 30700, train loss: 2.0014, train acc: 0.3281, valid loss: 2.0824, valid acc: 0.3208
Iter: 30800, train loss: 1.9814, train acc: 0.3594, valid loss: 2.0819, valid acc: 0.3177
Iter: 30900, train loss: 2.0749, train acc: 0.3281, valid loss: 2.0822, valid acc: 0.3185
Iter: 31000, train loss: 2.0851, train acc: 0.3750, valid loss: 2.0817, valid acc: 0.3208
Iter: 31100, train loss: 2.1676, train acc: 0.2500, valid loss: 2.0816, valid acc: 0.3215
Iter: 31200, train loss: 2.1747, train acc: 0.2656, valid loss: 2.0818, valid acc: 0.3200
Iter: 31300, train loss: 2.2367, train acc: 0.3281, valid loss: 2.0808, valid acc: 0.3200
Iter: 31400, train loss: 1.7533, train acc: 0.5156, valid loss: 2.0807, valid acc: 0.3185
Iter: 31500, train loss: 1.8330, train acc: 0.3438, valid loss: 2.0810, valid acc: 0.3185
Iter: 31600, train loss: 1.9205, train acc: 0.2969, valid loss: 2.0808, valid acc: 0.3246
Iter: 31700, train loss: 1.8434, train acc: 0.2812, valid loss: 2.0810, valid acc: 0.3200
Iter: 31800, train loss: 2.1878, train acc: 0.2188, valid loss: 2.0808, valid acc: 0.3223
Iter: 31900, train loss: 1.9998, train acc: 0.3750, valid loss: 2.0814, valid acc: 0.3185
Iter: 32000, train loss: 1.9804, train acc: 0.2969, valid loss: 2.0809, valid acc: 0.3192
Iter: 32100, train loss: 1.9823, train acc: 0.3125, valid loss: 2.0808, valid acc: 0.3208
Iter: 32200, train loss: 1.8450, train acc: 0.3438, valid loss: 2.0805, valid acc: 0.3215
Iter: 32300, train loss: 2.1468, train acc: 0.3281, valid loss: 2.0803, valid acc: 0.3231
Iter: 32400, train loss: 2.0213, train acc: 0.3438, valid loss: 2.0799, valid acc: 0.3208
Iter: 32500, train loss: 2.0778, train acc: 0.4219, valid loss: 2.0797, valid acc: 0.3215
Iter: 32600, train loss: 2.0991, train acc: 0.3125, valid loss: 2.0799, valid acc: 0.3223
Iter: 32700, train loss: 1.9935, train acc: 0.3438, valid loss: 2.0798, valid acc: 0.3208
Iter: 32800, train loss: 2.0148, train acc: 0.3281, valid loss: 2.0799, valid acc: 0.3185
Iter: 32900, train loss: 1.7923, train acc: 0.4219, valid loss: 2.0802, valid acc: 0.3169
Iter: 33000, train loss: 1.9914, train acc: 0.2969, valid loss: 2.0804, valid acc: 0.3154
Iter: 33100, train loss: 2.2501, train acc: 0.2656, valid loss: 2.0798, valid acc: 0.3185
Iter: 33200, train loss: 1.8388, train acc: 0.4219, valid loss: 2.0796, valid acc: 0.3200
Iter: 33300, train loss: 1.8255, train acc: 0.3654, valid loss: 2.0793, valid acc: 0.3192
Iter: 33400, train loss: 1.9434, train acc: 0.3125, valid loss: 2.0790, valid acc: 0.3192
Iter: 33500, train loss: 1.9186, train acc: 0.3750, valid loss: 2.0785, valid acc: 0.3200
Iter: 33600, train loss: 2.1906, train acc: 0.2812, valid loss: 2.0782, valid acc: 0.3177
Iter: 33700, train loss: 1.8581, train acc: 0.3281, valid loss: 2.0781, valid acc: 0.3192
Iter: 33800, train loss: 1.8474, train acc: 0.3750, valid loss: 2.0779, valid acc: 0.3200
Iter: 33900, train loss: 1.8516, train acc: 0.2969, valid loss: 2.0780, valid acc: 0.3177
Iter: 34000, train loss: 1.9283, train acc: 0.4688, valid loss: 2.0779, valid acc: 0.3192
Iter: 34100, train loss: 2.1066, train acc: 0.3438, valid loss: 2.0777, valid acc: 0.3192
Iter: 34200, train loss: 2.2252, train acc: 0.2812, valid loss: 2.0772, valid acc: 0.3208
Iter: 34300, train loss: 2.0983, train acc: 0.3594, valid loss: 2.0769, valid acc: 0.3200
Iter: 34400, train loss: 1.9562, train acc: 0.2969, valid loss: 2.0766, valid acc: 0.3192
Iter: 34500, train loss: 1.9767, train acc: 0.3594, valid loss: 2.0773, valid acc: 0.3192
Iter: 34600, train loss: 1.8975, train acc: 0.4062, valid loss: 2.0777, valid acc: 0.3169
Iter: 34700, train loss: 2.0704, train acc: 0.3125, valid loss: 2.0773, valid acc: 0.3200
Iter: 34800, train loss: 2.1440, train acc: 0.2969, valid loss: 2.0778, valid acc: 0.3223
Iter: 34900, train loss: 1.9205, train acc: 0.2656, valid loss: 2.0772, valid acc: 0.3208
Iter: 35000, train loss: 2.1177, train acc: 0.2969, valid loss: 2.0764, valid acc: 0.3215
Iter: 35100, train loss: 2.2733, train acc: 0.2812, valid loss: 2.0764, valid acc: 0.3223
Iter: 35200, train loss: 2.0763, train acc: 0.3281, valid loss: 2.0768, valid acc: 0.3223
Iter: 35300, train loss: 2.0108, train acc: 0.3594, valid loss: 2.0762, valid acc: 0.3215
Iter: 35400, train loss: 1.8519, train acc: 0.4219, valid loss: 2.0761, valid acc: 0.3208
Iter: 35500, train loss: 1.8492, train acc: 0.3906, valid loss: 2.0755, valid acc: 0.3200
Iter: 35600, train loss: 1.9897, train acc: 0.3125, valid loss: 2.0750, valid acc: 0.3246
Iter: 35700, train loss: 2.1377, train acc: 0.3594, valid loss: 2.0749, valid acc: 0.3238
Iter: 35800, train loss: 2.0077, train acc: 0.3750, valid loss: 2.0751, valid acc: 0.3246
Iter: 35900, train loss: 1.8566, train acc: 0.3906, valid loss: 2.0752, valid acc: 0.3223
Iter: 36000, train loss: 2.0754, train acc: 0.2344, valid loss: 2.0749, valid acc: 0.3215
Iter: 36100, train loss: 1.8958, train acc: 0.3281, valid loss: 2.0744, valid acc: 0.3215
Iter: 36200, train loss: 2.0618, train acc: 0.3438, valid loss: 2.0738, valid acc: 0.3223
Iter: 36300, train loss: 1.9383, train acc: 0.4062, valid loss: 2.0733, valid acc: 0.3208
Iter: 36400, train loss: 1.9716, train acc: 0.3281, valid loss: 2.0737, valid acc: 0.3215
Iter: 36500, train loss: 1.9387, train acc: 0.2969, valid loss: 2.0732, valid acc: 0.3215
Iter: 36600, train loss: 1.8087, train acc: 0.3750, valid loss: 2.0726, valid acc: 0.3215
Iter: 36700, train loss: 1.9933, train acc: 0.3281, valid loss: 2.0726, valid acc: 0.3208
Iter: 36800, train loss: 1.8742, train acc: 0.3750, valid loss: 2.0728, valid acc: 0.3208
Iter: 36900, train loss: 2.0430, train acc: 0.3438, valid loss: 2.0728, valid acc: 0.3208
Iter: 37000, train loss: 2.1315, train acc: 0.3438, valid loss: 2.0726, valid acc: 0.3223
Iter: 37100, train loss: 2.1222, train acc: 0.2969, valid loss: 2.0728, valid acc: 0.3238
Iter: 37200, train loss: 2.1334, train acc: 0.3594, valid loss: 2.0725, valid acc: 0.3223
Iter: 37300, train loss: 1.8483, train acc: 0.4375, valid loss: 2.0719, valid acc: 0.3200
Iter: 37400, train loss: 2.1526, train acc: 0.2500, valid loss: 2.0726, valid acc: 0.3185
Iter: 37500, train loss: 2.0533, train acc: 0.3438, valid loss: 2.0726, valid acc: 0.3177
Iter: 37600, train loss: 1.8583, train acc: 0.4062, valid loss: 2.0725, valid acc: 0.3192
Iter: 37700, train loss: 1.7702, train acc: 0.3906, valid loss: 2.0720, valid acc: 0.3200
Iter: 37800, train loss: 2.1174, train acc: 0.2812, valid loss: 2.0723, valid acc: 0.3200
Iter: 37900, train loss: 1.8486, train acc: 0.4062, valid loss: 2.0722, valid acc: 0.3200
Iter: 38000, train loss: 1.9358, train acc: 0.3750, valid loss: 2.0720, valid acc: 0.3215
Iter: 38100, train loss: 1.8390, train acc: 0.3906, valid loss: 2.0728, valid acc: 0.3231
Iter: 38200, train loss: 1.9426, train acc: 0.3125, valid loss: 2.0721, valid acc: 0.3215
Iter: 38300, train loss: 1.8960, train acc: 0.3438, valid loss: 2.0728, valid acc: 0.3208
Iter: 38400, train loss: 2.1154, train acc: 0.3438, valid loss: 2.0729, valid acc: 0.3231
Iter: 38500, train loss: 2.0267, train acc: 0.3438, valid loss: 2.0724, valid acc: 0.3238
Iter: 38600, train loss: 1.9984, train acc: 0.3594, valid loss: 2.0721, valid acc: 0.3238
Iter: 38700, train loss: 1.7149, train acc: 0.4062, valid loss: 2.0716, valid acc: 0.3231
Iter: 38800, train loss: 2.0550, train acc: 0.3281, valid loss: 2.0718, valid acc: 0.3238
Iter: 38900, train loss: 2.0262, train acc: 0.4062, valid loss: 2.0711, valid acc: 0.3238
Iter: 39000, train loss: 2.1016, train acc: 0.2969, valid loss: 2.0713, valid acc: 0.3246
Iter: 39100, train loss: 2.0548, train acc: 0.3594, valid loss: 2.0712, valid acc: 0.3238
Iter: 39200, train loss: 1.9232, train acc: 0.3906, valid loss: 2.0709, valid acc: 0.3246
Iter: 39300, train loss: 2.0291, train acc: 0.2969, valid loss: 2.0707, valid acc: 0.3254
Iter: 39400, train loss: 1.8813, train acc: 0.4062, valid loss: 2.0704, valid acc: 0.3254
Iter: 39500, train loss: 1.8961, train acc: 0.3750, valid loss: 2.0706, valid acc: 0.3246
Iter: 39600, train loss: 2.0546, train acc: 0.3281, valid loss: 2.0699, valid acc: 0.3254
Iter: 39700, train loss: 1.7376, train acc: 0.3594, valid loss: 2.0700, valid acc: 0.3254
Iter: 39800, train loss: 1.9728, train acc: 0.3750, valid loss: 2.0699, valid acc: 0.3231
Iter: 39900, train loss: 2.1678, train acc: 0.2500, valid loss: 2.0692, valid acc: 0.3254
Iter: 40000, train loss: 2.0347, train acc: 0.2969, valid loss: 2.0690, valid acc: 0.3254
Iter: 40100, train loss: 1.9711, train acc: 0.2969, valid loss: 2.0680, valid acc: 0.3254
Iter: 40200, train loss: 2.1633, train acc: 0.2812, valid loss: 2.0680, valid acc: 0.3254
Iter: 40300, train loss: 2.0320, train acc: 0.2656, valid loss: 2.0681, valid acc: 0.3254
Iter: 40400, train loss: 2.0375, train acc: 0.3281, valid loss: 2.0676, valid acc: 0.3246
Iter: 40500, train loss: 1.9701, train acc: 0.3594, valid loss: 2.0676, valid acc: 0.3238
Iter: 40600, train loss: 1.8741, train acc: 0.3594, valid loss: 2.0672, valid acc: 0.3246
Iter: 40700, train loss: 1.9772, train acc: 0.3750, valid loss: 2.0669, valid acc: 0.3254
Iter: 40800, train loss: 2.1924, train acc: 0.2344, valid loss: 2.0671, valid acc: 0.3269
Iter: 40900, train loss: 1.9105, train acc: 0.3906, valid loss: 2.0671, valid acc: 0.3269
Iter: 41000, train loss: 2.1490, train acc: 0.3906, valid loss: 2.0675, valid acc: 0.3246
Iter: 41100, train loss: 1.8345, train acc: 0.3750, valid loss: 2.0672, valid acc: 0.3254
Iter: 41200, train loss: 2.0266, train acc: 0.3906, valid loss: 2.0665, valid acc: 0.3262
Iter: 41300, train loss: 1.8965, train acc: 0.3281, valid loss: 2.0669, valid acc: 0.3285
Iter: 41400, train loss: 1.9862, train acc: 0.3438, valid loss: 2.0673, valid acc: 0.3262
Iter: 41500, train loss: 1.9289, train acc: 0.4375, valid loss: 2.0675, valid acc: 0.3246
Iter: 41600, train loss: 1.8417, train acc: 0.4844, valid loss: 2.0678, valid acc: 0.3262
Iter: 41700, train loss: 1.9450, train acc: 0.3594, valid loss: 2.0676, valid acc: 0.3269
Iter: 41800, train loss: 1.8269, train acc: 0.5000, valid loss: 2.0670, valid acc: 0.3238
Iter: 41900, train loss: 1.9314, train acc: 0.4062, valid loss: 2.0676, valid acc: 0.3246
Iter: 42000, train loss: 1.9542, train acc: 0.3906, valid loss: 2.0668, valid acc: 0.3254
Iter: 42100, train loss: 1.8759, train acc: 0.4844, valid loss: 2.0664, valid acc: 0.3223
Iter: 42200, train loss: 1.9101, train acc: 0.3750, valid loss: 2.0663, valid acc: 0.3215
Iter: 42300, train loss: 1.9414, train acc: 0.3906, valid loss: 2.0660, valid acc: 0.3223
Iter: 42400, train loss: 1.8713, train acc: 0.3906, valid loss: 2.0668, valid acc: 0.3231
Iter: 42500, train loss: 1.7783, train acc: 0.3906, valid loss: 2.0670, valid acc: 0.3262
Iter: 42600, train loss: 1.8957, train acc: 0.3594, valid loss: 2.0663, valid acc: 0.3254
Iter: 42700, train loss: 1.9428, train acc: 0.3281, valid loss: 2.0656, valid acc: 0.3262
Iter: 42800, train loss: 1.8574, train acc: 0.4531, valid loss: 2.0655, valid acc: 0.3262
Iter: 42900, train loss: 1.9420, train acc: 0.3281, valid loss: 2.0657, valid acc: 0.3254
Iter: 43000, train loss: 1.9830, train acc: 0.3594, valid loss: 2.0661, valid acc: 0.3246
Iter: 43100, train loss: 2.0116, train acc: 0.3750, valid loss: 2.0663, valid acc: 0.3262
Iter: 43200, train loss: 2.0198, train acc: 0.4062, valid loss: 2.0660, valid acc: 0.3285
Iter: 43300, train loss: 2.0083, train acc: 0.3281, valid loss: 2.0663, valid acc: 0.3292
Iter: 43400, train loss: 1.6697, train acc: 0.3125, valid loss: 2.0664, valid acc: 0.3246
Iter: 43500, train loss: 1.9192, train acc: 0.2969, valid loss: 2.0659, valid acc: 0.3269
Iter: 43600, train loss: 1.9535, train acc: 0.2969, valid loss: 2.0664, valid acc: 0.3262
Iter: 43700, train loss: 1.9034, train acc: 0.3594, valid loss: 2.0652, valid acc: 0.3254
Iter: 43800, train loss: 1.8829, train acc: 0.3594, valid loss: 2.0650, valid acc: 0.3262
Iter: 43900, train loss: 2.1276, train acc: 0.3594, valid loss: 2.0653, valid acc: 0.3285
Iter: 44000, train loss: 1.9287, train acc: 0.4688, valid loss: 2.0656, valid acc: 0.3292
Iter: 44100, train loss: 2.0828, train acc: 0.4219, valid loss: 2.0651, valid acc: 0.3285
Iter: 44200, train loss: 1.8049, train acc: 0.4219, valid loss: 2.0642, valid acc: 0.3277
Iter: 44300, train loss: 2.0153, train acc: 0.3750, valid loss: 2.0636, valid acc: 0.3292
Iter: 44400, train loss: 1.8351, train acc: 0.4531, valid loss: 2.0639, valid acc: 0.3269
Iter: 44500, train loss: 2.1672, train acc: 0.3438, valid loss: 2.0636, valid acc: 0.3254
Iter: 44600, train loss: 2.1297, train acc: 0.3125, valid loss: 2.0637, valid acc: 0.3269
Iter: 44700, train loss: 1.8564, train acc: 0.4531, valid loss: 2.0636, valid acc: 0.3285
Iter: 44800, train loss: 2.2172, train acc: 0.2500, valid loss: 2.0636, valid acc: 0.3285
Iter: 44900, train loss: 1.7654, train acc: 0.4531, valid loss: 2.0634, valid acc: 0.3285
Iter: 45000, train loss: 2.1088, train acc: 0.3438, valid loss: 2.0634, valid acc: 0.3285
Iter: 45100, train loss: 2.0295, train acc: 0.3281, valid loss: 2.0626, valid acc: 0.3292
Iter: 45200, train loss: 2.1290, train acc: 0.2812, valid loss: 2.0617, valid acc: 0.3269
Iter: 45300, train loss: 1.9813, train acc: 0.3438, valid loss: 2.0612, valid acc: 0.3285
Iter: 45400, train loss: 2.0270, train acc: 0.2969, valid loss: 2.0608, valid acc: 0.3285
Iter: 45500, train loss: 2.0775, train acc: 0.2500, valid loss: 2.0608, valid acc: 0.3285
Iter: 45600, train loss: 1.9223, train acc: 0.3906, valid loss: 2.0612, valid acc: 0.3262
Iter: 45700, train loss: 2.0896, train acc: 0.2812, valid loss: 2.0612, valid acc: 0.3262
Iter: 45800, train loss: 1.8030, train acc: 0.4062, valid loss: 2.0610, valid acc: 0.3254
Iter: 45900, train loss: 1.9917, train acc: 0.3125, valid loss: 2.0610, valid acc: 0.3269
Iter: 46000, train loss: 2.0966, train acc: 0.2500, valid loss: 2.0612, valid acc: 0.3277
Iter: 46100, train loss: 2.0700, train acc: 0.3750, valid loss: 2.0607, valid acc: 0.3292
Iter: 46200, train loss: 1.9297, train acc: 0.3125, valid loss: 2.0612, valid acc: 0.3277
Iter: 46300, train loss: 2.0841, train acc: 0.3281, valid loss: 2.0611, valid acc: 0.3285
Iter: 46400, train loss: 2.0020, train acc: 0.2344, valid loss: 2.0618, valid acc: 0.3269
Iter: 46500, train loss: 2.2461, train acc: 0.2500, valid loss: 2.0612, valid acc: 0.3277
Iter: 46600, train loss: 2.1441, train acc: 0.2969, valid loss: 2.0610, valid acc: 0.3254
Iter: 46700, train loss: 2.0359, train acc: 0.2500, valid loss: 2.0606, valid acc: 0.3269
Iter: 46800, train loss: 1.9906, train acc: 0.3750, valid loss: 2.0606, valid acc: 0.3277
Iter: 46900, train loss: 1.8335, train acc: 0.4375, valid loss: 2.0598, valid acc: 0.3269
Iter: 47000, train loss: 2.0549, train acc: 0.3750, valid loss: 2.0598, valid acc: 0.3277
Iter: 47100, train loss: 1.8450, train acc: 0.3750, valid loss: 2.0594, valid acc: 0.3292
Iter: 47200, train loss: 2.2130, train acc: 0.4062, valid loss: 2.0595, valid acc: 0.3285
Iter: 47300, train loss: 1.7942, train acc: 0.4219, valid loss: 2.0593, valid acc: 0.3277
Iter: 47400, train loss: 1.7379, train acc: 0.5469, valid loss: 2.0588, valid acc: 0.3269
Iter: 47500, train loss: 1.8645, train acc: 0.4219, valid loss: 2.0590, valid acc: 0.3292
Iter: 47600, train loss: 2.0404, train acc: 0.3594, valid loss: 2.0592, valid acc: 0.3292
Iter: 47700, train loss: 1.8822, train acc: 0.2969, valid loss: 2.0594, valid acc: 0.3300
Iter: 47800, train loss: 1.7706, train acc: 0.4219, valid loss: 2.0597, valid acc: 0.3292
Iter: 47900, train loss: 1.8434, train acc: 0.4062, valid loss: 2.0602, valid acc: 0.3277
Iter: 48000, train loss: 2.0403, train acc: 0.2969, valid loss: 2.0591, valid acc: 0.3285
Iter: 48100, train loss: 1.8898, train acc: 0.3750, valid loss: 2.0593, valid acc: 0.3285
Iter: 48200, train loss: 1.6890, train acc: 0.3750, valid loss: 2.0597, valid acc: 0.3300
Iter: 48300, train loss: 1.6799, train acc: 0.5469, valid loss: 2.0599, valid acc: 0.3300
Iter: 48400, train loss: 1.7233, train acc: 0.4375, valid loss: 2.0598, valid acc: 0.3308
Iter: 48500, train loss: 1.8533, train acc: 0.3438, valid loss: 2.0595, valid acc: 0.3315
Iter: 48600, train loss: 2.0777, train acc: 0.2812, valid loss: 2.0590, valid acc: 0.3315
Iter: 48700, train loss: 1.9464, train acc: 0.3750, valid loss: 2.0590, valid acc: 0.3308
Iter: 48800, train loss: 1.9449, train acc: 0.3750, valid loss: 2.0585, valid acc: 0.3300
Iter: 48900, train loss: 1.7652, train acc: 0.4062, valid loss: 2.0579, valid acc: 0.3292
Iter: 49000, train loss: 1.9736, train acc: 0.3594, valid loss: 2.0583, valid acc: 0.3292
Iter: 49100, train loss: 1.8261, train acc: 0.3750, valid loss: 2.0584, valid acc: 0.3308
Iter: 49200, train loss: 1.6909, train acc: 0.4219, valid loss: 2.0586, valid acc: 0.3315
Iter: 49300, train loss: 2.1251, train acc: 0.2500, valid loss: 2.0590, valid acc: 0.3308
Iter: 49400, train loss: 1.9137, train acc: 0.3906, valid loss: 2.0589, valid acc: 0.3308
Iter: 49500, train loss: 1.8332, train acc: 0.3125, valid loss: 2.0591, valid acc: 0.3308
Iter: 49600, train loss: 1.9082, train acc: 0.4219, valid loss: 2.0590, valid acc: 0.3315
Iter: 49700, train loss: 1.9949, train acc: 0.2969, valid loss: 2.0587, valid acc: 0.3308
Iter: 49800, train loss: 1.9037, train acc: 0.3906, valid loss: 2.0593, valid acc: 0.3285
Iter: 49900, train loss: 2.1516, train acc: 0.3125, valid loss: 2.0595, valid acc: 0.3285
Iter: 50000, train loss: 1.8874, train acc: 0.4375, valid loss: 2.0592, valid acc: 0.3292
Iter: 50100, train loss: 2.0170, train acc: 0.3125, valid loss: 2.0586, valid acc: 0.3323
Iter: 50200, train loss: 2.0856, train acc: 0.3438, valid loss: 2.0585, valid acc: 0.3300
Iter: 50300, train loss: 2.0302, train acc: 0.3125, valid loss: 2.0585, valid acc: 0.3300
Iter: 50400, train loss: 1.8046, train acc: 0.4375, valid loss: 2.0581, valid acc: 0.3285
Iter: 50500, train loss: 1.9291, train acc: 0.3281, valid loss: 2.0586, valid acc: 0.3277
Iter: 50600, train loss: 2.1281, train acc: 0.2969, valid loss: 2.0586, valid acc: 0.3285
Iter: 50700, train loss: 2.1097, train acc: 0.3281, valid loss: 2.0587, valid acc: 0.3277
Iter: 50800, train loss: 1.7926, train acc: 0.4688, valid loss: 2.0585, valid acc: 0.3262
Iter: 50900, train loss: 2.0393, train acc: 0.2656, valid loss: 2.0580, valid acc: 0.3269
Iter: 51000, train loss: 1.9208, train acc: 0.2812, valid loss: 2.0579, valid acc: 0.3277
Iter: 51100, train loss: 1.9408, train acc: 0.3438, valid loss: 2.0577, valid acc: 0.3308
Iter: 51200, train loss: 1.9633, train acc: 0.4062, valid loss: 2.0573, valid acc: 0.3323
Iter: 51300, train loss: 2.0078, train acc: 0.2969, valid loss: 2.0566, valid acc: 0.3308
Iter: 51400, train loss: 2.1490, train acc: 0.3906, valid loss: 2.0563, valid acc: 0.3292
Iter: 51500, train loss: 2.0912, train acc: 0.3125, valid loss: 2.0564, valid acc: 0.3285
Iter: 51600, train loss: 2.0568, train acc: 0.3906, valid loss: 2.0563, valid acc: 0.3292
Iter: 51700, train loss: 2.0584, train acc: 0.2969, valid loss: 2.0558, valid acc: 0.3308
Iter: 51800, train loss: 1.8437, train acc: 0.3594, valid loss: 2.0560, valid acc: 0.3315
Iter: 51900, train loss: 2.0214, train acc: 0.3125, valid loss: 2.0567, valid acc: 0.3308
Iter: 52000, train loss: 1.7752, train acc: 0.4062, valid loss: 2.0568, valid acc: 0.3292
Iter: 52100, train loss: 1.9167, train acc: 0.4062, valid loss: 2.0565, valid acc: 0.3300
Iter: 52200, train loss: 2.0405, train acc: 0.3438, valid loss: 2.0563, valid acc: 0.3315
Iter: 52300, train loss: 1.8638, train acc: 0.3906, valid loss: 2.0562, valid acc: 0.3308
Iter: 52400, train loss: 2.0498, train acc: 0.3125, valid loss: 2.0568, valid acc: 0.3308
Iter: 52500, train loss: 2.0355, train acc: 0.3281, valid loss: 2.0565, valid acc: 0.3308
Iter: 52600, train loss: 1.9660, train acc: 0.4531, valid loss: 2.0560, valid acc: 0.3300
Iter: 52700, train loss: 1.8683, train acc: 0.3125, valid loss: 2.0560, valid acc: 0.3269
Iter: 52800, train loss: 1.9918, train acc: 0.3281, valid loss: 2.0555, valid acc: 0.3300
Iter: 52900, train loss: 1.8543, train acc: 0.4219, valid loss: 2.0545, valid acc: 0.3292
Iter: 53000, train loss: 2.0767, train acc: 0.3281, valid loss: 2.0550, valid acc: 0.3285
Iter: 53100, train loss: 1.8883, train acc: 0.4531, valid loss: 2.0536, valid acc: 0.3308
Iter: 53200, train loss: 2.0700, train acc: 0.2656, valid loss: 2.0535, valid acc: 0.3323
Iter: 53300, train loss: 1.8209, train acc: 0.3906, valid loss: 2.0536, valid acc: 0.3300
Iter: 53400, train loss: 1.7760, train acc: 0.4531, valid loss: 2.0538, valid acc: 0.3285
Iter: 53500, train loss: 2.0701, train acc: 0.3594, valid loss: 2.0534, valid acc: 0.3315
Iter: 53600, train loss: 1.9886, train acc: 0.2812, valid loss: 2.0537, valid acc: 0.3285
Iter: 53700, train loss: 1.7939, train acc: 0.4844, valid loss: 2.0537, valid acc: 0.3277
Iter: 53800, train loss: 1.8850, train acc: 0.3281, valid loss: 2.0538, valid acc: 0.3277
Iter: 53900, train loss: 2.0330, train acc: 0.3281, valid loss: 2.0538, valid acc: 0.3277
Iter: 54000, train loss: 1.8556, train acc: 0.2656, valid loss: 2.0532, valid acc: 0.3292
Iter: 54100, train loss: 1.8939, train acc: 0.4219, valid loss: 2.0534, valid acc: 0.3308
Iter: 54200, train loss: 2.0815, train acc: 0.3906, valid loss: 2.0543, valid acc: 0.3323
Iter: 54300, train loss: 1.9749, train acc: 0.3906, valid loss: 2.0546, valid acc: 0.3323
Iter: 54400, train loss: 1.7781, train acc: 0.4375, valid loss: 2.0542, valid acc: 0.3323
Iter: 54500, train loss: 1.7999, train acc: 0.4219, valid loss: 2.0545, valid acc: 0.3315
Iter: 54600, train loss: 1.8239, train acc: 0.3438, valid loss: 2.0539, valid acc: 0.3315
Iter: 54700, train loss: 2.0700, train acc: 0.2969, valid loss: 2.0535, valid acc: 0.3308
Iter: 54800, train loss: 1.6473, train acc: 0.4688, valid loss: 2.0536, valid acc: 0.3331
Iter: 54900, train loss: 1.8292, train acc: 0.3438, valid loss: 2.0533, valid acc: 0.3323
Iter: 55000, train loss: 2.1421, train acc: 0.2969, valid loss: 2.0536, valid acc: 0.3323
Iter: 55100, train loss: 1.7464, train acc: 0.4062, valid loss: 2.0536, valid acc: 0.3315
Iter: 55200, train loss: 1.7501, train acc: 0.3906, valid loss: 2.0538, valid acc: 0.3315
Iter: 55300, train loss: 1.9643, train acc: 0.3438, valid loss: 2.0542, valid acc: 0.3331
Iter: 55400, train loss: 2.1478, train acc: 0.2969, valid loss: 2.0536, valid acc: 0.3308
Iter: 55500, train loss: 2.1301, train acc: 0.3125, valid loss: 2.0546, valid acc: 0.3323
Iter: 55600, train loss: 2.0145, train acc: 0.3594, valid loss: 2.0541, valid acc: 0.3331
Iter: 55700, train loss: 2.1134, train acc: 0.2500, valid loss: 2.0536, valid acc: 0.3323
Iter: 55800, train loss: 2.0183, train acc: 0.3438, valid loss: 2.0529, valid acc: 0.3323
Iter: 55900, train loss: 1.7251, train acc: 0.4062, valid loss: 2.0521, valid acc: 0.3308
Iter: 56000, train loss: 2.0052, train acc: 0.3125, valid loss: 2.0522, valid acc: 0.3315
Iter: 56100, train loss: 1.8777, train acc: 0.3281, valid loss: 2.0520, valid acc: 0.3323
Iter: 56200, train loss: 1.8068, train acc: 0.3906, valid loss: 2.0517, valid acc: 0.3308
Iter: 56300, train loss: 1.9346, train acc: 0.3438, valid loss: 2.0517, valid acc: 0.3308
Iter: 56400, train loss: 2.1134, train acc: 0.3281, valid loss: 2.0518, valid acc: 0.3308
Iter: 56500, train loss: 1.9647, train acc: 0.3594, valid loss: 2.0519, valid acc: 0.3338
Iter: 56600, train loss: 1.9157, train acc: 0.3750, valid loss: 2.0514, valid acc: 0.3315
Iter: 56700, train loss: 1.9804, train acc: 0.3438, valid loss: 2.0515, valid acc: 0.3331
Iter: 56800, train loss: 1.9488, train acc: 0.3438, valid loss: 2.0521, valid acc: 0.3338
Iter: 56900, train loss: 1.7919, train acc: 0.5000, valid loss: 2.0522, valid acc: 0.3331
Iter: 57000, train loss: 2.1003, train acc: 0.2656, valid loss: 2.0526, valid acc: 0.3315
Iter: 57100, train loss: 2.2966, train acc: 0.2500, valid loss: 2.0527, valid acc: 0.3323
Iter: 57200, train loss: 1.9504, train acc: 0.3438, valid loss: 2.0522, valid acc: 0.3323
Iter: 57300, train loss: 1.9644, train acc: 0.3438, valid loss: 2.0525, valid acc: 0.3315
Iter: 57400, train loss: 2.0974, train acc: 0.3594, valid loss: 2.0519, valid acc: 0.3323
Iter: 57500, train loss: 2.1476, train acc: 0.2969, valid loss: 2.0524, valid acc: 0.3300
Iter: 57600, train loss: 1.9044, train acc: 0.4062, valid loss: 2.0518, valid acc: 0.3323
Iter: 57700, train loss: 1.9672, train acc: 0.3125, valid loss: 2.0519, valid acc: 0.3300
Iter: 57800, train loss: 2.0814, train acc: 0.3438, valid loss: 2.0519, valid acc: 0.3308
Iter: 57900, train loss: 2.0545, train acc: 0.3594, valid loss: 2.0514, valid acc: 0.3292
Iter: 58000, train loss: 1.9203, train acc: 0.3906, valid loss: 2.0514, valid acc: 0.3308
Iter: 58100, train loss: 1.9356, train acc: 0.3594, valid loss: 2.0513, valid acc: 0.3331
Iter: 58200, train loss: 2.0967, train acc: 0.3125, valid loss: 2.0514, valid acc: 0.3323
Iter: 58300, train loss: 1.7919, train acc: 0.4219, valid loss: 2.0516, valid acc: 0.3346
Iter: 58400, train loss: 2.0669, train acc: 0.3438, valid loss: 2.0509, valid acc: 0.3346
Iter: 58500, train loss: 2.0300, train acc: 0.2812, valid loss: 2.0503, valid acc: 0.3331
Iter: 58600, train loss: 1.8817, train acc: 0.2969, valid loss: 2.0500, valid acc: 0.3346
Iter: 58700, train loss: 1.8243, train acc: 0.3906, valid loss: 2.0504, valid acc: 0.3369
Iter: 58800, train loss: 1.8444, train acc: 0.3125, valid loss: 2.0508, valid acc: 0.3346
Iter: 58900, train loss: 1.6975, train acc: 0.3906, valid loss: 2.0505, valid acc: 0.3354
Iter: 59000, train loss: 1.7170, train acc: 0.3906, valid loss: 2.0509, valid acc: 0.3362
Iter: 59100, train loss: 2.0347, train acc: 0.3438, valid loss: 2.0506, valid acc: 0.3354
Iter: 59200, train loss: 1.9309, train acc: 0.3281, valid loss: 2.0501, valid acc: 0.3338
Iter: 59300, train loss: 2.0663, train acc: 0.2969, valid loss: 2.0500, valid acc: 0.3338
Iter: 59400, train loss: 1.7471, train acc: 0.4219, valid loss: 2.0505, valid acc: 0.3323
Iter: 59500, train loss: 1.7423, train acc: 0.4531, valid loss: 2.0510, valid acc: 0.3331
Iter: 59600, train loss: 1.9231, train acc: 0.3281, valid loss: 2.0515, valid acc: 0.3323
Iter: 59700, train loss: 1.9211, train acc: 0.3594, valid loss: 2.0511, valid acc: 0.3346
Iter: 59800, train loss: 2.1032, train acc: 0.3125, valid loss: 2.0510, valid acc: 0.3369
Iter: 59900, train loss: 1.9130, train acc: 0.3906, valid loss: 2.0507, valid acc: 0.3346
Iter: 60000, train loss: 2.0040, train acc: 0.2812, valid loss: 2.0513, valid acc: 0.3346
Iter: 60100, train loss: 2.0141, train acc: 0.2500, valid loss: 2.0510, valid acc: 0.3338
Iter: 60200, train loss: 1.7886, train acc: 0.3594, valid loss: 2.0503, valid acc: 0.3354
Iter: 60300, train loss: 1.9814, train acc: 0.3281, valid loss: 2.0498, valid acc: 0.3346
Iter: 60400, train loss: 1.8917, train acc: 0.4531, valid loss: 2.0497, valid acc: 0.3331
Iter: 60500, train loss: 1.7624, train acc: 0.4375, valid loss: 2.0499, valid acc: 0.3338
Iter: 60600, train loss: 1.7921, train acc: 0.3906, valid loss: 2.0499, valid acc: 0.3338
Iter: 60700, train loss: 1.8399, train acc: 0.3281, valid loss: 2.0493, valid acc: 0.3354
Iter: 60800, train loss: 2.0112, train acc: 0.3438, valid loss: 2.0484, valid acc: 0.3346
Iter: 60900, train loss: 2.1186, train acc: 0.3594, valid loss: 2.0489, valid acc: 0.3346
Iter: 61000, train loss: 1.8964, train acc: 0.4062, valid loss: 2.0486, valid acc: 0.3338
Iter: 61100, train loss: 1.8086, train acc: 0.4062, valid loss: 2.0486, valid acc: 0.3331
Iter: 61200, train loss: 2.0130, train acc: 0.2812, valid loss: 2.0485, valid acc: 0.3323
Iter: 61300, train loss: 1.9256, train acc: 0.4375, valid loss: 2.0479, valid acc: 0.3346
Iter: 61400, train loss: 1.9197, train acc: 0.3750, valid loss: 2.0478, valid acc: 0.3338
Iter: 61500, train loss: 1.8220, train acc: 0.4688, valid loss: 2.0470, valid acc: 0.3362
Iter: 61600, train loss: 2.0649, train acc: 0.3594, valid loss: 2.0474, valid acc: 0.3354
Iter: 61700, train loss: 2.1048, train acc: 0.3281, valid loss: 2.0473, valid acc: 0.3362
Iter: 61800, train loss: 1.9203, train acc: 0.3281, valid loss: 2.0471, valid acc: 0.3346
Iter: 61900, train loss: 1.6880, train acc: 0.4844, valid loss: 2.0471, valid acc: 0.3362
Iter: 62000, train loss: 1.9273, train acc: 0.3594, valid loss: 2.0476, valid acc: 0.3362
Iter: 62100, train loss: 1.7362, train acc: 0.4531, valid loss: 2.0487, valid acc: 0.3354
Iter: 62200, train loss: 2.1917, train acc: 0.3125, valid loss: 2.0479, valid acc: 0.3346
Iter: 62300, train loss: 2.0612, train acc: 0.4219, valid loss: 2.0473, valid acc: 0.3377
Iter: 62400, train loss: 1.7299, train acc: 0.3594, valid loss: 2.0479, valid acc: 0.3385
Iter: 62500, train loss: 1.9087, train acc: 0.3906, valid loss: 2.0484, valid acc: 0.3377
Iter: 62600, train loss: 1.9194, train acc: 0.4062, valid loss: 2.0487, valid acc: 0.3369
Iter: 62700, train loss: 2.1149, train acc: 0.2500, valid loss: 2.0487, valid acc: 0.3354
Iter: 62800, train loss: 2.1447, train acc: 0.2969, valid loss: 2.0482, valid acc: 0.3385
Iter: 62900, train loss: 1.7908, train acc: 0.3906, valid loss: 2.0480, valid acc: 0.3354
Iter: 63000, train loss: 2.0091, train acc: 0.3125, valid loss: 2.0479, valid acc: 0.3369
Iter: 63100, train loss: 1.9785, train acc: 0.3594, valid loss: 2.0478, valid acc: 0.3392
Iter: 63200, train loss: 1.9020, train acc: 0.3125, valid loss: 2.0477, valid acc: 0.3385
Iter: 63300, train loss: 1.8027, train acc: 0.4062, valid loss: 2.0476, valid acc: 0.3392
Iter: 63400, train loss: 2.0943, train acc: 0.4062, valid loss: 2.0472, valid acc: 0.3385
Iter: 63500, train loss: 1.9362, train acc: 0.3125, valid loss: 2.0464, valid acc: 0.3392
Iter: 63600, train loss: 2.1732, train acc: 0.3281, valid loss: 2.0463, valid acc: 0.3392
Iter: 63700, train loss: 1.9479, train acc: 0.3594, valid loss: 2.0463, valid acc: 0.3377
Iter: 63800, train loss: 1.9293, train acc: 0.3594, valid loss: 2.0455, valid acc: 0.3392
Iter: 63900, train loss: 2.1341, train acc: 0.3438, valid loss: 2.0460, valid acc: 0.3385
Iter: 64000, train loss: 1.8523, train acc: 0.4062, valid loss: 2.0456, valid acc: 0.3392
Iter: 64100, train loss: 2.0246, train acc: 0.4062, valid loss: 2.0459, valid acc: 0.3400
Iter: 64200, train loss: 2.1156, train acc: 0.2812, valid loss: 2.0455, valid acc: 0.3385
Iter: 64300, train loss: 1.7121, train acc: 0.4531, valid loss: 2.0452, valid acc: 0.3377
Iter: 64400, train loss: 1.8997, train acc: 0.3438, valid loss: 2.0460, valid acc: 0.3392
Iter: 64500, train loss: 1.7306, train acc: 0.4844, valid loss: 2.0463, valid acc: 0.3385
Iter: 64600, train loss: 1.9292, train acc: 0.3438, valid loss: 2.0466, valid acc: 0.3362
Iter: 64700, train loss: 1.7958, train acc: 0.4688, valid loss: 2.0464, valid acc: 0.3377
Iter: 64800, train loss: 1.9594, train acc: 0.3594, valid loss: 2.0458, valid acc: 0.3377
Iter: 64900, train loss: 1.8723, train acc: 0.3281, valid loss: 2.0463, valid acc: 0.3392
Iter: 65000, train loss: 1.7522, train acc: 0.4219, valid loss: 2.0458, valid acc: 0.3385
Iter: 65100, train loss: 2.0934, train acc: 0.2812, valid loss: 2.0464, valid acc: 0.3377
Iter: 65200, train loss: 1.8785, train acc: 0.3281, valid loss: 2.0463, valid acc: 0.3369
Iter: 65300, train loss: 2.1260, train acc: 0.3281, valid loss: 2.0466, valid acc: 0.3369
Iter: 65400, train loss: 2.1621, train acc: 0.3281, valid loss: 2.0464, valid acc: 0.3362
Iter: 65500, train loss: 1.9549, train acc: 0.3750, valid loss: 2.0454, valid acc: 0.3400
Iter: 65600, train loss: 1.8670, train acc: 0.4219, valid loss: 2.0457, valid acc: 0.3392
Iter: 65700, train loss: 1.9729, train acc: 0.2812, valid loss: 2.0459, valid acc: 0.3385
Iter: 65800, train loss: 1.8746, train acc: 0.3438, valid loss: 2.0464, valid acc: 0.3392
Iter: 65900, train loss: 1.9420, train acc: 0.2812, valid loss: 2.0470, valid acc: 0.3385
Iter: 66000, train loss: 2.0055, train acc: 0.3281, valid loss: 2.0470, valid acc: 0.3369
Iter: 66100, train loss: 2.1261, train acc: 0.3281, valid loss: 2.0473, valid acc: 0.3392
Iter: 66200, train loss: 1.7858, train acc: 0.4375, valid loss: 2.0476, valid acc: 0.3369
Iter: 66300, train loss: 1.9572, train acc: 0.2812, valid loss: 2.0465, valid acc: 0.3369
Iter: 66400, train loss: 2.1380, train acc: 0.2500, valid loss: 2.0463, valid acc: 0.3369
Iter: 66500, train loss: 2.2725, train acc: 0.3281, valid loss: 2.0451, valid acc: 0.3392
Iter: 66600, train loss: 1.9342, train acc: 0.4531, valid loss: 2.0452, valid acc: 0.3392
Iter: 66700, train loss: 1.9557, train acc: 0.3750, valid loss: 2.0444, valid acc: 0.3392
Iter: 66800, train loss: 2.1373, train acc: 0.2969, valid loss: 2.0451, valid acc: 0.3377
Iter: 66900, train loss: 2.1286, train acc: 0.2812, valid loss: 2.0450, valid acc: 0.3415
Iter: 67000, train loss: 2.1457, train acc: 0.2969, valid loss: 2.0452, valid acc: 0.3392
Iter: 67100, train loss: 1.9032, train acc: 0.3438, valid loss: 2.0450, valid acc: 0.3392
Iter: 67200, train loss: 1.7204, train acc: 0.3438, valid loss: 2.0451, valid acc: 0.3369
Iter: 67300, train loss: 1.9921, train acc: 0.4062, valid loss: 2.0450, valid acc: 0.3385
Iter: 67400, train loss: 2.0975, train acc: 0.2969, valid loss: 2.0446, valid acc: 0.3385
Iter: 67500, train loss: 2.0223, train acc: 0.3281, valid loss: 2.0449, valid acc: 0.3400
Iter: 67600, train loss: 1.6498, train acc: 0.4375, valid loss: 2.0439, valid acc: 0.3400
Iter: 67700, train loss: 1.7261, train acc: 0.4531, valid loss: 2.0437, valid acc: 0.3392
Iter: 67800, train loss: 2.1533, train acc: 0.2812, valid loss: 2.0444, valid acc: 0.3392
Iter: 67900, train loss: 1.9573, train acc: 0.3750, valid loss: 2.0447, valid acc: 0.3362
Iter: 68000, train loss: 1.9924, train acc: 0.3750, valid loss: 2.0446, valid acc: 0.3369
Iter: 68100, train loss: 1.8662, train acc: 0.4062, valid loss: 2.0451, valid acc: 0.3385
Iter: 68200, train loss: 1.6714, train acc: 0.5000, valid loss: 2.0448, valid acc: 0.3369
Iter: 68300, train loss: 1.8955, train acc: 0.3906, valid loss: 2.0447, valid acc: 0.3362
Iter: 68400, train loss: 1.9509, train acc: 0.2656, valid loss: 2.0445, valid acc: 0.3369
Iter: 68500, train loss: 1.9934, train acc: 0.3906, valid loss: 2.0445, valid acc: 0.3377
Iter: 68600, train loss: 2.1673, train acc: 0.3125, valid loss: 2.0446, valid acc: 0.3377
Iter: 68700, train loss: 1.9802, train acc: 0.3438, valid loss: 2.0455, valid acc: 0.3377
Iter: 68800, train loss: 1.9931, train acc: 0.2812, valid loss: 2.0443, valid acc: 0.3377
Iter: 68900, train loss: 1.9326, train acc: 0.3750, valid loss: 2.0443, valid acc: 0.3354
Iter: 69000, train loss: 1.8492, train acc: 0.4375, valid loss: 2.0445, valid acc: 0.3362
Iter: 69100, train loss: 2.0122, train acc: 0.3906, valid loss: 2.0444, valid acc: 0.3392
Iter: 69200, train loss: 1.9746, train acc: 0.4375, valid loss: 2.0442, valid acc: 0.3385
Iter: 69300, train loss: 1.8297, train acc: 0.3281, valid loss: 2.0442, valid acc: 0.3369
Iter: 69400, train loss: 1.8375, train acc: 0.4062, valid loss: 2.0442, valid acc: 0.3369
Iter: 69500, train loss: 1.9734, train acc: 0.3281, valid loss: 2.0443, valid acc: 0.3362
Iter: 69600, train loss: 1.8581, train acc: 0.3125, valid loss: 2.0445, valid acc: 0.3392
Iter: 69700, train loss: 1.7812, train acc: 0.4375, valid loss: 2.0439, valid acc: 0.3408
Iter: 69800, train loss: 2.0570, train acc: 0.3594, valid loss: 2.0438, valid acc: 0.3392
Iter: 69900, train loss: 2.0582, train acc: 0.2969, valid loss: 2.0445, valid acc: 0.3392
Iter: 70000, train loss: 2.1348, train acc: 0.3281, valid loss: 2.0443, valid acc: 0.3392
Iter: 70100, train loss: 2.0892, train acc: 0.3594, valid loss: 2.0435, valid acc: 0.3369
Iter: 70200, train loss: 2.0397, train acc: 0.3125, valid loss: 2.0433, valid acc: 0.3400
Iter: 70300, train loss: 1.8934, train acc: 0.3125, valid loss: 2.0438, valid acc: 0.3400
Iter: 70400, train loss: 1.8715, train acc: 0.4844, valid loss: 2.0431, valid acc: 0.3392
Iter: 70500, train loss: 2.1250, train acc: 0.2812, valid loss: 2.0434, valid acc: 0.3408
Iter: 70600, train loss: 1.9889, train acc: 0.3281, valid loss: 2.0437, valid acc: 0.3377
Iter: 70700, train loss: 1.8686, train acc: 0.3750, valid loss: 2.0437, valid acc: 0.3377
Iter: 70800, train loss: 2.0167, train acc: 0.3906, valid loss: 2.0438, valid acc: 0.3369
Iter: 70900, train loss: 2.1395, train acc: 0.3281, valid loss: 2.0432, valid acc: 0.3362
Iter: 71000, train loss: 1.7836, train acc: 0.4375, valid loss: 2.0432, valid acc: 0.3354
Iter: 71100, train loss: 2.1662, train acc: 0.3125, valid loss: 2.0434, valid acc: 0.3408
Iter: 71200, train loss: 1.9224, train acc: 0.2656, valid loss: 2.0432, valid acc: 0.3392
Iter: 71300, train loss: 1.9768, train acc: 0.3125, valid loss: 2.0427, valid acc: 0.3385
Iter: 71400, train loss: 1.9570, train acc: 0.3438, valid loss: 2.0427, valid acc: 0.3392
Iter: 71500, train loss: 2.0546, train acc: 0.3125, valid loss: 2.0431, valid acc: 0.3377
Iter: 71600, train loss: 1.6049, train acc: 0.4688, valid loss: 2.0428, valid acc: 0.3408
Iter: 71700, train loss: 1.9710, train acc: 0.3281, valid loss: 2.0429, valid acc: 0.3392
Iter: 71800, train loss: 2.1541, train acc: 0.3438, valid loss: 2.0429, valid acc: 0.3392
Iter: 71900, train loss: 1.6730, train acc: 0.4062, valid loss: 2.0434, valid acc: 0.3400
Iter: 72000, train loss: 1.9205, train acc: 0.3281, valid loss: 2.0434, valid acc: 0.3415
Iter: 72100, train loss: 2.3841, train acc: 0.2656, valid loss: 2.0432, valid acc: 0.3400
Iter: 72200, train loss: 2.0519, train acc: 0.2656, valid loss: 2.0433, valid acc: 0.3408
Iter: 72300, train loss: 1.8326, train acc: 0.4375, valid loss: 2.0429, valid acc: 0.3400
Iter: 72400, train loss: 1.8514, train acc: 0.4688, valid loss: 2.0422, valid acc: 0.3438
Iter: 72500, train loss: 2.1329, train acc: 0.2812, valid loss: 2.0421, valid acc: 0.3415
Iter: 72600, train loss: 2.0214, train acc: 0.3438, valid loss: 2.0421, valid acc: 0.3438
Iter: 72700, train loss: 1.8381, train acc: 0.3906, valid loss: 2.0423, valid acc: 0.3438
Iter: 72800, train loss: 2.3181, train acc: 0.2656, valid loss: 2.0417, valid acc: 0.3469
Iter: 72900, train loss: 2.1797, train acc: 0.3281, valid loss: 2.0414, valid acc: 0.3423
Iter: 73000, train loss: 2.1637, train acc: 0.3281, valid loss: 2.0420, valid acc: 0.3408
Iter: 73100, train loss: 1.8939, train acc: 0.3750, valid loss: 2.0416, valid acc: 0.3362
Iter: 73200, train loss: 1.9212, train acc: 0.3594, valid loss: 2.0417, valid acc: 0.3392
Iter: 73300, train loss: 1.9740, train acc: 0.3594, valid loss: 2.0416, valid acc: 0.3392
Iter: 73400, train loss: 1.9162, train acc: 0.3594, valid loss: 2.0416, valid acc: 0.3377
Iter: 73500, train loss: 2.0023, train acc: 0.3125, valid loss: 2.0418, valid acc: 0.3392
Iter: 73600, train loss: 1.8161, train acc: 0.2812, valid loss: 2.0417, valid acc: 0.3438
Iter: 73700, train loss: 1.7643, train acc: 0.4531, valid loss: 2.0419, valid acc: 0.3438
Iter: 73800, train loss: 1.9340, train acc: 0.3594, valid loss: 2.0415, valid acc: 0.3408
Iter: 73900, train loss: 1.9711, train acc: 0.3125, valid loss: 2.0409, valid acc: 0.3377
Iter: 74000, train loss: 1.9824, train acc: 0.3438, valid loss: 2.0411, valid acc: 0.3392
Iter: 74100, train loss: 1.9336, train acc: 0.3125, valid loss: 2.0417, valid acc: 0.3438
Iter: 74200, train loss: 2.2059, train acc: 0.2500, valid loss: 2.0410, valid acc: 0.3423
Iter: 74300, train loss: 1.8078, train acc: 0.4531, valid loss: 2.0423, valid acc: 0.3431
Iter: 74400, train loss: 2.1414, train acc: 0.2656, valid loss: 2.0419, valid acc: 0.3431
Iter: 74500, train loss: 2.0254, train acc: 0.4062, valid loss: 2.0425, valid acc: 0.3431
Iter: 74600, train loss: 1.7712, train acc: 0.2969, valid loss: 2.0425, valid acc: 0.3431
Iter: 74700, train loss: 2.0629, train acc: 0.3281, valid loss: 2.0422, valid acc: 0.3423
Iter: 74800, train loss: 2.0778, train acc: 0.2500, valid loss: 2.0423, valid acc: 0.3408
Iter: 74900, train loss: 2.0184, train acc: 0.3438, valid loss: 2.0430, valid acc: 0.3423
Iter: 75000, train loss: 1.9740, train acc: 0.3281, valid loss: 2.0426, valid acc: 0.3423
Iter: 75100, train loss: 2.0245, train acc: 0.2812, valid loss: 2.0424, valid acc: 0.3392
Iter: 75200, train loss: 1.9021, train acc: 0.3750, valid loss: 2.0424, valid acc: 0.3400
Iter: 75300, train loss: 2.0045, train acc: 0.3438, valid loss: 2.0424, valid acc: 0.3408
Iter: 75400, train loss: 2.0558, train acc: 0.2656, valid loss: 2.0420, valid acc: 0.3423
Iter: 75500, train loss: 1.7362, train acc: 0.3594, valid loss: 2.0423, valid acc: 0.3392
Iter: 75600, train loss: 1.9182, train acc: 0.3906, valid loss: 2.0411, valid acc: 0.3400
Iter: 75700, train loss: 2.0569, train acc: 0.3594, valid loss: 2.0410, valid acc: 0.3385
Iter: 75800, train loss: 1.9271, train acc: 0.3125, valid loss: 2.0403, valid acc: 0.3369
Iter: 75900, train loss: 2.0120, train acc: 0.3125, valid loss: 2.0404, valid acc: 0.3408
Iter: 76000, train loss: 1.8673, train acc: 0.4375, valid loss: 2.0401, valid acc: 0.3400
Iter: 76100, train loss: 1.7927, train acc: 0.3594, valid loss: 2.0404, valid acc: 0.3392
Iter: 76200, train loss: 1.6877, train acc: 0.4531, valid loss: 2.0398, valid acc: 0.3385
Iter: 76300, train loss: 1.9764, train acc: 0.3125, valid loss: 2.0392, valid acc: 0.3423
Iter: 76400, train loss: 2.0074, train acc: 0.2969, valid loss: 2.0387, valid acc: 0.3423
Iter: 76500, train loss: 1.9192, train acc: 0.4219, valid loss: 2.0385, valid acc: 0.3438
Iter: 76600, train loss: 2.3711, train acc: 0.2344, valid loss: 2.0384, valid acc: 0.3454
Iter: 76700, train loss: 2.0120, train acc: 0.2969, valid loss: 2.0391, valid acc: 0.3423
Iter: 76800, train loss: 2.1746, train acc: 0.3125, valid loss: 2.0389, valid acc: 0.3438
Iter: 76900, train loss: 1.9289, train acc: 0.3281, valid loss: 2.0393, valid acc: 0.3423
Iter: 77000, train loss: 2.0373, train acc: 0.3125, valid loss: 2.0390, valid acc: 0.3431
Iter: 77100, train loss: 1.6980, train acc: 0.4219, valid loss: 2.0392, valid acc: 0.3415
Iter: 77200, train loss: 1.9150, train acc: 0.2500, valid loss: 2.0395, valid acc: 0.3408
Iter: 77300, train loss: 1.7961, train acc: 0.5000, valid loss: 2.0394, valid acc: 0.3431
Iter: 77400, train loss: 1.8527, train acc: 0.4062, valid loss: 2.0393, valid acc: 0.3438
Iter: 77500, train loss: 1.9756, train acc: 0.3125, valid loss: 2.0389, valid acc: 0.3415
Iter: 77600, train loss: 2.0298, train acc: 0.3125, valid loss: 2.0389, valid acc: 0.3423
Iter: 77700, train loss: 2.0321, train acc: 0.3281, valid loss: 2.0394, valid acc: 0.3423
Iter: 77800, train loss: 1.7407, train acc: 0.4375, valid loss: 2.0398, valid acc: 0.3423
Iter: 77900, train loss: 1.9628, train acc: 0.3906, valid loss: 2.0392, valid acc: 0.3423
Iter: 78000, train loss: 2.1479, train acc: 0.2969, valid loss: 2.0388, valid acc: 0.3446
Iter: 78100, train loss: 2.0844, train acc: 0.3125, valid loss: 2.0389, valid acc: 0.3431
Iter: 78200, train loss: 2.0096, train acc: 0.3438, valid loss: 2.0394, valid acc: 0.3431
Iter: 78300, train loss: 2.1887, train acc: 0.2656, valid loss: 2.0393, valid acc: 0.3462
Iter: 78400, train loss: 2.0475, train acc: 0.3750, valid loss: 2.0395, valid acc: 0.3454
Iter: 78500, train loss: 1.8925, train acc: 0.3906, valid loss: 2.0392, valid acc: 0.3446
Iter: 78600, train loss: 1.8378, train acc: 0.3750, valid loss: 2.0389, valid acc: 0.3438
Iter: 78700, train loss: 1.9414, train acc: 0.3906, valid loss: 2.0389, valid acc: 0.3438
Iter: 78800, train loss: 2.1433, train acc: 0.3281, valid loss: 2.0396, valid acc: 0.3438
Iter: 78900, train loss: 2.2317, train acc: 0.2812, valid loss: 2.0391, valid acc: 0.3454
Iter: 79000, train loss: 2.0985, train acc: 0.3281, valid loss: 2.0388, valid acc: 0.3423
Iter: 79100, train loss: 1.9753, train acc: 0.2812, valid loss: 2.0391, valid acc: 0.3431
Iter: 79200, train loss: 1.8457, train acc: 0.4062, valid loss: 2.0393, valid acc: 0.3431
Iter: 79300, train loss: 1.9890, train acc: 0.4375, valid loss: 2.0386, valid acc: 0.3446
Iter: 79400, train loss: 1.9360, train acc: 0.5000, valid loss: 2.0387, valid acc: 0.3454
Iter: 79500, train loss: 1.8161, train acc: 0.4062, valid loss: 2.0388, valid acc: 0.3462
Iter: 79600, train loss: 1.9888, train acc: 0.3750, valid loss: 2.0389, valid acc: 0.3462
Iter: 79700, train loss: 1.9110, train acc: 0.4062, valid loss: 2.0392, valid acc: 0.3477
Iter: 79800, train loss: 1.8685, train acc: 0.2812, valid loss: 2.0394, valid acc: 0.3477
Iter: 79900, train loss: 1.7608, train acc: 0.4062, valid loss: 2.0390, valid acc: 0.3477
Iter: 80000, train loss: 2.0167, train acc: 0.2969, valid loss: 2.0394, valid acc: 0.3485
Iter: 80100, train loss: 1.9517, train acc: 0.3438, valid loss: 2.0395, valid acc: 0.3485
Iter: 80200, train loss: 1.9711, train acc: 0.3750, valid loss: 2.0404, valid acc: 0.3469
Iter: 80300, train loss: 1.7113, train acc: 0.3281, valid loss: 2.0394, valid acc: 0.3477
Iter: 80400, train loss: 2.0412, train acc: 0.3438, valid loss: 2.0389, valid acc: 0.3446
Iter: 80500, train loss: 1.9069, train acc: 0.3906, valid loss: 2.0392, valid acc: 0.3454
Iter: 80600, train loss: 1.9385, train acc: 0.2656, valid loss: 2.0385, valid acc: 0.3462
Iter: 80700, train loss: 2.0638, train acc: 0.2344, valid loss: 2.0378, valid acc: 0.3446
Iter: 80800, train loss: 1.7964, train acc: 0.3906, valid loss: 2.0373, valid acc: 0.3477
Iter: 80900, train loss: 1.7637, train acc: 0.4219, valid loss: 2.0377, valid acc: 0.3492
Iter: 81000, train loss: 1.8344, train acc: 0.3750, valid loss: 2.0375, valid acc: 0.3469
Iter: 81100, train loss: 2.3447, train acc: 0.3125, valid loss: 2.0375, valid acc: 0.3462
Iter: 81200, train loss: 2.1133, train acc: 0.3750, valid loss: 2.0368, valid acc: 0.3446
Iter: 81300, train loss: 2.0493, train acc: 0.2812, valid loss: 2.0363, valid acc: 0.3454
Iter: 81400, train loss: 1.6294, train acc: 0.4531, valid loss: 2.0373, valid acc: 0.3438
Iter: 81500, train loss: 1.9266, train acc: 0.3281, valid loss: 2.0379, valid acc: 0.3446
Iter: 81600, train loss: 2.0772, train acc: 0.3594, valid loss: 2.0377, valid acc: 0.3446
Iter: 81700, train loss: 2.0849, train acc: 0.3594, valid loss: 2.0377, valid acc: 0.3454
Iter: 81800, train loss: 1.8309, train acc: 0.4062, valid loss: 2.0372, valid acc: 0.3477
Iter: 81900, train loss: 1.9787, train acc: 0.3750, valid loss: 2.0373, valid acc: 0.3454
Iter: 82000, train loss: 1.9249, train acc: 0.2969, valid loss: 2.0371, valid acc: 0.3446
Iter: 82100, train loss: 1.9866, train acc: 0.3438, valid loss: 2.0374, valid acc: 0.3462
Iter: 82200, train loss: 2.1528, train acc: 0.3750, valid loss: 2.0372, valid acc: 0.3462
Iter: 82300, train loss: 2.0670, train acc: 0.2812, valid loss: 2.0368, valid acc: 0.3454
Iter: 82400, train loss: 1.6690, train acc: 0.3906, valid loss: 2.0367, valid acc: 0.3431
Iter: 82500, train loss: 2.0328, train acc: 0.3906, valid loss: 2.0363, valid acc: 0.3438
Iter: 82600, train loss: 1.9306, train acc: 0.3125, valid loss: 2.0366, valid acc: 0.3431
Iter: 82700, train loss: 2.1044, train acc: 0.2969, valid loss: 2.0363, valid acc: 0.3438
Iter: 82800, train loss: 1.9813, train acc: 0.3438, valid loss: 2.0371, valid acc: 0.3462
Iter: 82900, train loss: 2.0136, train acc: 0.2812, valid loss: 2.0369, valid acc: 0.3438
Iter: 83000, train loss: 1.7592, train acc: 0.4375, valid loss: 2.0370, valid acc: 0.3469
Iter: 83100, train loss: 1.8207, train acc: 0.4531, valid loss: 2.0366, valid acc: 0.3477
Iter: 83200, train loss: 2.0538, train acc: 0.3594, valid loss: 2.0363, valid acc: 0.3446
Iter: 83300, train loss: 1.8555, train acc: 0.3594, valid loss: 2.0368, valid acc: 0.3438
Iter: 83400, train loss: 2.0423, train acc: 0.2656, valid loss: 2.0372, valid acc: 0.3431
Iter: 83500, train loss: 2.0471, train acc: 0.3125, valid loss: 2.0374, valid acc: 0.3438
Iter: 83600, train loss: 1.7956, train acc: 0.4062, valid loss: 2.0376, valid acc: 0.3415
Iter: 83700, train loss: 1.8993, train acc: 0.3438, valid loss: 2.0372, valid acc: 0.3469
Iter: 83800, train loss: 1.9193, train acc: 0.4219, valid loss: 2.0371, valid acc: 0.3462
Iter: 83900, train loss: 2.0652, train acc: 0.3438, valid loss: 2.0369, valid acc: 0.3446
Iter: 84000, train loss: 1.8803, train acc: 0.3281, valid loss: 2.0371, valid acc: 0.3438
Iter: 84100, train loss: 1.9663, train acc: 0.3750, valid loss: 2.0370, valid acc: 0.3454
Iter: 84200, train loss: 1.8173, train acc: 0.3438, valid loss: 2.0367, valid acc: 0.3454
Iter: 84300, train loss: 2.0168, train acc: 0.3125, valid loss: 2.0358, valid acc: 0.3438
Iter: 84400, train loss: 1.7593, train acc: 0.3750, valid loss: 2.0360, valid acc: 0.3438
Iter: 84500, train loss: 2.2512, train acc: 0.2344, valid loss: 2.0361, valid acc: 0.3423
Iter: 84600, train loss: 2.1088, train acc: 0.2656, valid loss: 2.0362, valid acc: 0.3438
Iter: 84700, train loss: 2.0193, train acc: 0.2969, valid loss: 2.0363, valid acc: 0.3446
Iter: 84800, train loss: 1.7394, train acc: 0.4219, valid loss: 2.0366, valid acc: 0.3438
Iter: 84900, train loss: 2.0966, train acc: 0.2656, valid loss: 2.0369, valid acc: 0.3438
Iter: 85000, train loss: 1.7434, train acc: 0.3438, valid loss: 2.0367, valid acc: 0.3438
Iter: 85100, train loss: 2.0050, train acc: 0.4062, valid loss: 2.0365, valid acc: 0.3438
Iter: 85200, train loss: 1.7710, train acc: 0.4844, valid loss: 2.0363, valid acc: 0.3446
Iter: 85300, train loss: 1.8325, train acc: 0.4375, valid loss: 2.0362, valid acc: 0.3438
Iter: 85400, train loss: 1.9337, train acc: 0.4219, valid loss: 2.0369, valid acc: 0.3446
Iter: 85500, train loss: 1.9393, train acc: 0.3125, valid loss: 2.0381, valid acc: 0.3415
Iter: 85600, train loss: 1.7942, train acc: 0.3750, valid loss: 2.0375, valid acc: 0.3415
Iter: 85700, train loss: 1.7520, train acc: 0.4219, valid loss: 2.0369, valid acc: 0.3431
Iter: 85800, train loss: 1.9317, train acc: 0.3125, valid loss: 2.0370, valid acc: 0.3431
Iter: 85900, train loss: 1.6035, train acc: 0.4844, valid loss: 2.0371, valid acc: 0.3446
Iter: 86000, train loss: 2.2013, train acc: 0.2969, valid loss: 2.0368, valid acc: 0.3438
Iter: 86100, train loss: 1.9733, train acc: 0.2969, valid loss: 2.0361, valid acc: 0.3446
Iter: 86200, train loss: 1.7795, train acc: 0.4219, valid loss: 2.0360, valid acc: 0.3446
Iter: 86300, train loss: 1.8476, train acc: 0.4531, valid loss: 2.0364, valid acc: 0.3477
Iter: 86400, train loss: 2.0673, train acc: 0.2969, valid loss: 2.0358, valid acc: 0.3469
Iter: 86500, train loss: 1.8987, train acc: 0.3906, valid loss: 2.0359, valid acc: 0.3477
Iter: 86600, train loss: 1.5474, train acc: 0.5000, valid loss: 2.0358, valid acc: 0.3454
Iter: 86700, train loss: 1.9250, train acc: 0.3281, valid loss: 2.0355, valid acc: 0.3462
Iter: 86800, train loss: 1.8242, train acc: 0.4062, valid loss: 2.0356, valid acc: 0.3446
Iter: 86900, train loss: 1.7152, train acc: 0.4844, valid loss: 2.0352, valid acc: 0.3454
Iter: 87000, train loss: 2.0522, train acc: 0.2969, valid loss: 2.0342, valid acc: 0.3438
Iter: 87100, train loss: 2.0558, train acc: 0.3125, valid loss: 2.0337, valid acc: 0.3423
Iter: 87200, train loss: 2.0223, train acc: 0.3438, valid loss: 2.0333, valid acc: 0.3431
Iter: 87300, train loss: 1.7785, train acc: 0.4062, valid loss: 2.0340, valid acc: 0.3446
Iter: 87400, train loss: 2.0042, train acc: 0.3594, valid loss: 2.0343, valid acc: 0.3431
Iter: 87500, train loss: 2.1164, train acc: 0.4062, valid loss: 2.0340, valid acc: 0.3454
Iter: 87600, train loss: 2.0152, train acc: 0.3125, valid loss: 2.0344, valid acc: 0.3462
Iter: 87700, train loss: 1.6254, train acc: 0.3906, valid loss: 2.0345, valid acc: 0.3446
Iter: 87800, train loss: 1.8398, train acc: 0.3594, valid loss: 2.0350, valid acc: 0.3438
Iter: 87900, train loss: 2.1105, train acc: 0.3594, valid loss: 2.0346, valid acc: 0.3446
Iter: 88000, train loss: 1.9348, train acc: 0.3750, valid loss: 2.0345, valid acc: 0.3446
Iter: 88100, train loss: 1.8872, train acc: 0.4375, valid loss: 2.0352, valid acc: 0.3462
Iter: 88200, train loss: 1.8335, train acc: 0.4062, valid loss: 2.0350, valid acc: 0.3446
Iter: 88300, train loss: 1.9194, train acc: 0.3281, valid loss: 2.0356, valid acc: 0.3454
Iter: 88400, train loss: 1.6732, train acc: 0.4688, valid loss: 2.0358, valid acc: 0.3462
Iter: 88500, train loss: 1.8223, train acc: 0.4844, valid loss: 2.0370, valid acc: 0.3454
Iter: 88600, train loss: 1.8354, train acc: 0.4219, valid loss: 2.0371, valid acc: 0.3431
Iter: 88700, train loss: 2.1502, train acc: 0.3125, valid loss: 2.0370, valid acc: 0.3446
Iter: 88800, train loss: 1.8167, train acc: 0.3906, valid loss: 2.0372, valid acc: 0.3438
Iter: 88900, train loss: 2.0405, train acc: 0.3125, valid loss: 2.0368, valid acc: 0.3438
Iter: 89000, train loss: 1.8742, train acc: 0.3594, valid loss: 2.0370, valid acc: 0.3446
Iter: 89100, train loss: 2.2785, train acc: 0.2188, valid loss: 2.0362, valid acc: 0.3438
Iter: 89200, train loss: 1.6774, train acc: 0.4062, valid loss: 2.0360, valid acc: 0.3446
Iter: 89300, train loss: 2.1093, train acc: 0.2656, valid loss: 2.0367, valid acc: 0.3446
Iter: 89400, train loss: 1.9530, train acc: 0.2969, valid loss: 2.0356, valid acc: 0.3446
Iter: 89500, train loss: 1.8023, train acc: 0.4062, valid loss: 2.0355, valid acc: 0.3454
Iter: 89600, train loss: 1.9488, train acc: 0.3125, valid loss: 2.0354, valid acc: 0.3462
Iter: 89700, train loss: 2.0481, train acc: 0.3281, valid loss: 2.0351, valid acc: 0.3462
Iter: 89800, train loss: 1.8843, train acc: 0.4062, valid loss: 2.0347, valid acc: 0.3454
Iter: 89900, train loss: 2.3247, train acc: 0.2656, valid loss: 2.0344, valid acc: 0.3469
Iter: 90000, train loss: 1.9482, train acc: 0.3125, valid loss: 2.0354, valid acc: 0.3485
Iter: 90100, train loss: 1.9245, train acc: 0.4219, valid loss: 2.0354, valid acc: 0.3454
Iter: 90200, train loss: 1.9904, train acc: 0.3125, valid loss: 2.0359, valid acc: 0.3438
Iter: 90300, train loss: 1.8735, train acc: 0.3750, valid loss: 2.0353, valid acc: 0.3454
Iter: 90400, train loss: 1.6219, train acc: 0.4844, valid loss: 2.0344, valid acc: 0.3446
Iter: 90500, train loss: 1.8522, train acc: 0.3281, valid loss: 2.0343, valid acc: 0.3438
Iter: 90600, train loss: 1.9452, train acc: 0.4219, valid loss: 2.0338, valid acc: 0.3431
Iter: 90700, train loss: 1.9583, train acc: 0.3750, valid loss: 2.0338, valid acc: 0.3431
Iter: 90800, train loss: 1.9067, train acc: 0.3906, valid loss: 2.0341, valid acc: 0.3423
Iter: 90900, train loss: 1.7274, train acc: 0.5000, valid loss: 2.0336, valid acc: 0.3462
Iter: 91000, train loss: 2.1648, train acc: 0.3750, valid loss: 2.0335, valid acc: 0.3462
Iter: 91100, train loss: 1.8238, train acc: 0.3281, valid loss: 2.0333, valid acc: 0.3446
Iter: 91200, train loss: 2.0181, train acc: 0.3438, valid loss: 2.0334, valid acc: 0.3438
Iter: 91300, train loss: 1.9159, train acc: 0.3906, valid loss: 2.0341, valid acc: 0.3446
Iter: 91400, train loss: 1.6463, train acc: 0.4219, valid loss: 2.0345, valid acc: 0.3462
Iter: 91500, train loss: 1.9695, train acc: 0.3750, valid loss: 2.0353, valid acc: 0.3446
Iter: 91600, train loss: 1.9217, train acc: 0.4219, valid loss: 2.0351, valid acc: 0.3438
Iter: 91700, train loss: 2.0207, train acc: 0.3750, valid loss: 2.0345, valid acc: 0.3431
Iter: 91800, train loss: 2.0635, train acc: 0.3125, valid loss: 2.0337, valid acc: 0.3438
Iter: 91900, train loss: 1.9077, train acc: 0.4531, valid loss: 2.0333, valid acc: 0.3446
Iter: 92000, train loss: 2.2110, train acc: 0.2969, valid loss: 2.0336, valid acc: 0.3438
Iter: 92100, train loss: 1.8878, train acc: 0.3281, valid loss: 2.0342, valid acc: 0.3438
Iter: 92200, train loss: 1.8465, train acc: 0.3438, valid loss: 2.0333, valid acc: 0.3454
Iter: 92300, train loss: 2.2513, train acc: 0.2969, valid loss: 2.0335, valid acc: 0.3438
Iter: 92400, train loss: 2.0383, train acc: 0.3125, valid loss: 2.0330, valid acc: 0.3438
Iter: 92500, train loss: 1.9096, train acc: 0.4038, valid loss: 2.0332, valid acc: 0.3438
Iter: 92600, train loss: 1.7192, train acc: 0.4219, valid loss: 2.0332, valid acc: 0.3462
Iter: 92700, train loss: 1.8701, train acc: 0.3906, valid loss: 2.0338, valid acc: 0.3469
Iter: 92800, train loss: 1.9288, train acc: 0.3125, valid loss: 2.0341, valid acc: 0.3477
Iter: 92900, train loss: 2.2973, train acc: 0.2812, valid loss: 2.0342, valid acc: 0.3454
Iter: 93000, train loss: 1.8449, train acc: 0.3906, valid loss: 2.0337, valid acc: 0.3431
Iter: 93100, train loss: 1.7916, train acc: 0.4531, valid loss: 2.0344, valid acc: 0.3446
Iter: 93200, train loss: 2.0457, train acc: 0.2656, valid loss: 2.0342, valid acc: 0.3431
Iter: 93300, train loss: 2.0277, train acc: 0.3281, valid loss: 2.0344, valid acc: 0.3446
Iter: 93400, train loss: 1.9824, train acc: 0.3750, valid loss: 2.0347, valid acc: 0.3438
Iter: 93500, train loss: 2.0103, train acc: 0.4062, valid loss: 2.0343, valid acc: 0.3438
Iter: 93600, train loss: 1.9966, train acc: 0.3594, valid loss: 2.0347, valid acc: 0.3485
Iter: 93700, train loss: 1.9278, train acc: 0.4219, valid loss: 2.0349, valid acc: 0.3477
Iter: 93800, train loss: 1.7364, train acc: 0.4375, valid loss: 2.0343, valid acc: 0.3462
Iter: 93900, train loss: 1.8577, train acc: 0.4062, valid loss: 2.0341, valid acc: 0.3477
Iter: 94000, train loss: 1.9430, train acc: 0.3594, valid loss: 2.0343, valid acc: 0.3454
Iter: 94100, train loss: 1.8083, train acc: 0.4062, valid loss: 2.0344, valid acc: 0.3454
Iter: 94200, train loss: 2.0103, train acc: 0.2969, valid loss: 2.0339, valid acc: 0.3469
Iter: 94300, train loss: 1.9425, train acc: 0.3281, valid loss: 2.0336, valid acc: 0.3485
Iter: 94400, train loss: 2.0490, train acc: 0.2656, valid loss: 2.0340, valid acc: 0.3469
Iter: 94500, train loss: 1.7873, train acc: 0.4531, valid loss: 2.0340, valid acc: 0.3477
Iter: 94600, train loss: 1.8718, train acc: 0.3906, valid loss: 2.0338, valid acc: 0.3469
Iter: 94700, train loss: 2.0728, train acc: 0.3125, valid loss: 2.0329, valid acc: 0.3485
Iter: 94800, train loss: 1.8107, train acc: 0.4375, valid loss: 2.0335, valid acc: 0.3485
Iter: 94900, train loss: 1.9968, train acc: 0.3906, valid loss: 2.0331, valid acc: 0.3500
Iter: 95000, train loss: 1.9395, train acc: 0.3750, valid loss: 2.0327, valid acc: 0.3485
Iter: 95100, train loss: 2.0808, train acc: 0.2344, valid loss: 2.0327, valid acc: 0.3485
Iter: 95200, train loss: 1.6688, train acc: 0.4688, valid loss: 2.0326, valid acc: 0.3469
Iter: 95300, train loss: 1.9271, train acc: 0.4219, valid loss: 2.0328, valid acc: 0.3469
Iter: 95400, train loss: 1.9006, train acc: 0.3438, valid loss: 2.0323, valid acc: 0.3477
Iter: 95500, train loss: 2.0295, train acc: 0.2812, valid loss: 2.0320, valid acc: 0.3477
Iter: 95600, train loss: 1.9629, train acc: 0.3438, valid loss: 2.0323, valid acc: 0.3485
Iter: 95700, train loss: 2.0598, train acc: 0.3438, valid loss: 2.0328, valid acc: 0.3477
Iter: 95800, train loss: 1.8209, train acc: 0.4219, valid loss: 2.0324, valid acc: 0.3477
Iter: 95900, train loss: 1.8089, train acc: 0.3594, valid loss: 2.0318, valid acc: 0.3462
Iter: 96000, train loss: 1.9600, train acc: 0.3906, valid loss: 2.0316, valid acc: 0.3469
Iter: 96100, train loss: 2.0767, train acc: 0.2969, valid loss: 2.0313, valid acc: 0.3462
Iter: 96200, train loss: 1.8987, train acc: 0.4062, valid loss: 2.0314, valid acc: 0.3469
Iter: 96300, train loss: 1.8986, train acc: 0.3438, valid loss: 2.0310, valid acc: 0.3446
Iter: 96400, train loss: 1.9372, train acc: 0.2969, valid loss: 2.0306, valid acc: 0.3438
Iter: 96500, train loss: 2.0267, train acc: 0.2500, valid loss: 2.0301, valid acc: 0.3462
Iter: 96600, train loss: 1.8814, train acc: 0.3438, valid loss: 2.0308, valid acc: 0.3431
Iter: 96700, train loss: 2.1926, train acc: 0.3281, valid loss: 2.0299, valid acc: 0.3462
Iter: 96800, train loss: 2.0976, train acc: 0.3281, valid loss: 2.0295, valid acc: 0.3446
Iter: 96900, train loss: 2.0160, train acc: 0.3750, valid loss: 2.0300, valid acc: 0.3446
Iter: 97000, train loss: 1.8441, train acc: 0.4688, valid loss: 2.0302, valid acc: 0.3446
Iter: 97100, train loss: 1.6392, train acc: 0.4375, valid loss: 2.0298, valid acc: 0.3469
Iter: 97200, train loss: 1.8799, train acc: 0.3125, valid loss: 2.0299, valid acc: 0.3462
Iter: 97300, train loss: 1.9259, train acc: 0.2500, valid loss: 2.0311, valid acc: 0.3462
Iter: 97400, train loss: 1.8317, train acc: 0.3750, valid loss: 2.0315, valid acc: 0.3469
Iter: 97500, train loss: 1.8786, train acc: 0.3281, valid loss: 2.0322, valid acc: 0.3446
Iter: 97600, train loss: 1.7167, train acc: 0.4688, valid loss: 2.0325, valid acc: 0.3454
Iter: 97700, train loss: 2.0574, train acc: 0.2344, valid loss: 2.0322, valid acc: 0.3446
Iter: 97800, train loss: 1.9885, train acc: 0.3125, valid loss: 2.0318, valid acc: 0.3446
Iter: 97900, train loss: 1.7288, train acc: 0.3750, valid loss: 2.0313, valid acc: 0.3446
Iter: 98000, train loss: 1.8341, train acc: 0.4688, valid loss: 2.0314, valid acc: 0.3438
Iter: 98100, train loss: 1.8286, train acc: 0.4688, valid loss: 2.0313, valid acc: 0.3462
Iter: 98200, train loss: 1.9404, train acc: 0.3438, valid loss: 2.0320, valid acc: 0.3462
Iter: 98300, train loss: 2.0373, train acc: 0.4219, valid loss: 2.0320, valid acc: 0.3454
Iter: 98400, train loss: 1.9528, train acc: 0.3750, valid loss: 2.0311, valid acc: 0.3438
Iter: 98500, train loss: 2.1073, train acc: 0.2969, valid loss: 2.0318, valid acc: 0.3446
Iter: 98600, train loss: 2.1072, train acc: 0.2969, valid loss: 2.0318, valid acc: 0.3423
Iter: 98700, train loss: 2.0115, train acc: 0.3750, valid loss: 2.0313, valid acc: 0.3415
Iter: 98800, train loss: 1.9446, train acc: 0.4531, valid loss: 2.0307, valid acc: 0.3446
Iter: 98900, train loss: 1.9003, train acc: 0.4219, valid loss: 2.0305, valid acc: 0.3438
Iter: 99000, train loss: 1.8636, train acc: 0.3438, valid loss: 2.0294, valid acc: 0.3446
Iter: 99100, train loss: 1.7687, train acc: 0.4062, valid loss: 2.0291, valid acc: 0.3462
Iter: 99200, train loss: 1.9853, train acc: 0.3906, valid loss: 2.0307, valid acc: 0.3469
Iter: 99300, train loss: 1.9001, train acc: 0.4062, valid loss: 2.0300, valid acc: 0.3454
Iter: 99400, train loss: 1.8800, train acc: 0.3594, valid loss: 2.0301, valid acc: 0.3485
Iter: 99500, train loss: 1.8692, train acc: 0.4062, valid loss: 2.0303, valid acc: 0.3485
Iter: 99600, train loss: 2.1136, train acc: 0.2812, valid loss: 2.0303, valid acc: 0.3454
Iter: 99700, train loss: 1.9398, train acc: 0.4219, valid loss: 2.0304, valid acc: 0.3477
Iter: 99800, train loss: 2.1221, train acc: 0.2656, valid loss: 2.0305, valid acc: 0.3477
Iter: 99900, train loss: 1.9991, train acc: 0.3750, valid loss: 2.0303, valid acc: 0.3462
Iter: 100000, train loss: 1.7871, train acc: 0.3281, valid loss: 2.0302, valid acc: 0.3462
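
By the end of the run the validation loss has flattened out around 2.03 and the validation accuracy around 0.34-0.35, while the per-batch training numbers keep fluctuating, so the last tens of thousands of iterations add very little. A quick way to locate where the run stopped improving is sketched below; it reuses the histories recorded in nn.losses, assumes the accuracy history is logged at the same granularity as the losses, and was not part of the original run:

# nn and np come from earlier cells in this notebook
valid_loss = np.asarray(nn.losses['valid'], dtype=float)
valid_acc = np.asarray(nn.losses['valid_acc'], dtype=float)

best_loss_step = int(np.argmin(valid_loss))  # index of the lowest validation loss
best_acc_step = int(np.argmax(valid_acc))    # index of the highest validation accuracy
print('lowest valid loss:', valid_loss[best_loss_step], 'at step', best_loss_step)
print('highest valid acc:', valid_acc[best_acc_step], 'at step', best_acc_step)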

In [29]:
# Display the learning curves (train and validation loss) recorded during training
# %matplotlib inline
# %config InlineBackend.figure_format = 'retina'
import matplotlib.pyplot as plt

plt.plot(nn.losses['train'], label='Train loss')
plt.plot(nn.losses['valid'], label='Valid loss')
plt.legend()
plt.show()

[Figure: train and validation loss per recorded iteration]

In [30]:
loss_train = np.array(nn.losses['train'], dtype=float)
loss_valid = np.array(nn.losses['valid'], dtype=float)
loss_train.shape, loss_valid.shape


Out[30]:
((100000,), (100000,))
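
Since loss_train holds one value per iteration, it is dominated by mini-batch noise. Smoothing it with a simple moving average gives a more readable trend to compare against loss_valid; the sketch below is illustrative only, and the window size of 500 is an arbitrary choice rather than something the original notebook uses:

window = 500
kernel = np.ones(window) / window
loss_train_smooth = np.convolve(loss_train, kernel, mode='valid')

plt.plot(loss_train_smooth, label='Smoothed train loss (window=500)')
plt.plot(loss_valid, label='Valid loss')
plt.legend()
plt.show()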

In [31]:
# Z-score both loss histories so their shapes can be compared on a common scale
loss_train_norm = (loss_train - loss_train.mean(axis=0)) / loss_train.std(axis=0)
loss_valid_norm = (loss_valid - loss_valid.mean(axis=0)) / loss_valid.std(axis=0)

In [32]:
plt.plot(loss_train_norm, label='Normalized train loss')
plt.plot(loss_valid_norm, label='Normalized valid loss')
plt.legend()
plt.show()

[Figure: z-score-normalized train and validation loss curves]

In [33]:
plt.plot(nn.losses['train_acc'], label='Train accuracy')
plt.plot(nn.losses['valid_acc'], label='Valid accuracy')
plt.legend()
plt.show()

[Figure: train and validation accuracy over training iterations]

In [34]:
heading = labels_keys_sorted.copy()
heading.insert(0, 'Id')
heading


Out[34]:
['Id',
 'Blues',
 'Country',
 'Electronic',
 'Folk',
 'International',
 'Jazz',
 'Latin',
 'New_Age',
 'Pop_Rock',
 'Rap',
 'Reggae',
 'RnB',
 'Vocal']
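
As a quick sanity check (not in the original notebook), the assembled heading should match the column order of the sample submission loaded earlier:

# 'Id' followed by the 13 genre labels, in the same (alphabetical) order
assert heading == list(test_y_sample.columns)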

In [35]:
y_pred, y_logits = nn.test(X_test)
y_prob = l.softmax(y_logits)
y_prob.shape, X_test.shape, y_logits.shape, test_y_sample.shape, test_y_sample[:1]


Out[35]:
((10400, 13),
 (10400, 26),
 (10400, 13),
 (10400, 14),
    Id   Blues  Country  Electronic    Folk  International    Jazz   Latin  \
 0   1  0.0964   0.0884      0.0121  0.1004         0.0137  0.1214  0.0883   
 
    New_Age  Pop_Rock     Rap  Reggae     RnB   Vocal  
 0   0.0765    0.0332  0.0445  0.1193  0.1019  0.1038  )
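
Here l.softmax turns the raw logits into per-class probabilities, one row per test example. For reference, a numerically stable softmax over the class axis looks roughly like the sketch below (an illustration with a made-up function name, not necessarily the exact implementation in the l module used above):

def softmax_rows(logits):
    # Subtract the row-wise maximum before exponentiating to avoid overflow
    shifted = logits - logits.max(axis=1, keepdims=True)
    exps = np.exp(shifted)
    return exps / exps.sum(axis=1, keepdims=True)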

In [36]:
pred_list = []
for Id, pred in enumerate(y_prob):
    # Submission Ids are 1-based, so shift the 0-based row index
    pred_list.append([Id + 1, *pred])

In [37]:
# Write the submission file: a comma-separated header row followed by
# one row of class probabilities per test example
with open('prediction.csv', mode='w') as pred_file:
    pred_file.write(','.join(heading) + '\n')
    for row in pred_list:
        pred_file.write(','.join(str(value) for value in row) + '\n')
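
Equivalently, the whole submission could be written with pandas instead of a manual loop (a short sketch reusing the heading and pred_list built above):

submission = pd.DataFrame(pred_list, columns=heading)
submission.to_csv('prediction.csv', index=False)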

In [38]:
pd.read_csv(filepath_or_buffer='prediction.csv').head()


Out[38]:
Id Blues Country Electronic Folk International Jazz Latin New_Age Pop_Rock Rap Reggae RnB Vocal
0 1 0.004600 0.000731 0.037404 0.001704 0.007374 0.006327 0.019163 0.001979 0.004845 0.445086 0.449938 0.017982 0.002869
1 2 0.018690 0.008279 0.005489 0.008167 0.021861 0.001845 0.066994 0.000876 0.005760 0.239529 0.556683 0.036272 0.029556
2 3 0.009740 0.007488 0.045636 0.002850 0.025559 0.001216 0.032664 0.001185 0.026748 0.355551 0.427653 0.060001 0.003708
3 4 0.036971 0.027555 0.019276 0.019396 0.021533 0.005290 0.035235 0.001698 0.014196 0.086785 0.541544 0.113454 0.077068
4 5 0.002847 0.000487 0.036175 0.000402 0.004637 0.000403 0.007339 0.000240 0.007035 0.557133 0.373086 0.009768 0.000448

In [39]:
pd.read_csv(filepath_or_buffer='prediction.csv').shape, test_y_sample.shape


Out[39]:
((10400, 14), (10400, 14))

In [40]:
test_y_sample.head()


Out[40]:
Id Blues Country Electronic Folk International Jazz Latin New_Age Pop_Rock Rap Reggae RnB Vocal
0 1 0.0964 0.0884 0.0121 0.1004 0.0137 0.1214 0.0883 0.0765 0.0332 0.0445 0.1193 0.1019 0.1038
1 2 0.0121 0.0804 0.0376 0.0289 0.1310 0.0684 0.1044 0.0118 0.1562 0.0585 0.1633 0.1400 0.0073
2 3 0.1291 0.0985 0.0691 0.0356 0.0788 0.0529 0.1185 0.1057 0.1041 0.0075 0.0481 0.1283 0.0238
3 4 0.0453 0.1234 0.0931 0.0126 0.1224 0.0627 0.0269 0.0764 0.0812 0.1337 0.0357 0.0937 0.0930
4 5 0.0600 0.0915 0.0667 0.0947 0.0509 0.0335 0.1251 0.0202 0.1012 0.0365 0.1310 0.0898 0.0991
