In [2]:
# Data
import numpy as np
from tensorflow.examples.tutorials.mnist import input_data # requires TensorFlow 1.x
import impl.layer as l

# Dataset preparation and pre-processing
mnist = input_data.read_data_sets('data/MNIST_data/', one_hot=False)

X_train, y_train = mnist.train.images, mnist.train.labels
X_val, y_val = mnist.validation.images, mnist.validation.labels
X_test, y_test = mnist.test.images, mnist.test.labels


Extracting data/MNIST_data/train-images-idx3-ubyte.gz
Extracting data/MNIST_data/train-labels-idx1-ubyte.gz
Extracting data/MNIST_data/t10k-images-idx3-ubyte.gz
Extracting data/MNIST_data/t10k-labels-idx1-ubyte.gz

In [3]:
# Pre-processing: mean-centering
def normalize(X):
    # source pixels are 8-bit grayscale (2**8 = 256 levels); here we only
    # subtract the per-feature mean; std scaling is left commented out
    return (X - X.mean(axis=0)) #/ X.std(axis=0)

X_train, X_val, X_test = normalize(X=X_train), normalize(X=X_val), normalize(X=X_test)
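
If full standardization is preferred over mean-centering alone, a small epsilon keeps zero-variance features (e.g. the always-black MNIST border pixels) from causing a division by zero. A minimal sketch; `standardize` and its `eps` argument are illustrative additions, not part of `impl`:

def standardize(X, eps=1e-8):
    # center each feature and scale by its standard deviation;
    # eps guards against division by zero for constant features
    return (X - X.mean(axis=0)) / (X.std(axis=0) + eps)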

In [6]:
# Model
import impl.layer as l
from impl.loss import * # import all loss functions from impl.loss
from sklearn.utils import shuffle as skshuffle

class FFNN:

    def __init__(self, D, C, H, L):
        self.L = L # number of hidden layers
        self.C = C # number of classes
        self.losses = {'train':[], 'train_acc':[], 
                       'valid':[], 'valid_acc':[], 
                       'test':[], 'test_acc':[]}
        
        self.model = []
        self.W_fixed = []
        self.grads = []
        self.dy_prev = np.zeros((1, C)) # buffers for the commented-out
        self.y_prev = np.zeros((1, C))  # temporal-difference experiments below
        low, high = -1, 1
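        # uniform weights in [-1, 1], scaled by 1/sqrt(fan_in/2): a He-style
        # sqrt(2/fan_in) variance heuristic applied at every layer below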
        
        # Input layer: weights/ biases
        m = dict(W=np.random.uniform(size=(D, H), low=low, high=high) / np.sqrt(D / 2.), b=np.zeros((1, H)))
        self.model.append(m)
        # Fixed feedback weight
        m = np.random.uniform(size=(D, H), low=low, high=high) / np.sqrt(D / 2.)
        self.W_fixed.append(m)
        # Input layer: gradients
        self.grads.append({key: np.zeros_like(val) for key, val in self.model[0].items()})

        # Hidden layers: weights/ biases
        m_L = []
        for _ in range(L):
            m = dict(W=np.random.uniform(size=(H, H), low=low, high=high) / np.sqrt(H / 2.), b=np.zeros((1, H)))
            m_L.append(m)
        self.model.append(m_L)
        # Fixed feedback weight
        m_L = []
        for _ in range(L):
            m = np.random.uniform(size=(H, H), low=low, high=high) / np.sqrt(H / 2.)
            m_L.append(m)
        self.W_fixed.append(m_L)
        # Hidden layer: gradients
        grad_L = []
        for _ in range(L):
            grad_L.append({key: np.zeros_like(val) for key, val in self.model[1][0].items()})
        self.grads.append(grad_L)
        
        # Output layer: weights/ biases
        m = dict(W=np.random.uniform(size=(H, C), low=low, high=high) / np.sqrt(H / 2.), b=np.zeros((1, C)))
        self.model.append(m)
        # Fixed feedback weight
        m = np.random.uniform(size=(H, C), low=low, high=high) / np.sqrt(H / 2.)
        self.W_fixed.append(m)
        # Output layer: gradients
        self.grads.append({key: np.zeros_like(val) for key, val in self.model[2].items()})
        
    def fc_forward(self, X, W, b):
        out = (X @ W) + b
        cache = (W, X)
        return out, cache

    def fc_backward(self, dout, cache, W_fixed):
        W, X = cache

        dW = X.T @ dout
        db = np.sum(dout, axis=0).reshape(1, -1) # db has shape (1, n_out)
        
        dX = dout @ W.T # vanilla backprop
#         dX = dout @ W_fixed.T # feedback alignment: backprop through the fixed random weights

        return dX, dW, db
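
    # Feedback-alignment note: the commented line above replaces backprop's
    # weight transport (dout @ W.T) with a fixed random matrix W_fixed.T;
    # the forward weights gradually align with this fixed feedback
    # (Lillicrap et al., 2016).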

    def train_forward(self, X, train):
        caches, ys = [], []
        
        # Input layer
        y, fc_cache = self.fc_forward(X=X, W=self.model[0]['W'], b=self.model[0]['b']) # X: (N, D) -> y: (N, H)
        y, nl_cache = l.tanh_forward(X=y)
#         y, nl_cache = l.sigmoid_forward(X=y)
        if train:
            caches.append((fc_cache, nl_cache))
        X = y.copy() # pass to the next layer
        
        # Hidden layers
        fc_caches, nl_caches = [], []
        for layer in range(self.L):
            y, fc_cache = self.fc_forward(X=X, W=self.model[1][layer]['W'], b=self.model[1][layer]['b'])
            y, nl_cache = l.tanh_forward(X=y)
#             y, nl_cache = l.sigmoid_forward(X=y)
            X = y.copy() # pass to next layer
            if train:
                fc_caches.append(fc_cache)
                nl_caches.append(nl_cache)
        if train:
            caches.append((fc_caches, nl_caches)) # caches[1]            
        
        # Output layer
        y, fc_cache = self.fc_forward(X=X, W=self.model[2]['W'], b=self.model[2]['b'])
        y_prob = l.softmax(X=y)
        if train:
            caches.append(fc_cache)

        return y_prob, caches # for backpropagating the error

    def cross_entropy(self, y_prob, y_train):
        m = y_prob.shape[0]

        #         prob = l.softmax(y_pred)
        log_like = -np.log(y_prob[range(m), y_train] + l.eps) # eps avoids log(0)
        data_loss = np.sum(log_like) / m

        return data_loss

    def dcross_entropy(self, y_prob, y_train): # any additive regularization/noise term has zero derivative here, so only the data loss contributes
        m = y_prob.shape[0]

        #         grad_y = l.softmax(y_pred)
        grad_y = y_prob.copy() # copy so the caller's probabilities are not mutated in place
        grad_y[range(m), y_train] -= 1.
        grad_y /= m

        return grad_y
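
    # The subtraction above uses the softmax/cross-entropy identity
    # d(loss)/dz = (softmax(z) - onehot(y)) / m: decrementing the true-class
    # probability by 1 and dividing by the batch size gives the exact
    # gradient, so no separate softmax backward pass is needed.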

    def loss_function(self, y_prob, y_train):
        
        loss = self.cross_entropy(y_prob, y_train) # softmax is included
        dy = self.dcross_entropy(y_prob, y_train) # dsoftmax is included

        return loss, dy
        
    def train_backward(self, dy, caches, y):
        grads = self.grads.copy() # shallow copy; every entry is overwritten below before use
#         dy_prev = self.dy_prev.copy() # for temporal differencing
#         self.dy_prev = dy.copy() # next iteration/ epoch
#         y_prev = self.y_prev.copy() # for temporal differencing
#         self.y_prev = y.copy() # next iteration/ epoch
        
        # Output layer
        fc_cache = caches[2]
        # softmax_backward is included in dcross_entropy.
        dX, dW, db = self.fc_backward(dout=dy, cache=fc_cache, W_fixed=self.W_fixed[2])
        dy = dX.copy()
# #         dy =  dy @ self.W_fixed[2].T # done
#         dy_prev =  dy_prev @ self.W_fixed[2].T
#         y =  y @ self.W_fixed[2].T # done
#         y_prev =  y_prev @ self.W_fixed[2].T
        grads[2]['W'] = dW
        grads[2]['b'] = db

        # Hidden layer
        fc_caches, nl_caches = caches[1]
        for layer in reversed(range(self.L)):
            dy = l.tanh_backward(cache=nl_caches[layer], dout=dy) # diffable function
#             dy = l.sigmoid_backward(cache=nl_caches[layer], dout=dy) # diffable function
#             dy *= dy - dy_prev # temporal diff instead of differentiable function
#             dy *= y - y_prev # temporal diff instead of differentiable function
            dX, dW, db = self.fc_backward(dout=dy, cache=fc_caches[layer], W_fixed=self.W_fixed[1][layer])
            dy = dX.copy()
# #             dy =  dy @ self.W_fixed[2].T # done
#             dy_prev =  dy_prev @ self.W_fixed[1][layer].T
#             y =  y @ self.W_fixed[1][layer].T # done
#             y_prev =  y_prev @ self.W_fixed[1][layer].T
            grads[1][layer]['W'] = dW
            grads[1][layer]['b'] = db
        
        # Input layer
        fc_cache, nl_cache = caches[0]
        dy = l.tanh_backward(cache=nl_cache, dout=dy) # diffable function
#         dy = l.sigmoid_backward(cache=nl_caches[layer], dout=dy) # diffable function
#         dy *= dy - dy_prev # temporal diff instead of differentiable function
#         dy *= y - y_prev # temporal diff instead of differentiable function
        dX, dW, db = self.fc_backward(dout=dy, cache=fc_cache, W_fixed=self.W_fixed[0])
        grads[0]['W'] = dW
        grads[0]['b'] = db

        return dX, grads
    
    def test(self, X):
        y_prob, _ = self.train_forward(X, train=False)
        
        # if self.mode == 'classification':
        y_pred = np.argmax(y_prob, axis=1) # predicted class = argmax over class probabilities
        
        return y_pred, y_prob
        
    def get_minibatch(self, X, y, minibatch_size, shuffle):
        minibatches = []

        if shuffle:
            X, y = skshuffle(X, y)

        for i in range(0, X.shape[0], minibatch_size):
            X_mini = X[i:i + minibatch_size]
            y_mini = y[i:i + minibatch_size]
            minibatches.append((X_mini, y_mini))

        return minibatches

    def sgd(self, train_set, val_set, alpha, mb_size, n_iter, print_after):
        X_train, y_train = train_set
        X_val, y_val = val_set

        # Training iterations: each draws one random minibatch
        for it in range(1, n_iter + 1):

            # Re-shuffle and sample one minibatch at random
            minibatches = self.get_minibatch(X_train, y_train, mb_size, shuffle=True)
            idx = np.random.randint(0, len(minibatches))
            X_mini, y_mini = minibatches[idx]
            
            # Train the model
            y_prob, caches = self.train_forward(X_mini, train=True)
            _, dy = self.loss_function(y_prob, y_mini)
            _, grads = self.train_backward(dy, caches, y_prob)
            
            # Update the model for input layer
            for key in grads[0].keys():
                self.model[0][key] -= alpha * grads[0][key]

            # Update the model for the hidden layers
            for layer in range(self.L):
                for key in grads[1][layer].keys():
                    self.model[1][layer][key] -= alpha * grads[1][layer][key]

            # Update the model for output layer
            for key in grads[2].keys():
                self.model[2][key] -= alpha * grads[2][key]
            
            # Training accuracy
            y_pred, y_prob = self.test(X_mini)
            loss, _ = self.loss_function(y_prob, y_mini) # y_prob is already softmax output
            self.losses['train'].append(loss)
            acc = np.mean(y_pred == y_mini) # classification accuracy
            self.losses['train_acc'].append(acc)

            # Validate the updated model
            y_pred, y_prob = self.test(X_val)
            valid_loss, _ = self.loss_function(y_prob, y_val) # y_prob is already softmax output
            self.losses['valid'].append(valid_loss)
            valid_acc = np.mean(y_pred == y_val) # classification accuracy
            self.losses['valid_acc'].append(valid_acc)
            
            # Evaluate on the test set (X_test/y_test come from the enclosing scope)
            y_pred, y_prob = self.test(X_test)
            test_loss, _ = self.loss_function(y_prob, y_test) # y_prob is already softmax output
            self.losses['test'].append(test_loss)
            test_acc = np.mean(y_pred == y_test)
            self.losses['test_acc'].append(test_acc)
#             print('Test accuracy mean: {:.4f}, std: {:.4f}, loss: {:.4f}'.
#             format(acc.mean(), acc.std(), loss))
            
            # Print the model info: loss & accuracy or err & acc
            if it % print_after == 0:
                print('Iter-{}, train loss-{:.4f}, acc-{:.4f}, valid loss-{:.4f}, acc-{:.4f}, test loss-{:.4f}, acc-{:.4f}'.format(
                   it, loss, acc, valid_loss, valid_acc, test_loss, test_acc))
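
Before a long run, a finite-difference check can confirm that train_backward and dcross_entropy agree with a numerical gradient. A minimal sketch on a tiny batch; grad_check, the probed index (0, 0), and eps are illustrative choices, not part of the original code:

def grad_check(net, X, y, eps=1e-5):
    # analytic gradient of the output-layer weight W[0, 0]
    y_prob, caches = net.train_forward(X, train=True)
    _, dy = net.loss_function(y_prob, y)
    _, grads = net.train_backward(dy, caches, y_prob)
    analytic = grads[2]['W'][0, 0]

    # centered finite difference of the loss w.r.t. the same weight
    W = net.model[2]['W']
    old = W[0, 0]
    W[0, 0] = old + eps
    loss_plus, _ = net.loss_function(net.train_forward(X, train=False)[0], y)
    W[0, 0] = old - eps
    loss_minus, _ = net.loss_function(net.train_forward(X, train=False)[0], y)
    W[0, 0] = old
    numeric = (loss_plus - loss_minus) / (2 * eps)

    print('analytic {:.6e} vs numeric {:.6e}'.format(analytic, numeric))

If the gradients are correct, e.g. grad_check(FFNN(D=784, C=10, H=32, L=2), X_train[:8], y_train[:8]) should print two values that agree to several decimal places.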

In [7]:
# Hyper-parameters
n_iter = 10000 # number of training iterations (one random minibatch each)
alpha = 1e-3 # learning rate
mb_size = 50 # minibatch size
print_after = 10 # print train/valid/test metrics every `print_after` iterations
num_hidden_units = 32 # hidden units per layer
num_input_units = X_train.shape[1] # input dimensionality (784 = 28*28 MNIST pixels)
num_output_units = y_train.max() + 1 # number of classes in this classification problem
num_layers = 2 # number of hidden layers

# Build the model and train it.
nn = FFNN(C=num_output_units, D=num_input_units, H=num_hidden_units, L=num_layers)

nn.sgd(train_set=(X_train, y_train), val_set=(X_val, y_val), mb_size=mb_size, alpha=alpha, 
           n_iter=n_iter, print_after=print_after)
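
Once training ends, the curves accumulated in nn.losses can be inspected. A minimal plotting sketch, assuming matplotlib is available:

import matplotlib.pyplot as plt

for split in ('train', 'valid', 'test'):
    plt.plot(nn.losses[split], label=split)
plt.xlabel('iteration')
plt.ylabel('cross-entropy loss')
plt.legend()
plt.show()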


Iter-10, train loss-2.3092, acc-0.1200, valid loss-2.3169, acc-0.0736, test loss-2.3132, acc-0.0864
Iter-20, train loss-2.3009, acc-0.1400, valid loss-2.3141, acc-0.0778, test loss-2.3104, acc-0.0912
Iter-30, train loss-2.3261, acc-0.0400, valid loss-2.3112, acc-0.0822, test loss-2.3074, acc-0.0957
Iter-40, train loss-2.3269, acc-0.0200, valid loss-2.3081, acc-0.0874, test loss-2.3044, acc-0.1009
Iter-50, train loss-2.3025, acc-0.0800, valid loss-2.3052, acc-0.0924, test loss-2.3014, acc-0.1068
Iter-60, train loss-2.2883, acc-0.1800, valid loss-2.3023, acc-0.0984, test loss-2.2985, acc-0.1143
Iter-70, train loss-2.3196, acc-0.0200, valid loss-2.2992, acc-0.1040, test loss-2.2955, acc-0.1210
Iter-80, train loss-2.2822, acc-0.1400, valid loss-2.2962, acc-0.1116, test loss-2.2925, acc-0.1262
Iter-90, train loss-2.3076, acc-0.1000, valid loss-2.2932, acc-0.1182, test loss-2.2895, acc-0.1311
Iter-100, train loss-2.2728, acc-0.2000, valid loss-2.2903, acc-0.1252, test loss-2.2866, acc-0.1393
Iter-110, train loss-2.3094, acc-0.1000, valid loss-2.2875, acc-0.1318, test loss-2.2838, acc-0.1459
Iter-120, train loss-2.2800, acc-0.1200, valid loss-2.2846, acc-0.1394, test loss-2.2810, acc-0.1522
Iter-130, train loss-2.2923, acc-0.1400, valid loss-2.2817, acc-0.1460, test loss-2.2780, acc-0.1598
Iter-140, train loss-2.2828, acc-0.1000, valid loss-2.2790, acc-0.1524, test loss-2.2753, acc-0.1671
Iter-150, train loss-2.2498, acc-0.2400, valid loss-2.2761, acc-0.1622, test loss-2.2724, acc-0.1744
Iter-160, train loss-2.2661, acc-0.2400, valid loss-2.2734, acc-0.1698, test loss-2.2697, acc-0.1813
Iter-170, train loss-2.2769, acc-0.1600, valid loss-2.2704, acc-0.1816, test loss-2.2667, acc-0.1911
Iter-180, train loss-2.2744, acc-0.2000, valid loss-2.2677, acc-0.1894, test loss-2.2639, acc-0.1997
Iter-190, train loss-2.2570, acc-0.1600, valid loss-2.2647, acc-0.1990, test loss-2.2610, acc-0.2090
Iter-200, train loss-2.2677, acc-0.2400, valid loss-2.2618, acc-0.2086, test loss-2.2581, acc-0.2171
Iter-210, train loss-2.2619, acc-0.2400, valid loss-2.2590, acc-0.2162, test loss-2.2553, acc-0.2262
Iter-220, train loss-2.2544, acc-0.2600, valid loss-2.2564, acc-0.2226, test loss-2.2527, acc-0.2335
Iter-230, train loss-2.2747, acc-0.1800, valid loss-2.2536, acc-0.2318, test loss-2.2499, acc-0.2407
Iter-240, train loss-2.2491, acc-0.3000, valid loss-2.2506, acc-0.2396, test loss-2.2469, acc-0.2478
Iter-250, train loss-2.2501, acc-0.2800, valid loss-2.2479, acc-0.2450, test loss-2.2442, acc-0.2575
Iter-260, train loss-2.2389, acc-0.2400, valid loss-2.2450, acc-0.2528, test loss-2.2413, acc-0.2662
Iter-270, train loss-2.2189, acc-0.3200, valid loss-2.2422, acc-0.2592, test loss-2.2384, acc-0.2737
Iter-280, train loss-2.2182, acc-0.3200, valid loss-2.2393, acc-0.2678, test loss-2.2356, acc-0.2813
Iter-290, train loss-2.2339, acc-0.3200, valid loss-2.2365, acc-0.2734, test loss-2.2328, acc-0.2893
Iter-300, train loss-2.2397, acc-0.2800, valid loss-2.2337, acc-0.2804, test loss-2.2300, acc-0.2944
Iter-310, train loss-2.2380, acc-0.3000, valid loss-2.2310, acc-0.2872, test loss-2.2272, acc-0.2998
Iter-320, train loss-2.2267, acc-0.3200, valid loss-2.2282, acc-0.2942, test loss-2.2245, acc-0.3061
Iter-330, train loss-2.2214, acc-0.3200, valid loss-2.2254, acc-0.2986, test loss-2.2217, acc-0.3114
Iter-340, train loss-2.2151, acc-0.3200, valid loss-2.2225, acc-0.3034, test loss-2.2188, acc-0.3181
Iter-350, train loss-2.2109, acc-0.2800, valid loss-2.2197, acc-0.3080, test loss-2.2160, acc-0.3240
Iter-360, train loss-2.2086, acc-0.4000, valid loss-2.2167, acc-0.3142, test loss-2.2130, acc-0.3280
Iter-370, train loss-2.2023, acc-0.3400, valid loss-2.2138, acc-0.3216, test loss-2.2101, acc-0.3350
Iter-380, train loss-2.1935, acc-0.4400, valid loss-2.2109, acc-0.3262, test loss-2.2072, acc-0.3399
Iter-390, train loss-2.1931, acc-0.4200, valid loss-2.2080, acc-0.3312, test loss-2.2043, acc-0.3445
Iter-400, train loss-2.2058, acc-0.3600, valid loss-2.2052, acc-0.3368, test loss-2.2016, acc-0.3493
Iter-410, train loss-2.1847, acc-0.4200, valid loss-2.2024, acc-0.3414, test loss-2.1988, acc-0.3547
Iter-420, train loss-2.2383, acc-0.2400, valid loss-2.1998, acc-0.3450, test loss-2.1961, acc-0.3589
Iter-430, train loss-2.1836, acc-0.3800, valid loss-2.1969, acc-0.3484, test loss-2.1932, acc-0.3646
Iter-440, train loss-2.1626, acc-0.4600, valid loss-2.1939, acc-0.3546, test loss-2.1902, acc-0.3690
Iter-450, train loss-2.1748, acc-0.3800, valid loss-2.1912, acc-0.3588, test loss-2.1875, acc-0.3740
Iter-460, train loss-2.1879, acc-0.3800, valid loss-2.1884, acc-0.3622, test loss-2.1847, acc-0.3771
Iter-470, train loss-2.1591, acc-0.4200, valid loss-2.1856, acc-0.3664, test loss-2.1819, acc-0.3810
Iter-480, train loss-2.1647, acc-0.3800, valid loss-2.1829, acc-0.3696, test loss-2.1792, acc-0.3843
Iter-490, train loss-2.1980, acc-0.4000, valid loss-2.1801, acc-0.3728, test loss-2.1763, acc-0.3871
Iter-500, train loss-2.1854, acc-0.3600, valid loss-2.1774, acc-0.3762, test loss-2.1737, acc-0.3913
Iter-510, train loss-2.1489, acc-0.4600, valid loss-2.1747, acc-0.3790, test loss-2.1709, acc-0.3954
Iter-520, train loss-2.1645, acc-0.4000, valid loss-2.1719, acc-0.3814, test loss-2.1682, acc-0.3983
Iter-530, train loss-2.1714, acc-0.4400, valid loss-2.1691, acc-0.3848, test loss-2.1654, acc-0.4014
Iter-540, train loss-2.2096, acc-0.2200, valid loss-2.1663, acc-0.3882, test loss-2.1626, acc-0.4041
Iter-550, train loss-2.1319, acc-0.5800, valid loss-2.1636, acc-0.3910, test loss-2.1599, acc-0.4066
Iter-560, train loss-2.1612, acc-0.4400, valid loss-2.1609, acc-0.3934, test loss-2.1572, acc-0.4089
Iter-570, train loss-2.1536, acc-0.5000, valid loss-2.1581, acc-0.3962, test loss-2.1543, acc-0.4121
Iter-580, train loss-2.1523, acc-0.4200, valid loss-2.1553, acc-0.3988, test loss-2.1516, acc-0.4164
Iter-590, train loss-2.1436, acc-0.4200, valid loss-2.1527, acc-0.4016, test loss-2.1489, acc-0.4188
Iter-600, train loss-2.1690, acc-0.3600, valid loss-2.1500, acc-0.4060, test loss-2.1462, acc-0.4232
Iter-610, train loss-2.1334, acc-0.4800, valid loss-2.1472, acc-0.4096, test loss-2.1434, acc-0.4262
Iter-620, train loss-2.1123, acc-0.4600, valid loss-2.1444, acc-0.4128, test loss-2.1406, acc-0.4300
Iter-630, train loss-2.1703, acc-0.3800, valid loss-2.1417, acc-0.4150, test loss-2.1379, acc-0.4337
Iter-640, train loss-2.1254, acc-0.4800, valid loss-2.1390, acc-0.4166, test loss-2.1352, acc-0.4358
Iter-650, train loss-2.0766, acc-0.5400, valid loss-2.1361, acc-0.4192, test loss-2.1323, acc-0.4379
Iter-660, train loss-2.1284, acc-0.4800, valid loss-2.1333, acc-0.4234, test loss-2.1295, acc-0.4418
Iter-670, train loss-2.1289, acc-0.5000, valid loss-2.1305, acc-0.4272, test loss-2.1267, acc-0.4452
Iter-680, train loss-2.1323, acc-0.3800, valid loss-2.1276, acc-0.4312, test loss-2.1238, acc-0.4484
Iter-690, train loss-2.1423, acc-0.4000, valid loss-2.1250, acc-0.4328, test loss-2.1211, acc-0.4511
Iter-700, train loss-2.1261, acc-0.4200, valid loss-2.1220, acc-0.4358, test loss-2.1182, acc-0.4536
Iter-710, train loss-2.1077, acc-0.4600, valid loss-2.1193, acc-0.4378, test loss-2.1154, acc-0.4558
Iter-720, train loss-2.1307, acc-0.4000, valid loss-2.1165, acc-0.4408, test loss-2.1127, acc-0.4587
Iter-730, train loss-2.1326, acc-0.5000, valid loss-2.1138, acc-0.4428, test loss-2.1099, acc-0.4614
Iter-740, train loss-2.0916, acc-0.4400, valid loss-2.1109, acc-0.4450, test loss-2.1071, acc-0.4626
Iter-750, train loss-2.1150, acc-0.4400, valid loss-2.1080, acc-0.4472, test loss-2.1042, acc-0.4643
Iter-760, train loss-2.1006, acc-0.4600, valid loss-2.1052, acc-0.4492, test loss-2.1014, acc-0.4664
Iter-770, train loss-2.0620, acc-0.5200, valid loss-2.1023, acc-0.4520, test loss-2.0985, acc-0.4681
Iter-780, train loss-2.0776, acc-0.5000, valid loss-2.0994, acc-0.4538, test loss-2.0956, acc-0.4705
Iter-790, train loss-2.0908, acc-0.4800, valid loss-2.0967, acc-0.4556, test loss-2.0928, acc-0.4714
Iter-800, train loss-2.0983, acc-0.5000, valid loss-2.0938, acc-0.4584, test loss-2.0900, acc-0.4731
Iter-810, train loss-2.0439, acc-0.5200, valid loss-2.0910, acc-0.4604, test loss-2.0871, acc-0.4749
Iter-820, train loss-2.0617, acc-0.5200, valid loss-2.0880, acc-0.4622, test loss-2.0842, acc-0.4768
Iter-830, train loss-2.0594, acc-0.6000, valid loss-2.0853, acc-0.4642, test loss-2.0814, acc-0.4791
Iter-840, train loss-2.0752, acc-0.5000, valid loss-2.0825, acc-0.4656, test loss-2.0786, acc-0.4808
Iter-850, train loss-2.1120, acc-0.4800, valid loss-2.0797, acc-0.4674, test loss-2.0759, acc-0.4820
Iter-860, train loss-2.0707, acc-0.4000, valid loss-2.0769, acc-0.4698, test loss-2.0729, acc-0.4834
Iter-870, train loss-2.1215, acc-0.3800, valid loss-2.0740, acc-0.4724, test loss-2.0701, acc-0.4851
Iter-880, train loss-2.0811, acc-0.5200, valid loss-2.0712, acc-0.4736, test loss-2.0673, acc-0.4869
Iter-890, train loss-2.0442, acc-0.5000, valid loss-2.0683, acc-0.4744, test loss-2.0643, acc-0.4877
Iter-900, train loss-2.0700, acc-0.4800, valid loss-2.0652, acc-0.4760, test loss-2.0613, acc-0.4895
Iter-910, train loss-2.0732, acc-0.5000, valid loss-2.0624, acc-0.4762, test loss-2.0584, acc-0.4898
Iter-920, train loss-2.0683, acc-0.4400, valid loss-2.0595, acc-0.4780, test loss-2.0555, acc-0.4911
Iter-930, train loss-2.0434, acc-0.6000, valid loss-2.0566, acc-0.4794, test loss-2.0527, acc-0.4925
Iter-940, train loss-2.0408, acc-0.4600, valid loss-2.0537, acc-0.4820, test loss-2.0497, acc-0.4944
Iter-950, train loss-2.0978, acc-0.4200, valid loss-2.0509, acc-0.4852, test loss-2.0470, acc-0.4969
Iter-960, train loss-2.0418, acc-0.5400, valid loss-2.0481, acc-0.4864, test loss-2.0441, acc-0.4978
Iter-970, train loss-2.0402, acc-0.5200, valid loss-2.0452, acc-0.4868, test loss-2.0412, acc-0.4987
Iter-980, train loss-2.0240, acc-0.5200, valid loss-2.0425, acc-0.4892, test loss-2.0385, acc-0.4998
Iter-990, train loss-2.0367, acc-0.5400, valid loss-2.0396, acc-0.4904, test loss-2.0356, acc-0.5008
Iter-1000, train loss-2.0580, acc-0.5200, valid loss-2.0367, acc-0.4910, test loss-2.0327, acc-0.5013
Iter-1010, train loss-2.1015, acc-0.4200, valid loss-2.0339, acc-0.4922, test loss-2.0299, acc-0.5017
Iter-1020, train loss-1.9952, acc-0.6400, valid loss-2.0309, acc-0.4928, test loss-2.0269, acc-0.5022
Iter-1030, train loss-2.0415, acc-0.5400, valid loss-2.0278, acc-0.4932, test loss-2.0238, acc-0.5028
Iter-1040, train loss-2.0497, acc-0.4400, valid loss-2.0249, acc-0.4950, test loss-2.0209, acc-0.5041
Iter-1050, train loss-2.0599, acc-0.4000, valid loss-2.0220, acc-0.4964, test loss-2.0180, acc-0.5060
Iter-1060, train loss-1.9740, acc-0.6400, valid loss-2.0191, acc-0.4980, test loss-2.0151, acc-0.5068
Iter-1070, train loss-2.0357, acc-0.4600, valid loss-2.0163, acc-0.4986, test loss-2.0123, acc-0.5073
Iter-1080, train loss-2.0259, acc-0.5200, valid loss-2.0135, acc-0.4996, test loss-2.0095, acc-0.5085
Iter-1090, train loss-1.9931, acc-0.6000, valid loss-2.0105, acc-0.5006, test loss-2.0066, acc-0.5100
Iter-1100, train loss-1.9463, acc-0.6600, valid loss-2.0076, acc-0.5008, test loss-2.0037, acc-0.5106
Iter-1110, train loss-2.0034, acc-0.4800, valid loss-2.0046, acc-0.5018, test loss-2.0007, acc-0.5113
Iter-1120, train loss-1.9913, acc-0.5200, valid loss-2.0017, acc-0.5024, test loss-1.9977, acc-0.5119
Iter-1130, train loss-1.9063, acc-0.5400, valid loss-1.9988, acc-0.5028, test loss-1.9949, acc-0.5134
Iter-1140, train loss-2.0478, acc-0.4800, valid loss-1.9960, acc-0.5040, test loss-1.9921, acc-0.5141
Iter-1150, train loss-2.0244, acc-0.4800, valid loss-1.9932, acc-0.5046, test loss-1.9893, acc-0.5159
Iter-1160, train loss-1.9865, acc-0.5000, valid loss-1.9903, acc-0.5054, test loss-1.9864, acc-0.5168
Iter-1170, train loss-1.9868, acc-0.4400, valid loss-1.9875, acc-0.5058, test loss-1.9836, acc-0.5172
Iter-1180, train loss-2.0255, acc-0.4600, valid loss-1.9846, acc-0.5084, test loss-1.9807, acc-0.5183
Iter-1190, train loss-1.9963, acc-0.4000, valid loss-1.9816, acc-0.5088, test loss-1.9777, acc-0.5186
Iter-1200, train loss-1.9895, acc-0.4600, valid loss-1.9786, acc-0.5100, test loss-1.9747, acc-0.5195
Iter-1210, train loss-1.9312, acc-0.5000, valid loss-1.9758, acc-0.5104, test loss-1.9720, acc-0.5210
Iter-1220, train loss-2.0194, acc-0.4400, valid loss-1.9730, acc-0.5118, test loss-1.9692, acc-0.5222
Iter-1230, train loss-1.9642, acc-0.5000, valid loss-1.9702, acc-0.5124, test loss-1.9663, acc-0.5233
Iter-1240, train loss-1.9940, acc-0.4400, valid loss-1.9674, acc-0.5136, test loss-1.9635, acc-0.5237
Iter-1250, train loss-1.9414, acc-0.5000, valid loss-1.9645, acc-0.5138, test loss-1.9606, acc-0.5246
Iter-1260, train loss-1.9946, acc-0.4800, valid loss-1.9615, acc-0.5136, test loss-1.9577, acc-0.5241
Iter-1270, train loss-2.0057, acc-0.4600, valid loss-1.9586, acc-0.5142, test loss-1.9547, acc-0.5253
Iter-1280, train loss-1.9439, acc-0.4800, valid loss-1.9556, acc-0.5154, test loss-1.9517, acc-0.5258
Iter-1290, train loss-2.0038, acc-0.4400, valid loss-1.9527, acc-0.5160, test loss-1.9489, acc-0.5269
Iter-1300, train loss-1.9260, acc-0.5200, valid loss-1.9498, acc-0.5174, test loss-1.9460, acc-0.5275
Iter-1310, train loss-1.9796, acc-0.4400, valid loss-1.9471, acc-0.5184, test loss-1.9433, acc-0.5278
Iter-1320, train loss-1.8747, acc-0.7400, valid loss-1.9442, acc-0.5188, test loss-1.9404, acc-0.5282
Iter-1330, train loss-1.9133, acc-0.5000, valid loss-1.9412, acc-0.5196, test loss-1.9374, acc-0.5290
Iter-1340, train loss-1.9011, acc-0.6000, valid loss-1.9382, acc-0.5188, test loss-1.9343, acc-0.5299
Iter-1350, train loss-1.9331, acc-0.4200, valid loss-1.9353, acc-0.5200, test loss-1.9315, acc-0.5301
Iter-1360, train loss-1.9571, acc-0.4200, valid loss-1.9323, acc-0.5206, test loss-1.9286, acc-0.5311
Iter-1370, train loss-1.8840, acc-0.5800, valid loss-1.9294, acc-0.5216, test loss-1.9257, acc-0.5316
Iter-1380, train loss-1.9443, acc-0.6000, valid loss-1.9264, acc-0.5210, test loss-1.9227, acc-0.5313
Iter-1390, train loss-1.8972, acc-0.5600, valid loss-1.9234, acc-0.5224, test loss-1.9196, acc-0.5323
Iter-1400, train loss-1.9554, acc-0.5400, valid loss-1.9205, acc-0.5244, test loss-1.9168, acc-0.5333
Iter-1410, train loss-1.9026, acc-0.5800, valid loss-1.9176, acc-0.5236, test loss-1.9139, acc-0.5342
Iter-1420, train loss-1.9350, acc-0.5600, valid loss-1.9147, acc-0.5240, test loss-1.9110, acc-0.5354
Iter-1430, train loss-1.8776, acc-0.6200, valid loss-1.9118, acc-0.5242, test loss-1.9081, acc-0.5348
Iter-1440, train loss-1.9188, acc-0.5800, valid loss-1.9088, acc-0.5252, test loss-1.9051, acc-0.5355
Iter-1450, train loss-1.9334, acc-0.4000, valid loss-1.9059, acc-0.5256, test loss-1.9022, acc-0.5356
Iter-1460, train loss-1.8716, acc-0.5000, valid loss-1.9030, acc-0.5258, test loss-1.8993, acc-0.5356
Iter-1470, train loss-1.8008, acc-0.6400, valid loss-1.9000, acc-0.5256, test loss-1.8963, acc-0.5353
Iter-1480, train loss-1.9283, acc-0.4200, valid loss-1.8972, acc-0.5276, test loss-1.8935, acc-0.5359
Iter-1490, train loss-1.8868, acc-0.5600, valid loss-1.8942, acc-0.5296, test loss-1.8905, acc-0.5367
Iter-1500, train loss-1.8961, acc-0.5200, valid loss-1.8913, acc-0.5304, test loss-1.8876, acc-0.5368
Iter-1510, train loss-1.8846, acc-0.4600, valid loss-1.8885, acc-0.5314, test loss-1.8848, acc-0.5378
Iter-1520, train loss-1.8000, acc-0.7000, valid loss-1.8855, acc-0.5320, test loss-1.8819, acc-0.5384
Iter-1530, train loss-1.9184, acc-0.5600, valid loss-1.8827, acc-0.5326, test loss-1.8791, acc-0.5396
Iter-1540, train loss-1.9070, acc-0.4600, valid loss-1.8799, acc-0.5334, test loss-1.8762, acc-0.5399
Iter-1550, train loss-1.8623, acc-0.5000, valid loss-1.8770, acc-0.5338, test loss-1.8734, acc-0.5404
Iter-1560, train loss-1.8698, acc-0.6200, valid loss-1.8742, acc-0.5350, test loss-1.8705, acc-0.5401
Iter-1570, train loss-1.8700, acc-0.5400, valid loss-1.8712, acc-0.5364, test loss-1.8676, acc-0.5407
Iter-1580, train loss-1.9006, acc-0.4600, valid loss-1.8682, acc-0.5368, test loss-1.8646, acc-0.5411
Iter-1590, train loss-1.7892, acc-0.5000, valid loss-1.8654, acc-0.5376, test loss-1.8618, acc-0.5416
Iter-1600, train loss-1.9168, acc-0.3800, valid loss-1.8626, acc-0.5384, test loss-1.8590, acc-0.5420
Iter-1610, train loss-1.8304, acc-0.6200, valid loss-1.8596, acc-0.5378, test loss-1.8560, acc-0.5420
Iter-1620, train loss-1.8742, acc-0.5200, valid loss-1.8568, acc-0.5384, test loss-1.8532, acc-0.5437
Iter-1630, train loss-1.8967, acc-0.5400, valid loss-1.8539, acc-0.5384, test loss-1.8504, acc-0.5444
Iter-1640, train loss-1.8165, acc-0.6400, valid loss-1.8511, acc-0.5394, test loss-1.8476, acc-0.5453
Iter-1650, train loss-1.8489, acc-0.5000, valid loss-1.8482, acc-0.5398, test loss-1.8447, acc-0.5461
Iter-1660, train loss-1.7967, acc-0.6600, valid loss-1.8453, acc-0.5400, test loss-1.8418, acc-0.5465
Iter-1670, train loss-1.8408, acc-0.4600, valid loss-1.8423, acc-0.5410, test loss-1.8389, acc-0.5476
Iter-1680, train loss-1.8434, acc-0.5400, valid loss-1.8395, acc-0.5410, test loss-1.8360, acc-0.5477
Iter-1690, train loss-1.8288, acc-0.6000, valid loss-1.8364, acc-0.5406, test loss-1.8330, acc-0.5476
Iter-1700, train loss-1.9330, acc-0.4200, valid loss-1.8336, acc-0.5414, test loss-1.8302, acc-0.5476
Iter-1710, train loss-1.8455, acc-0.4000, valid loss-1.8308, acc-0.5420, test loss-1.8274, acc-0.5476
Iter-1720, train loss-1.8178, acc-0.5200, valid loss-1.8279, acc-0.5420, test loss-1.8245, acc-0.5477
Iter-1730, train loss-1.8468, acc-0.5400, valid loss-1.8251, acc-0.5426, test loss-1.8217, acc-0.5471
Iter-1740, train loss-1.8265, acc-0.5800, valid loss-1.8223, acc-0.5424, test loss-1.8189, acc-0.5481
Iter-1750, train loss-1.8573, acc-0.4800, valid loss-1.8195, acc-0.5428, test loss-1.8161, acc-0.5489
Iter-1760, train loss-1.8715, acc-0.5400, valid loss-1.8168, acc-0.5432, test loss-1.8134, acc-0.5492
Iter-1770, train loss-1.8558, acc-0.5000, valid loss-1.8138, acc-0.5442, test loss-1.8104, acc-0.5499
Iter-1780, train loss-1.7905, acc-0.6000, valid loss-1.8109, acc-0.5454, test loss-1.8075, acc-0.5502
Iter-1790, train loss-1.8867, acc-0.4000, valid loss-1.8080, acc-0.5456, test loss-1.8047, acc-0.5505
Iter-1800, train loss-1.8203, acc-0.5200, valid loss-1.8052, acc-0.5462, test loss-1.8019, acc-0.5514
Iter-1810, train loss-1.8378, acc-0.4800, valid loss-1.8024, acc-0.5468, test loss-1.7991, acc-0.5513
Iter-1820, train loss-1.7986, acc-0.5800, valid loss-1.7995, acc-0.5474, test loss-1.7962, acc-0.5517
Iter-1830, train loss-1.8147, acc-0.5600, valid loss-1.7966, acc-0.5480, test loss-1.7933, acc-0.5523
Iter-1840, train loss-1.8541, acc-0.4400, valid loss-1.7938, acc-0.5482, test loss-1.7905, acc-0.5528
Iter-1850, train loss-1.7948, acc-0.6200, valid loss-1.7909, acc-0.5490, test loss-1.7876, acc-0.5527
Iter-1860, train loss-1.7779, acc-0.5000, valid loss-1.7882, acc-0.5492, test loss-1.7849, acc-0.5532
Iter-1870, train loss-1.8147, acc-0.5800, valid loss-1.7854, acc-0.5490, test loss-1.7821, acc-0.5534
Iter-1880, train loss-1.8892, acc-0.4400, valid loss-1.7826, acc-0.5490, test loss-1.7793, acc-0.5533
Iter-1890, train loss-1.8083, acc-0.5800, valid loss-1.7797, acc-0.5492, test loss-1.7764, acc-0.5536
Iter-1900, train loss-1.8089, acc-0.5000, valid loss-1.7770, acc-0.5492, test loss-1.7737, acc-0.5537
Iter-1910, train loss-1.7138, acc-0.6600, valid loss-1.7741, acc-0.5496, test loss-1.7709, acc-0.5546
Iter-1920, train loss-1.8368, acc-0.4200, valid loss-1.7712, acc-0.5506, test loss-1.7680, acc-0.5546
Iter-1930, train loss-1.7468, acc-0.6000, valid loss-1.7684, acc-0.5512, test loss-1.7652, acc-0.5553
Iter-1940, train loss-1.6818, acc-0.6000, valid loss-1.7655, acc-0.5520, test loss-1.7623, acc-0.5562
Iter-1950, train loss-1.8415, acc-0.6000, valid loss-1.7625, acc-0.5520, test loss-1.7594, acc-0.5564
Iter-1960, train loss-1.7407, acc-0.5200, valid loss-1.7598, acc-0.5528, test loss-1.7567, acc-0.5572
Iter-1970, train loss-1.7759, acc-0.4200, valid loss-1.7570, acc-0.5528, test loss-1.7539, acc-0.5572
Iter-1980, train loss-1.8283, acc-0.4800, valid loss-1.7542, acc-0.5540, test loss-1.7511, acc-0.5572
Iter-1990, train loss-1.7755, acc-0.5600, valid loss-1.7512, acc-0.5540, test loss-1.7482, acc-0.5573
Iter-2000, train loss-1.6583, acc-0.6800, valid loss-1.7484, acc-0.5550, test loss-1.7454, acc-0.5575
Iter-2010, train loss-1.7600, acc-0.5200, valid loss-1.7456, acc-0.5554, test loss-1.7426, acc-0.5579
Iter-2020, train loss-1.7822, acc-0.5000, valid loss-1.7429, acc-0.5554, test loss-1.7398, acc-0.5578
Iter-2030, train loss-1.9372, acc-0.4000, valid loss-1.7402, acc-0.5562, test loss-1.7372, acc-0.5588
Iter-2040, train loss-1.7736, acc-0.5000, valid loss-1.7374, acc-0.5562, test loss-1.7344, acc-0.5591
Iter-2050, train loss-1.7812, acc-0.5200, valid loss-1.7345, acc-0.5572, test loss-1.7316, acc-0.5598
Iter-2060, train loss-1.6957, acc-0.6200, valid loss-1.7318, acc-0.5566, test loss-1.7289, acc-0.5601
Iter-2070, train loss-1.7146, acc-0.5000, valid loss-1.7291, acc-0.5574, test loss-1.7262, acc-0.5606
Iter-2080, train loss-1.8385, acc-0.4600, valid loss-1.7265, acc-0.5586, test loss-1.7236, acc-0.5617
Iter-2090, train loss-1.7735, acc-0.5200, valid loss-1.7236, acc-0.5580, test loss-1.7207, acc-0.5622
Iter-2100, train loss-1.5916, acc-0.6400, valid loss-1.7210, acc-0.5584, test loss-1.7181, acc-0.5626
Iter-2110, train loss-1.7659, acc-0.6200, valid loss-1.7183, acc-0.5596, test loss-1.7154, acc-0.5636
Iter-2120, train loss-1.6939, acc-0.6400, valid loss-1.7155, acc-0.5608, test loss-1.7127, acc-0.5651
Iter-2130, train loss-1.7025, acc-0.6600, valid loss-1.7129, acc-0.5612, test loss-1.7101, acc-0.5653
Iter-2140, train loss-1.8170, acc-0.5200, valid loss-1.7103, acc-0.5622, test loss-1.7075, acc-0.5666
Iter-2150, train loss-1.6783, acc-0.4800, valid loss-1.7076, acc-0.5630, test loss-1.7048, acc-0.5671
Iter-2160, train loss-1.6859, acc-0.6200, valid loss-1.7049, acc-0.5624, test loss-1.7021, acc-0.5672
Iter-2170, train loss-1.7130, acc-0.5200, valid loss-1.7021, acc-0.5622, test loss-1.6994, acc-0.5671
Iter-2180, train loss-1.7068, acc-0.5600, valid loss-1.6994, acc-0.5630, test loss-1.6967, acc-0.5675
Iter-2190, train loss-1.6108, acc-0.6800, valid loss-1.6967, acc-0.5628, test loss-1.6940, acc-0.5674
Iter-2200, train loss-1.6820, acc-0.5200, valid loss-1.6940, acc-0.5632, test loss-1.6913, acc-0.5681
Iter-2210, train loss-1.7553, acc-0.4800, valid loss-1.6912, acc-0.5636, test loss-1.6885, acc-0.5681
Iter-2220, train loss-1.7274, acc-0.5000, valid loss-1.6884, acc-0.5646, test loss-1.6858, acc-0.5681
Iter-2230, train loss-1.6565, acc-0.6600, valid loss-1.6857, acc-0.5646, test loss-1.6831, acc-0.5685
Iter-2240, train loss-1.6788, acc-0.5400, valid loss-1.6829, acc-0.5646, test loss-1.6804, acc-0.5686
Iter-2250, train loss-1.7005, acc-0.5800, valid loss-1.6801, acc-0.5644, test loss-1.6776, acc-0.5683
Iter-2260, train loss-1.7098, acc-0.5600, valid loss-1.6774, acc-0.5648, test loss-1.6749, acc-0.5684
Iter-2270, train loss-1.6904, acc-0.5000, valid loss-1.6747, acc-0.5654, test loss-1.6722, acc-0.5694
Iter-2280, train loss-1.6763, acc-0.5600, valid loss-1.6720, acc-0.5660, test loss-1.6695, acc-0.5700
Iter-2290, train loss-1.7041, acc-0.4400, valid loss-1.6694, acc-0.5670, test loss-1.6669, acc-0.5704
Iter-2300, train loss-1.6444, acc-0.6400, valid loss-1.6667, acc-0.5678, test loss-1.6643, acc-0.5710
Iter-2310, train loss-1.6708, acc-0.5600, valid loss-1.6640, acc-0.5682, test loss-1.6616, acc-0.5713
Iter-2320, train loss-1.6534, acc-0.5400, valid loss-1.6614, acc-0.5690, test loss-1.6590, acc-0.5720
Iter-2330, train loss-1.7275, acc-0.5600, valid loss-1.6588, acc-0.5690, test loss-1.6564, acc-0.5720
Iter-2340, train loss-1.6039, acc-0.6200, valid loss-1.6563, acc-0.5700, test loss-1.6539, acc-0.5729
Iter-2350, train loss-1.7349, acc-0.5200, valid loss-1.6536, acc-0.5702, test loss-1.6512, acc-0.5725
Iter-2360, train loss-1.5549, acc-0.6800, valid loss-1.6508, acc-0.5702, test loss-1.6485, acc-0.5728
Iter-2370, train loss-1.6276, acc-0.6200, valid loss-1.6482, acc-0.5708, test loss-1.6459, acc-0.5734
Iter-2380, train loss-1.6845, acc-0.5800, valid loss-1.6454, acc-0.5722, test loss-1.6432, acc-0.5744
Iter-2390, train loss-1.6689, acc-0.5200, valid loss-1.6428, acc-0.5744, test loss-1.6407, acc-0.5766
Iter-2400, train loss-1.6470, acc-0.6400, valid loss-1.6403, acc-0.5744, test loss-1.6382, acc-0.5762
Iter-2410, train loss-1.6414, acc-0.6000, valid loss-1.6377, acc-0.5748, test loss-1.6355, acc-0.5768
Iter-2420, train loss-1.6854, acc-0.5200, valid loss-1.6350, acc-0.5752, test loss-1.6329, acc-0.5774
Iter-2430, train loss-1.6588, acc-0.5600, valid loss-1.6323, acc-0.5762, test loss-1.6302, acc-0.5780
Iter-2440, train loss-1.6920, acc-0.5400, valid loss-1.6298, acc-0.5772, test loss-1.6277, acc-0.5794
Iter-2450, train loss-1.6248, acc-0.5800, valid loss-1.6272, acc-0.5776, test loss-1.6251, acc-0.5798
Iter-2460, train loss-1.6633, acc-0.5200, valid loss-1.6245, acc-0.5784, test loss-1.6225, acc-0.5798
Iter-2470, train loss-1.6286, acc-0.6200, valid loss-1.6218, acc-0.5804, test loss-1.6199, acc-0.5811
Iter-2480, train loss-1.6731, acc-0.5400, valid loss-1.6193, acc-0.5810, test loss-1.6174, acc-0.5822
Iter-2490, train loss-1.6172, acc-0.5600, valid loss-1.6167, acc-0.5820, test loss-1.6147, acc-0.5830
Iter-2500, train loss-1.5846, acc-0.6200, valid loss-1.6140, acc-0.5814, test loss-1.6121, acc-0.5832
Iter-2510, train loss-1.5944, acc-0.5800, valid loss-1.6113, acc-0.5826, test loss-1.6095, acc-0.5835
Iter-2520, train loss-1.5935, acc-0.5200, valid loss-1.6088, acc-0.5834, test loss-1.6070, acc-0.5842
Iter-2530, train loss-1.5191, acc-0.7000, valid loss-1.6062, acc-0.5834, test loss-1.6044, acc-0.5839
Iter-2540, train loss-1.5113, acc-0.6600, valid loss-1.6035, acc-0.5834, test loss-1.6017, acc-0.5838
Iter-2550, train loss-1.5897, acc-0.5800, valid loss-1.6010, acc-0.5834, test loss-1.5993, acc-0.5838
Iter-2560, train loss-1.6443, acc-0.5600, valid loss-1.5985, acc-0.5840, test loss-1.5967, acc-0.5844
Iter-2570, train loss-1.6923, acc-0.4800, valid loss-1.5960, acc-0.5840, test loss-1.5942, acc-0.5848
Iter-2580, train loss-1.4633, acc-0.7000, valid loss-1.5933, acc-0.5840, test loss-1.5916, acc-0.5853
Iter-2590, train loss-1.5502, acc-0.6000, valid loss-1.5907, acc-0.5854, test loss-1.5891, acc-0.5868
Iter-2600, train loss-1.6189, acc-0.5400, valid loss-1.5882, acc-0.5860, test loss-1.5865, acc-0.5877
Iter-2610, train loss-1.7084, acc-0.5000, valid loss-1.5858, acc-0.5860, test loss-1.5842, acc-0.5885
Iter-2620, train loss-1.5927, acc-0.6200, valid loss-1.5834, acc-0.5864, test loss-1.5817, acc-0.5894
Iter-2630, train loss-1.5903, acc-0.5400, valid loss-1.5807, acc-0.5864, test loss-1.5792, acc-0.5900
Iter-2640, train loss-1.5683, acc-0.6600, valid loss-1.5783, acc-0.5878, test loss-1.5768, acc-0.5913
Iter-2650, train loss-1.5171, acc-0.6200, valid loss-1.5758, acc-0.5886, test loss-1.5744, acc-0.5921
Iter-2660, train loss-1.5582, acc-0.6000, valid loss-1.5733, acc-0.5886, test loss-1.5719, acc-0.5926
Iter-2670, train loss-1.6022, acc-0.5200, valid loss-1.5708, acc-0.5896, test loss-1.5694, acc-0.5932
Iter-2680, train loss-1.5946, acc-0.5800, valid loss-1.5684, acc-0.5894, test loss-1.5670, acc-0.5937
Iter-2690, train loss-1.6117, acc-0.5600, valid loss-1.5659, acc-0.5904, test loss-1.5646, acc-0.5941
Iter-2700, train loss-1.5392, acc-0.6200, valid loss-1.5634, acc-0.5914, test loss-1.5621, acc-0.5949
Iter-2710, train loss-1.5893, acc-0.6000, valid loss-1.5609, acc-0.5926, test loss-1.5596, acc-0.5959
Iter-2720, train loss-1.5131, acc-0.7000, valid loss-1.5585, acc-0.5926, test loss-1.5571, acc-0.5964
Iter-2730, train loss-1.5150, acc-0.6000, valid loss-1.5561, acc-0.5928, test loss-1.5547, acc-0.5966
Iter-2740, train loss-1.5902, acc-0.6200, valid loss-1.5536, acc-0.5928, test loss-1.5523, acc-0.5979
Iter-2750, train loss-1.5441, acc-0.5400, valid loss-1.5511, acc-0.5928, test loss-1.5499, acc-0.5986
Iter-2760, train loss-1.5683, acc-0.5400, valid loss-1.5488, acc-0.5930, test loss-1.5476, acc-0.5986
Iter-2770, train loss-1.5190, acc-0.6600, valid loss-1.5462, acc-0.5938, test loss-1.5451, acc-0.5994
Iter-2780, train loss-1.5289, acc-0.6200, valid loss-1.5438, acc-0.5936, test loss-1.5427, acc-0.6004
Iter-2790, train loss-1.6047, acc-0.5200, valid loss-1.5414, acc-0.5952, test loss-1.5403, acc-0.6006
Iter-2800, train loss-1.6110, acc-0.5200, valid loss-1.5390, acc-0.5950, test loss-1.5379, acc-0.6015
Iter-2810, train loss-1.6588, acc-0.4600, valid loss-1.5365, acc-0.5968, test loss-1.5355, acc-0.6030
Iter-2820, train loss-1.5269, acc-0.6600, valid loss-1.5341, acc-0.5970, test loss-1.5331, acc-0.6031
Iter-2830, train loss-1.5176, acc-0.6200, valid loss-1.5317, acc-0.5982, test loss-1.5308, acc-0.6041
Iter-2840, train loss-1.5965, acc-0.5200, valid loss-1.5294, acc-0.5998, test loss-1.5285, acc-0.6054
Iter-2850, train loss-1.6550, acc-0.5600, valid loss-1.5269, acc-0.5998, test loss-1.5260, acc-0.6061
Iter-2860, train loss-1.5736, acc-0.5400, valid loss-1.5246, acc-0.6000, test loss-1.5237, acc-0.6073
Iter-2870, train loss-1.4741, acc-0.6400, valid loss-1.5222, acc-0.6008, test loss-1.5214, acc-0.6078
Iter-2880, train loss-1.5364, acc-0.5800, valid loss-1.5198, acc-0.6018, test loss-1.5190, acc-0.6079
Iter-2890, train loss-1.5300, acc-0.7400, valid loss-1.5176, acc-0.6020, test loss-1.5168, acc-0.6081
Iter-2900, train loss-1.6121, acc-0.5200, valid loss-1.5151, acc-0.6034, test loss-1.5144, acc-0.6090
Iter-2910, train loss-1.5969, acc-0.5400, valid loss-1.5129, acc-0.6042, test loss-1.5121, acc-0.6100
Iter-2920, train loss-1.5090, acc-0.5800, valid loss-1.5106, acc-0.6054, test loss-1.5100, acc-0.6112
Iter-2930, train loss-1.4528, acc-0.6000, valid loss-1.5083, acc-0.6056, test loss-1.5077, acc-0.6125
Iter-2940, train loss-1.4855, acc-0.6400, valid loss-1.5060, acc-0.6060, test loss-1.5054, acc-0.6133
Iter-2950, train loss-1.5695, acc-0.5600, valid loss-1.5037, acc-0.6076, test loss-1.5031, acc-0.6146
Iter-2960, train loss-1.4184, acc-0.6200, valid loss-1.5014, acc-0.6074, test loss-1.5008, acc-0.6150
Iter-2970, train loss-1.3732, acc-0.7000, valid loss-1.4990, acc-0.6072, test loss-1.4984, acc-0.6150
Iter-2980, train loss-1.5750, acc-0.4600, valid loss-1.4966, acc-0.6072, test loss-1.4960, acc-0.6151
Iter-2990, train loss-1.5150, acc-0.5400, valid loss-1.4942, acc-0.6072, test loss-1.4937, acc-0.6156
Iter-3000, train loss-1.5837, acc-0.5600, valid loss-1.4919, acc-0.6088, test loss-1.4914, acc-0.6166
Iter-3010, train loss-1.4114, acc-0.6600, valid loss-1.4896, acc-0.6092, test loss-1.4891, acc-0.6169
Iter-3020, train loss-1.4773, acc-0.6600, valid loss-1.4873, acc-0.6094, test loss-1.4868, acc-0.6169
Iter-3030, train loss-1.4748, acc-0.6200, valid loss-1.4850, acc-0.6104, test loss-1.4845, acc-0.6179
Iter-3040, train loss-1.5691, acc-0.5600, valid loss-1.4827, acc-0.6120, test loss-1.4823, acc-0.6195
Iter-3050, train loss-1.4690, acc-0.6000, valid loss-1.4805, acc-0.6142, test loss-1.4801, acc-0.6213
Iter-3060, train loss-1.4360, acc-0.7000, valid loss-1.4781, acc-0.6150, test loss-1.4778, acc-0.6224
Iter-3070, train loss-1.6384, acc-0.5600, valid loss-1.4759, acc-0.6146, test loss-1.4755, acc-0.6229
Iter-3080, train loss-1.5188, acc-0.5800, valid loss-1.4735, acc-0.6158, test loss-1.4732, acc-0.6241
Iter-3090, train loss-1.4328, acc-0.6600, valid loss-1.4713, acc-0.6158, test loss-1.4710, acc-0.6244
Iter-3100, train loss-1.4329, acc-0.6800, valid loss-1.4690, acc-0.6166, test loss-1.4687, acc-0.6260
Iter-3110, train loss-1.5081, acc-0.6000, valid loss-1.4667, acc-0.6186, test loss-1.4665, acc-0.6270
Iter-3120, train loss-1.4722, acc-0.5600, valid loss-1.4644, acc-0.6190, test loss-1.4642, acc-0.6277
Iter-3130, train loss-1.4678, acc-0.6600, valid loss-1.4621, acc-0.6200, test loss-1.4619, acc-0.6284
Iter-3140, train loss-1.6563, acc-0.4600, valid loss-1.4600, acc-0.6214, test loss-1.4598, acc-0.6298
Iter-3150, train loss-1.3768, acc-0.6800, valid loss-1.4578, acc-0.6216, test loss-1.4577, acc-0.6303
Iter-3160, train loss-1.4247, acc-0.7000, valid loss-1.4555, acc-0.6232, test loss-1.4554, acc-0.6307
Iter-3170, train loss-1.4386, acc-0.7600, valid loss-1.4534, acc-0.6242, test loss-1.4533, acc-0.6313
Iter-3180, train loss-1.4538, acc-0.7000, valid loss-1.4512, acc-0.6248, test loss-1.4511, acc-0.6322
Iter-3190, train loss-1.4298, acc-0.6800, valid loss-1.4489, acc-0.6262, test loss-1.4489, acc-0.6336
Iter-3200, train loss-1.4556, acc-0.7400, valid loss-1.4468, acc-0.6272, test loss-1.4467, acc-0.6341
Iter-3210, train loss-1.4504, acc-0.6600, valid loss-1.4446, acc-0.6276, test loss-1.4445, acc-0.6343
Iter-3220, train loss-1.4447, acc-0.5600, valid loss-1.4423, acc-0.6278, test loss-1.4422, acc-0.6349
Iter-3230, train loss-1.5718, acc-0.6200, valid loss-1.4401, acc-0.6286, test loss-1.4401, acc-0.6355
Iter-3240, train loss-1.4535, acc-0.6600, valid loss-1.4379, acc-0.6286, test loss-1.4379, acc-0.6358
Iter-3250, train loss-1.4446, acc-0.6600, valid loss-1.4359, acc-0.6298, test loss-1.4359, acc-0.6372
Iter-3260, train loss-1.4303, acc-0.6400, valid loss-1.4337, acc-0.6296, test loss-1.4337, acc-0.6371
Iter-3270, train loss-1.4754, acc-0.6000, valid loss-1.4315, acc-0.6298, test loss-1.4316, acc-0.6381
Iter-3280, train loss-1.4689, acc-0.5800, valid loss-1.4294, acc-0.6304, test loss-1.4295, acc-0.6385
Iter-3290, train loss-1.4947, acc-0.6400, valid loss-1.4274, acc-0.6304, test loss-1.4274, acc-0.6393
Iter-3300, train loss-1.3881, acc-0.6400, valid loss-1.4252, acc-0.6320, test loss-1.4253, acc-0.6391
Iter-3310, train loss-1.5292, acc-0.5400, valid loss-1.4231, acc-0.6324, test loss-1.4232, acc-0.6395
Iter-3320, train loss-1.3863, acc-0.6800, valid loss-1.4209, acc-0.6334, test loss-1.4211, acc-0.6404
Iter-3330, train loss-1.5333, acc-0.5600, valid loss-1.4189, acc-0.6334, test loss-1.4190, acc-0.6411
Iter-3340, train loss-1.3411, acc-0.6600, valid loss-1.4167, acc-0.6346, test loss-1.4169, acc-0.6418
Iter-3350, train loss-1.4780, acc-0.5800, valid loss-1.4147, acc-0.6354, test loss-1.4148, acc-0.6429
Iter-3360, train loss-1.3516, acc-0.6800, valid loss-1.4126, acc-0.6354, test loss-1.4127, acc-0.6430
Iter-3370, train loss-1.4601, acc-0.5800, valid loss-1.4105, acc-0.6364, test loss-1.4106, acc-0.6432
Iter-3380, train loss-1.4222, acc-0.6400, valid loss-1.4083, acc-0.6364, test loss-1.4085, acc-0.6436
Iter-3390, train loss-1.4189, acc-0.6800, valid loss-1.4062, acc-0.6372, test loss-1.4065, acc-0.6454
Iter-3400, train loss-1.3390, acc-0.7800, valid loss-1.4041, acc-0.6378, test loss-1.4044, acc-0.6454
Iter-3410, train loss-1.5202, acc-0.5200, valid loss-1.4019, acc-0.6392, test loss-1.4023, acc-0.6465
Iter-3420, train loss-1.4501, acc-0.5600, valid loss-1.3998, acc-0.6400, test loss-1.4001, acc-0.6475
Iter-3430, train loss-1.3767, acc-0.6600, valid loss-1.3978, acc-0.6412, test loss-1.3981, acc-0.6483
Iter-3440, train loss-1.3916, acc-0.6000, valid loss-1.3957, acc-0.6424, test loss-1.3961, acc-0.6491
Iter-3450, train loss-1.3919, acc-0.6800, valid loss-1.3937, acc-0.6428, test loss-1.3941, acc-0.6501
Iter-3460, train loss-1.4260, acc-0.6400, valid loss-1.3916, acc-0.6432, test loss-1.3921, acc-0.6512
Iter-3470, train loss-1.4035, acc-0.6800, valid loss-1.3896, acc-0.6444, test loss-1.3901, acc-0.6526
Iter-3480, train loss-1.3851, acc-0.6600, valid loss-1.3876, acc-0.6456, test loss-1.3881, acc-0.6532
Iter-3490, train loss-1.4318, acc-0.6800, valid loss-1.3856, acc-0.6458, test loss-1.3861, acc-0.6539
Iter-3500, train loss-1.3183, acc-0.6600, valid loss-1.3836, acc-0.6484, test loss-1.3841, acc-0.6552
Iter-3510, train loss-1.3710, acc-0.6200, valid loss-1.3816, acc-0.6494, test loss-1.3821, acc-0.6562
Iter-3520, train loss-1.3075, acc-0.7600, valid loss-1.3796, acc-0.6490, test loss-1.3801, acc-0.6563
Iter-3530, train loss-1.2676, acc-0.7600, valid loss-1.3776, acc-0.6494, test loss-1.3782, acc-0.6569
Iter-3540, train loss-1.5318, acc-0.5000, valid loss-1.3755, acc-0.6494, test loss-1.3761, acc-0.6575
Iter-3550, train loss-1.2770, acc-0.7400, valid loss-1.3735, acc-0.6512, test loss-1.3742, acc-0.6585
Iter-3560, train loss-1.3969, acc-0.6400, valid loss-1.3715, acc-0.6520, test loss-1.3722, acc-0.6590
Iter-3570, train loss-1.4138, acc-0.5800, valid loss-1.3695, acc-0.6522, test loss-1.3702, acc-0.6598
Iter-3580, train loss-1.2301, acc-0.7800, valid loss-1.3675, acc-0.6526, test loss-1.3682, acc-0.6607
Iter-3590, train loss-1.3318, acc-0.6600, valid loss-1.3655, acc-0.6534, test loss-1.3662, acc-0.6616
Iter-3600, train loss-1.3686, acc-0.6400, valid loss-1.3635, acc-0.6534, test loss-1.3643, acc-0.6625
Iter-3610, train loss-1.3793, acc-0.6600, valid loss-1.3616, acc-0.6540, test loss-1.3624, acc-0.6630
Iter-3620, train loss-1.3092, acc-0.7200, valid loss-1.3597, acc-0.6544, test loss-1.3604, acc-0.6633
Iter-3630, train loss-1.4806, acc-0.5800, valid loss-1.3577, acc-0.6550, test loss-1.3585, acc-0.6643
Iter-3640, train loss-1.3873, acc-0.6800, valid loss-1.3558, acc-0.6556, test loss-1.3566, acc-0.6653
Iter-3650, train loss-1.2753, acc-0.6600, valid loss-1.3538, acc-0.6568, test loss-1.3546, acc-0.6659
Iter-3660, train loss-1.4389, acc-0.6000, valid loss-1.3517, acc-0.6580, test loss-1.3526, acc-0.6664
Iter-3670, train loss-1.2901, acc-0.6600, valid loss-1.3498, acc-0.6586, test loss-1.3507, acc-0.6670
Iter-3680, train loss-1.3642, acc-0.7000, valid loss-1.3478, acc-0.6588, test loss-1.3488, acc-0.6682
Iter-3690, train loss-1.3236, acc-0.6400, valid loss-1.3460, acc-0.6594, test loss-1.3470, acc-0.6683
Iter-3700, train loss-1.3170, acc-0.6800, valid loss-1.3441, acc-0.6600, test loss-1.3451, acc-0.6694
Iter-3710, train loss-1.3811, acc-0.6400, valid loss-1.3421, acc-0.6612, test loss-1.3431, acc-0.6698
Iter-3720, train loss-1.3844, acc-0.6400, valid loss-1.3401, acc-0.6624, test loss-1.3411, acc-0.6709
Iter-3730, train loss-1.2532, acc-0.6800, valid loss-1.3381, acc-0.6622, test loss-1.3392, acc-0.6722
Iter-3740, train loss-1.3985, acc-0.6800, valid loss-1.3362, acc-0.6632, test loss-1.3373, acc-0.6722
Iter-3750, train loss-1.3359, acc-0.7000, valid loss-1.3343, acc-0.6634, test loss-1.3354, acc-0.6721
Iter-3760, train loss-1.3338, acc-0.6800, valid loss-1.3324, acc-0.6640, test loss-1.3336, acc-0.6728
Iter-3770, train loss-1.3724, acc-0.6800, valid loss-1.3305, acc-0.6650, test loss-1.3317, acc-0.6738
Iter-3780, train loss-1.2381, acc-0.7200, valid loss-1.3286, acc-0.6660, test loss-1.3299, acc-0.6747
Iter-3790, train loss-1.4782, acc-0.5200, valid loss-1.3268, acc-0.6660, test loss-1.3281, acc-0.6744
Iter-3800, train loss-1.3529, acc-0.6400, valid loss-1.3250, acc-0.6664, test loss-1.3262, acc-0.6750
Iter-3810, train loss-1.2304, acc-0.7200, valid loss-1.3230, acc-0.6666, test loss-1.3243, acc-0.6760
Iter-3820, train loss-1.4731, acc-0.5600, valid loss-1.3212, acc-0.6672, test loss-1.3225, acc-0.6772
Iter-3830, train loss-1.3531, acc-0.6400, valid loss-1.3193, acc-0.6682, test loss-1.3206, acc-0.6778
Iter-3840, train loss-1.3398, acc-0.5600, valid loss-1.3174, acc-0.6690, test loss-1.3188, acc-0.6782
Iter-3850, train loss-1.3116, acc-0.6600, valid loss-1.3155, acc-0.6700, test loss-1.3169, acc-0.6794
Iter-3860, train loss-1.2857, acc-0.7200, valid loss-1.3137, acc-0.6704, test loss-1.3151, acc-0.6794
Iter-3870, train loss-1.2591, acc-0.7200, valid loss-1.3120, acc-0.6710, test loss-1.3134, acc-0.6796
Iter-3880, train loss-1.2561, acc-0.7400, valid loss-1.3101, acc-0.6720, test loss-1.3117, acc-0.6798
Iter-3890, train loss-1.5377, acc-0.5400, valid loss-1.3084, acc-0.6722, test loss-1.3099, acc-0.6801
Iter-3900, train loss-1.3635, acc-0.6800, valid loss-1.3066, acc-0.6732, test loss-1.3081, acc-0.6804
Iter-3910, train loss-1.4578, acc-0.5600, valid loss-1.3049, acc-0.6730, test loss-1.3064, acc-0.6809
Iter-3920, train loss-1.3670, acc-0.7000, valid loss-1.3031, acc-0.6730, test loss-1.3047, acc-0.6815
Iter-3930, train loss-1.2958, acc-0.7000, valid loss-1.3012, acc-0.6736, test loss-1.3029, acc-0.6822
Iter-3940, train loss-1.4126, acc-0.6400, valid loss-1.2995, acc-0.6736, test loss-1.3011, acc-0.6819
Iter-3950, train loss-1.2395, acc-0.7000, valid loss-1.2977, acc-0.6748, test loss-1.2994, acc-0.6830
Iter-3960, train loss-1.2138, acc-0.7200, valid loss-1.2960, acc-0.6756, test loss-1.2977, acc-0.6826
Iter-3970, train loss-1.2184, acc-0.7400, valid loss-1.2943, acc-0.6756, test loss-1.2959, acc-0.6835
Iter-3980, train loss-1.2736, acc-0.7400, valid loss-1.2925, acc-0.6758, test loss-1.2942, acc-0.6837
Iter-3990, train loss-1.3791, acc-0.6200, valid loss-1.2907, acc-0.6764, test loss-1.2924, acc-0.6847
Iter-4000, train loss-1.3530, acc-0.6400, valid loss-1.2889, acc-0.6776, test loss-1.2906, acc-0.6858
Iter-4010, train loss-1.2563, acc-0.7200, valid loss-1.2871, acc-0.6784, test loss-1.2888, acc-0.6868
Iter-4020, train loss-1.4851, acc-0.5600, valid loss-1.2854, acc-0.6788, test loss-1.2871, acc-0.6873
Iter-4030, train loss-1.3928, acc-0.6000, valid loss-1.2836, acc-0.6802, test loss-1.2854, acc-0.6882
Iter-4040, train loss-1.2260, acc-0.7600, valid loss-1.2819, acc-0.6812, test loss-1.2837, acc-0.6884
Iter-4050, train loss-1.3049, acc-0.6600, valid loss-1.2801, acc-0.6816, test loss-1.2819, acc-0.6895
Iter-4060, train loss-1.1634, acc-0.7200, valid loss-1.2783, acc-0.6822, test loss-1.2801, acc-0.6892
Iter-4070, train loss-1.2609, acc-0.6600, valid loss-1.2766, acc-0.6822, test loss-1.2785, acc-0.6897
Iter-4080, train loss-1.2548, acc-0.7400, valid loss-1.2748, acc-0.6834, test loss-1.2768, acc-0.6905
Iter-4090, train loss-1.2641, acc-0.6600, valid loss-1.2730, acc-0.6838, test loss-1.2750, acc-0.6907
Iter-4100, train loss-1.2650, acc-0.6800, valid loss-1.2713, acc-0.6838, test loss-1.2733, acc-0.6910
Iter-4110, train loss-1.3622, acc-0.5800, valid loss-1.2697, acc-0.6836, test loss-1.2716, acc-0.6912
Iter-4120, train loss-1.3256, acc-0.6200, valid loss-1.2681, acc-0.6840, test loss-1.2700, acc-0.6915
Iter-4130, train loss-1.1496, acc-0.7800, valid loss-1.2664, acc-0.6844, test loss-1.2683, acc-0.6916
Iter-4140, train loss-1.3261, acc-0.7000, valid loss-1.2647, acc-0.6848, test loss-1.2666, acc-0.6913
Iter-4150, train loss-1.2670, acc-0.7000, valid loss-1.2630, acc-0.6856, test loss-1.2649, acc-0.6917
Iter-4160, train loss-1.2510, acc-0.7000, valid loss-1.2612, acc-0.6862, test loss-1.2632, acc-0.6924
Iter-4170, train loss-1.3267, acc-0.6600, valid loss-1.2596, acc-0.6864, test loss-1.2616, acc-0.6923
Iter-4180, train loss-1.1307, acc-0.8200, valid loss-1.2579, acc-0.6860, test loss-1.2599, acc-0.6930
Iter-4190, train loss-1.3312, acc-0.6200, valid loss-1.2562, acc-0.6864, test loss-1.2582, acc-0.6936
Iter-4200, train loss-1.2551, acc-0.7200, valid loss-1.2546, acc-0.6876, test loss-1.2566, acc-0.6932
Iter-4210, train loss-1.2560, acc-0.6800, valid loss-1.2529, acc-0.6878, test loss-1.2549, acc-0.6940
Iter-4220, train loss-1.2810, acc-0.6400, valid loss-1.2513, acc-0.6882, test loss-1.2532, acc-0.6943
Iter-4230, train loss-1.1233, acc-0.7400, valid loss-1.2496, acc-0.6884, test loss-1.2516, acc-0.6953
Iter-4240, train loss-1.3799, acc-0.6600, valid loss-1.2480, acc-0.6888, test loss-1.2501, acc-0.6955
Iter-4250, train loss-1.1589, acc-0.7600, valid loss-1.2464, acc-0.6888, test loss-1.2484, acc-0.6956
Iter-4260, train loss-1.1870, acc-0.7800, valid loss-1.2447, acc-0.6892, test loss-1.2467, acc-0.6958
Iter-4270, train loss-1.2509, acc-0.7000, valid loss-1.2430, acc-0.6894, test loss-1.2451, acc-0.6965
Iter-4280, train loss-1.1989, acc-0.7400, valid loss-1.2414, acc-0.6896, test loss-1.2434, acc-0.6973
Iter-4290, train loss-1.2739, acc-0.6000, valid loss-1.2398, acc-0.6894, test loss-1.2418, acc-0.6976
Iter-4300, train loss-1.2622, acc-0.6400, valid loss-1.2382, acc-0.6900, test loss-1.2402, acc-0.6979
Iter-4310, train loss-1.1886, acc-0.8200, valid loss-1.2365, acc-0.6908, test loss-1.2385, acc-0.6990
Iter-4320, train loss-1.1538, acc-0.7600, valid loss-1.2348, acc-0.6910, test loss-1.2369, acc-0.6995
Iter-4330, train loss-1.2524, acc-0.7200, valid loss-1.2333, acc-0.6918, test loss-1.2353, acc-0.6999
Iter-4340, train loss-1.1488, acc-0.7200, valid loss-1.2317, acc-0.6918, test loss-1.2338, acc-0.7001
Iter-4350, train loss-1.0988, acc-0.7600, valid loss-1.2300, acc-0.6912, test loss-1.2320, acc-0.7006
Iter-4360, train loss-1.1674, acc-0.7400, valid loss-1.2283, acc-0.6924, test loss-1.2304, acc-0.7007
Iter-4370, train loss-1.0801, acc-0.8000, valid loss-1.2267, acc-0.6934, test loss-1.2288, acc-0.7018
Iter-4380, train loss-1.1352, acc-0.7600, valid loss-1.2251, acc-0.6946, test loss-1.2272, acc-0.7019
Iter-4390, train loss-1.2669, acc-0.5800, valid loss-1.2234, acc-0.6960, test loss-1.2256, acc-0.7024
Iter-4400, train loss-1.3089, acc-0.6000, valid loss-1.2219, acc-0.6962, test loss-1.2241, acc-0.7026
Iter-4410, train loss-1.3402, acc-0.6800, valid loss-1.2204, acc-0.6970, test loss-1.2226, acc-0.7028
Iter-4420, train loss-1.3654, acc-0.6800, valid loss-1.2189, acc-0.6968, test loss-1.2212, acc-0.7030
Iter-4430, train loss-1.3128, acc-0.6600, valid loss-1.2174, acc-0.6970, test loss-1.2196, acc-0.7035
Iter-4440, train loss-1.2420, acc-0.6800, valid loss-1.2157, acc-0.6980, test loss-1.2180, acc-0.7045
Iter-4450, train loss-1.4898, acc-0.5600, valid loss-1.2142, acc-0.6986, test loss-1.2165, acc-0.7052
Iter-4460, train loss-1.1948, acc-0.7400, valid loss-1.2125, acc-0.6990, test loss-1.2148, acc-0.7049
Iter-4470, train loss-1.1767, acc-0.8000, valid loss-1.2110, acc-0.6998, test loss-1.2133, acc-0.7053
Iter-4480, train loss-1.1879, acc-0.8000, valid loss-1.2094, acc-0.7000, test loss-1.2117, acc-0.7059
Iter-4490, train loss-1.1262, acc-0.7800, valid loss-1.2079, acc-0.7010, test loss-1.2101, acc-0.7068
Iter-4500, train loss-1.1558, acc-0.7600, valid loss-1.2063, acc-0.7012, test loss-1.2087, acc-0.7072
Iter-4510, train loss-0.9491, acc-0.9200, valid loss-1.2048, acc-0.7014, test loss-1.2071, acc-0.7079
Iter-4520, train loss-1.1621, acc-0.7800, valid loss-1.2032, acc-0.7018, test loss-1.2056, acc-0.7088
Iter-4530, train loss-1.2109, acc-0.6800, valid loss-1.2017, acc-0.7020, test loss-1.2041, acc-0.7091
Iter-4540, train loss-1.1632, acc-0.7600, valid loss-1.2002, acc-0.7026, test loss-1.2025, acc-0.7094
Iter-4550, train loss-1.2151, acc-0.6600, valid loss-1.1987, acc-0.7032, test loss-1.2010, acc-0.7098
Iter-4560, train loss-1.2193, acc-0.6800, valid loss-1.1972, acc-0.7040, test loss-1.1995, acc-0.7107
Iter-4570, train loss-1.3060, acc-0.6800, valid loss-1.1957, acc-0.7048, test loss-1.1980, acc-0.7112
Iter-4580, train loss-1.2418, acc-0.7000, valid loss-1.1942, acc-0.7060, test loss-1.1965, acc-0.7120
Iter-4590, train loss-1.2382, acc-0.6800, valid loss-1.1927, acc-0.7066, test loss-1.1950, acc-0.7123
Iter-4600, train loss-1.1368, acc-0.7200, valid loss-1.1912, acc-0.7074, test loss-1.1935, acc-0.7127
Iter-4610, train loss-1.2274, acc-0.6600, valid loss-1.1897, acc-0.7074, test loss-1.1920, acc-0.7129
Iter-4620, train loss-1.3695, acc-0.6000, valid loss-1.1882, acc-0.7074, test loss-1.1906, acc-0.7130
Iter-4630, train loss-1.3892, acc-0.6200, valid loss-1.1866, acc-0.7080, test loss-1.1890, acc-0.7142
Iter-4640, train loss-1.2155, acc-0.7000, valid loss-1.1852, acc-0.7082, test loss-1.1876, acc-0.7143
Iter-4650, train loss-1.0077, acc-0.8200, valid loss-1.1836, acc-0.7090, test loss-1.1861, acc-0.7151
Iter-4660, train loss-1.1529, acc-0.7000, valid loss-1.1822, acc-0.7090, test loss-1.1846, acc-0.7151
Iter-4670, train loss-1.1714, acc-0.7800, valid loss-1.1806, acc-0.7100, test loss-1.1831, acc-0.7158
Iter-4680, train loss-1.3096, acc-0.6000, valid loss-1.1792, acc-0.7100, test loss-1.1817, acc-0.7167
Iter-4690, train loss-1.0063, acc-0.8200, valid loss-1.1778, acc-0.7106, test loss-1.1803, acc-0.7170
Iter-4700, train loss-1.1093, acc-0.7200, valid loss-1.1764, acc-0.7116, test loss-1.1789, acc-0.7171
Iter-4710, train loss-1.2829, acc-0.6200, valid loss-1.1750, acc-0.7116, test loss-1.1774, acc-0.7179
Iter-4720, train loss-1.1645, acc-0.7000, valid loss-1.1735, acc-0.7124, test loss-1.1760, acc-0.7179
Iter-4730, train loss-1.2445, acc-0.6600, valid loss-1.1721, acc-0.7126, test loss-1.1746, acc-0.7182
Iter-4740, train loss-1.2994, acc-0.6800, valid loss-1.1706, acc-0.7130, test loss-1.1731, acc-0.7189
Iter-4750, train loss-1.1874, acc-0.7000, valid loss-1.1691, acc-0.7138, test loss-1.1716, acc-0.7195
Iter-4760, train loss-1.1584, acc-0.6600, valid loss-1.1677, acc-0.7140, test loss-1.1702, acc-0.7199
Iter-4770, train loss-1.1119, acc-0.7400, valid loss-1.1663, acc-0.7140, test loss-1.1687, acc-0.7199
Iter-4780, train loss-1.1499, acc-0.7000, valid loss-1.1647, acc-0.7146, test loss-1.1672, acc-0.7206
Iter-4790, train loss-1.1982, acc-0.7200, valid loss-1.1633, acc-0.7146, test loss-1.1658, acc-0.7210
Iter-4800, train loss-1.0651, acc-0.7400, valid loss-1.1618, acc-0.7148, test loss-1.1643, acc-0.7213
Iter-4810, train loss-1.1110, acc-0.7800, valid loss-1.1605, acc-0.7142, test loss-1.1630, acc-0.7214
Iter-4820, train loss-1.2178, acc-0.7200, valid loss-1.1591, acc-0.7148, test loss-1.1616, acc-0.7220
Iter-4830, train loss-1.2381, acc-0.7200, valid loss-1.1575, acc-0.7146, test loss-1.1601, acc-0.7224
Iter-4840, train loss-1.0505, acc-0.7600, valid loss-1.1562, acc-0.7150, test loss-1.1587, acc-0.7226
Iter-4850, train loss-1.1885, acc-0.7200, valid loss-1.1548, acc-0.7146, test loss-1.1574, acc-0.7230
Iter-4860, train loss-1.1697, acc-0.7800, valid loss-1.1533, acc-0.7148, test loss-1.1559, acc-0.7232
Iter-4870, train loss-1.3285, acc-0.6200, valid loss-1.1519, acc-0.7162, test loss-1.1546, acc-0.7238
Iter-4880, train loss-1.3526, acc-0.6400, valid loss-1.1505, acc-0.7176, test loss-1.1532, acc-0.7248
Iter-4890, train loss-1.0535, acc-0.7400, valid loss-1.1491, acc-0.7176, test loss-1.1518, acc-0.7251
Iter-4900, train loss-1.0638, acc-0.7800, valid loss-1.1476, acc-0.7186, test loss-1.1504, acc-0.7258
Iter-4910, train loss-1.1879, acc-0.7600, valid loss-1.1463, acc-0.7182, test loss-1.1490, acc-0.7260
Iter-4920, train loss-1.1323, acc-0.7200, valid loss-1.1449, acc-0.7190, test loss-1.1476, acc-0.7267
Iter-4930, train loss-1.1950, acc-0.6800, valid loss-1.1435, acc-0.7192, test loss-1.1462, acc-0.7270
Iter-4940, train loss-1.1032, acc-0.6600, valid loss-1.1421, acc-0.7192, test loss-1.1448, acc-0.7277
Iter-4950, train loss-1.2128, acc-0.7200, valid loss-1.1407, acc-0.7196, test loss-1.1435, acc-0.7280
Iter-4960, train loss-1.0199, acc-0.7600, valid loss-1.1393, acc-0.7206, test loss-1.1421, acc-0.7285
Iter-4970, train loss-1.1449, acc-0.7200, valid loss-1.1380, acc-0.7206, test loss-1.1407, acc-0.7286
Iter-4980, train loss-1.2111, acc-0.7200, valid loss-1.1366, acc-0.7210, test loss-1.1394, acc-0.7292
Iter-4990, train loss-1.2814, acc-0.6400, valid loss-1.1352, acc-0.7220, test loss-1.1381, acc-0.7298
Iter-5000, train loss-1.1928, acc-0.7200, valid loss-1.1339, acc-0.7226, test loss-1.1367, acc-0.7299
Iter-5010, train loss-1.0458, acc-0.8200, valid loss-1.1326, acc-0.7228, test loss-1.1355, acc-0.7305
Iter-5020, train loss-1.4510, acc-0.5800, valid loss-1.1313, acc-0.7230, test loss-1.1341, acc-0.7307
Iter-5030, train loss-0.9653, acc-0.7600, valid loss-1.1299, acc-0.7234, test loss-1.1328, acc-0.7312
Iter-5040, train loss-1.1264, acc-0.7000, valid loss-1.1286, acc-0.7232, test loss-1.1315, acc-0.7310
Iter-5050, train loss-1.2159, acc-0.6200, valid loss-1.1272, acc-0.7244, test loss-1.1301, acc-0.7317
Iter-5060, train loss-1.0983, acc-0.7800, valid loss-1.1259, acc-0.7246, test loss-1.1288, acc-0.7325
Iter-5070, train loss-1.0225, acc-0.8000, valid loss-1.1246, acc-0.7248, test loss-1.1275, acc-0.7329
Iter-5080, train loss-1.1085, acc-0.7600, valid loss-1.1233, acc-0.7250, test loss-1.1262, acc-0.7329
Iter-5090, train loss-1.1035, acc-0.8000, valid loss-1.1220, acc-0.7252, test loss-1.1249, acc-0.7334
Iter-5100, train loss-1.1004, acc-0.7000, valid loss-1.1206, acc-0.7254, test loss-1.1235, acc-0.7335
Iter-5110, train loss-1.0929, acc-0.7800, valid loss-1.1192, acc-0.7256, test loss-1.1221, acc-0.7336
Iter-5120, train loss-1.0326, acc-0.7800, valid loss-1.1178, acc-0.7256, test loss-1.1208, acc-0.7335
Iter-5130, train loss-1.1981, acc-0.6800, valid loss-1.1164, acc-0.7256, test loss-1.1195, acc-0.7339
Iter-5140, train loss-1.1946, acc-0.6800, valid loss-1.1151, acc-0.7256, test loss-1.1182, acc-0.7339
Iter-5150, train loss-1.1805, acc-0.6600, valid loss-1.1138, acc-0.7256, test loss-1.1170, acc-0.7342
Iter-5160, train loss-1.4591, acc-0.5600, valid loss-1.1126, acc-0.7264, test loss-1.1158, acc-0.7347
Iter-5170, train loss-1.1683, acc-0.6800, valid loss-1.1113, acc-0.7264, test loss-1.1145, acc-0.7345
Iter-5180, train loss-1.1524, acc-0.7400, valid loss-1.1100, acc-0.7268, test loss-1.1132, acc-0.7357
Iter-5190, train loss-1.1848, acc-0.6800, valid loss-1.1087, acc-0.7272, test loss-1.1119, acc-0.7356
Iter-5200, train loss-1.1844, acc-0.6000, valid loss-1.1074, acc-0.7268, test loss-1.1107, acc-0.7358
Iter-5210, train loss-1.2661, acc-0.6000, valid loss-1.1061, acc-0.7276, test loss-1.1094, acc-0.7356
Iter-5220, train loss-1.0035, acc-0.7400, valid loss-1.1048, acc-0.7276, test loss-1.1081, acc-0.7360
Iter-5230, train loss-1.1920, acc-0.7400, valid loss-1.1035, acc-0.7282, test loss-1.1068, acc-0.7365
Iter-5240, train loss-1.1142, acc-0.7000, valid loss-1.1023, acc-0.7284, test loss-1.1056, acc-0.7368
Iter-5250, train loss-1.2365, acc-0.6400, valid loss-1.1011, acc-0.7292, test loss-1.1044, acc-0.7378
Iter-5260, train loss-1.0595, acc-0.7200, valid loss-1.0998, acc-0.7298, test loss-1.1032, acc-0.7380
Iter-5270, train loss-1.2367, acc-0.7200, valid loss-1.0986, acc-0.7310, test loss-1.1019, acc-0.7381
Iter-5280, train loss-1.0427, acc-0.7800, valid loss-1.0974, acc-0.7308, test loss-1.1007, acc-0.7380
Iter-5290, train loss-1.0812, acc-0.7800, valid loss-1.0961, acc-0.7312, test loss-1.0994, acc-0.7383
Iter-5300, train loss-1.1171, acc-0.6600, valid loss-1.0949, acc-0.7312, test loss-1.0982, acc-0.7389
Iter-5310, train loss-1.1803, acc-0.6600, valid loss-1.0936, acc-0.7312, test loss-1.0969, acc-0.7394
Iter-5320, train loss-1.2197, acc-0.7400, valid loss-1.0922, acc-0.7316, test loss-1.0956, acc-0.7405
Iter-5330, train loss-1.0342, acc-0.8000, valid loss-1.0910, acc-0.7316, test loss-1.0944, acc-0.7413
Iter-5340, train loss-1.0747, acc-0.7800, valid loss-1.0898, acc-0.7320, test loss-1.0932, acc-0.7415
Iter-5350, train loss-1.1273, acc-0.7600, valid loss-1.0885, acc-0.7328, test loss-1.0919, acc-0.7416
Iter-5360, train loss-1.0264, acc-0.7200, valid loss-1.0872, acc-0.7330, test loss-1.0907, acc-0.7414
Iter-5370, train loss-1.0686, acc-0.7800, valid loss-1.0860, acc-0.7332, test loss-1.0895, acc-0.7412
Iter-5380, train loss-1.2796, acc-0.6400, valid loss-1.0848, acc-0.7334, test loss-1.0884, acc-0.7415
Iter-5390, train loss-1.0431, acc-0.7600, valid loss-1.0837, acc-0.7334, test loss-1.0872, acc-0.7418
Iter-5400, train loss-1.0887, acc-0.7800, valid loss-1.0824, acc-0.7334, test loss-1.0860, acc-0.7416
Iter-5410, train loss-1.1169, acc-0.7400, valid loss-1.0813, acc-0.7336, test loss-1.0848, acc-0.7422
Iter-5420, train loss-0.9724, acc-0.8200, valid loss-1.0800, acc-0.7338, test loss-1.0836, acc-0.7432
Iter-5430, train loss-0.9975, acc-0.8400, valid loss-1.0788, acc-0.7348, test loss-1.0824, acc-0.7434
Iter-5440, train loss-0.9727, acc-0.7800, valid loss-1.0776, acc-0.7350, test loss-1.0812, acc-0.7434
Iter-5450, train loss-1.2022, acc-0.6600, valid loss-1.0764, acc-0.7346, test loss-1.0801, acc-0.7435
Iter-5460, train loss-1.1646, acc-0.7400, valid loss-1.0753, acc-0.7352, test loss-1.0789, acc-0.7443
Iter-5470, train loss-1.1100, acc-0.7400, valid loss-1.0741, acc-0.7348, test loss-1.0778, acc-0.7441
Iter-5480, train loss-1.0416, acc-0.7800, valid loss-1.0729, acc-0.7356, test loss-1.0766, acc-0.7445
Iter-5490, train loss-1.1638, acc-0.7200, valid loss-1.0717, acc-0.7360, test loss-1.0754, acc-0.7448
Iter-5500, train loss-1.0357, acc-0.8000, valid loss-1.0705, acc-0.7362, test loss-1.0742, acc-0.7452
Iter-5510, train loss-0.9685, acc-0.8600, valid loss-1.0693, acc-0.7366, test loss-1.0730, acc-0.7451
Iter-5520, train loss-1.1602, acc-0.6600, valid loss-1.0682, acc-0.7364, test loss-1.0718, acc-0.7452
Iter-5530, train loss-1.0119, acc-0.8000, valid loss-1.0670, acc-0.7368, test loss-1.0706, acc-0.7453
Iter-5540, train loss-1.0802, acc-0.7800, valid loss-1.0657, acc-0.7370, test loss-1.0694, acc-0.7455
Iter-5550, train loss-1.0818, acc-0.7600, valid loss-1.0646, acc-0.7372, test loss-1.0683, acc-0.7456
Iter-5560, train loss-1.1168, acc-0.7000, valid loss-1.0634, acc-0.7374, test loss-1.0671, acc-0.7458
Iter-5570, train loss-0.9953, acc-0.7600, valid loss-1.0623, acc-0.7370, test loss-1.0660, acc-0.7458
Iter-5580, train loss-0.8699, acc-0.8400, valid loss-1.0611, acc-0.7378, test loss-1.0648, acc-0.7462
Iter-5590, train loss-1.0532, acc-0.7600, valid loss-1.0599, acc-0.7384, test loss-1.0636, acc-0.7466
Iter-5600, train loss-1.0089, acc-0.7600, valid loss-1.0588, acc-0.7382, test loss-1.0625, acc-0.7470
Iter-5610, train loss-0.9035, acc-0.8400, valid loss-1.0576, acc-0.7390, test loss-1.0613, acc-0.7469
Iter-5620, train loss-1.1051, acc-0.7600, valid loss-1.0565, acc-0.7388, test loss-1.0603, acc-0.7470
Iter-5630, train loss-1.1437, acc-0.7400, valid loss-1.0554, acc-0.7392, test loss-1.0592, acc-0.7474
Iter-5640, train loss-1.0750, acc-0.6400, valid loss-1.0544, acc-0.7400, test loss-1.0581, acc-0.7475
Iter-5650, train loss-0.9828, acc-0.7000, valid loss-1.0533, acc-0.7400, test loss-1.0570, acc-0.7481
Iter-5660, train loss-0.9881, acc-0.8000, valid loss-1.0521, acc-0.7404, test loss-1.0559, acc-0.7478
Iter-5670, train loss-1.0885, acc-0.7600, valid loss-1.0510, acc-0.7402, test loss-1.0547, acc-0.7478
Iter-5680, train loss-1.0014, acc-0.7200, valid loss-1.0498, acc-0.7406, test loss-1.0536, acc-0.7483
Iter-5690, train loss-1.1004, acc-0.7000, valid loss-1.0487, acc-0.7412, test loss-1.0525, acc-0.7486
Iter-5700, train loss-1.0044, acc-0.8400, valid loss-1.0476, acc-0.7412, test loss-1.0514, acc-0.7488
Iter-5710, train loss-1.1254, acc-0.7400, valid loss-1.0465, acc-0.7414, test loss-1.0503, acc-0.7488
Iter-5720, train loss-1.1770, acc-0.7600, valid loss-1.0454, acc-0.7418, test loss-1.0492, acc-0.7493
Iter-5730, train loss-1.0881, acc-0.7200, valid loss-1.0443, acc-0.7416, test loss-1.0481, acc-0.7495
Iter-5740, train loss-0.9649, acc-0.7800, valid loss-1.0432, acc-0.7416, test loss-1.0470, acc-0.7493
Iter-5750, train loss-0.9826, acc-0.7800, valid loss-1.0422, acc-0.7422, test loss-1.0460, acc-0.7494
Iter-5760, train loss-1.0518, acc-0.8000, valid loss-1.0412, acc-0.7422, test loss-1.0449, acc-0.7496
Iter-5770, train loss-1.0837, acc-0.7000, valid loss-1.0401, acc-0.7420, test loss-1.0438, acc-0.7502
Iter-5780, train loss-0.9625, acc-0.7600, valid loss-1.0390, acc-0.7422, test loss-1.0427, acc-0.7505
Iter-5790, train loss-0.9186, acc-0.8200, valid loss-1.0379, acc-0.7428, test loss-1.0416, acc-0.7508
Iter-5800, train loss-0.9762, acc-0.8400, valid loss-1.0368, acc-0.7430, test loss-1.0405, acc-0.7507
Iter-5810, train loss-1.0035, acc-0.8200, valid loss-1.0356, acc-0.7428, test loss-1.0393, acc-0.7505
Iter-5820, train loss-1.1874, acc-0.6600, valid loss-1.0345, acc-0.7430, test loss-1.0382, acc-0.7512
Iter-5830, train loss-0.9459, acc-0.7600, valid loss-1.0335, acc-0.7438, test loss-1.0372, acc-0.7517
Iter-5840, train loss-1.0415, acc-0.7200, valid loss-1.0323, acc-0.7448, test loss-1.0361, acc-0.7516
Iter-5850, train loss-0.8866, acc-0.8600, valid loss-1.0311, acc-0.7442, test loss-1.0349, acc-0.7519
Iter-5860, train loss-1.1600, acc-0.8000, valid loss-1.0301, acc-0.7448, test loss-1.0339, acc-0.7521
Iter-5870, train loss-0.9327, acc-0.8200, valid loss-1.0289, acc-0.7444, test loss-1.0328, acc-0.7517
Iter-5880, train loss-0.9390, acc-0.8000, valid loss-1.0279, acc-0.7446, test loss-1.0317, acc-0.7522
Iter-5890, train loss-1.0185, acc-0.8000, valid loss-1.0268, acc-0.7452, test loss-1.0306, acc-0.7526
Iter-5900, train loss-0.8803, acc-0.8200, valid loss-1.0257, acc-0.7450, test loss-1.0295, acc-0.7527
Iter-5910, train loss-0.9291, acc-0.8200, valid loss-1.0246, acc-0.7452, test loss-1.0285, acc-0.7531
Iter-5920, train loss-0.9869, acc-0.8000, valid loss-1.0235, acc-0.7454, test loss-1.0274, acc-0.7534
Iter-5930, train loss-0.9750, acc-0.7800, valid loss-1.0224, acc-0.7454, test loss-1.0263, acc-0.7537
Iter-5940, train loss-0.8971, acc-0.8600, valid loss-1.0214, acc-0.7456, test loss-1.0253, acc-0.7539
Iter-5950, train loss-0.9013, acc-0.8400, valid loss-1.0204, acc-0.7458, test loss-1.0243, acc-0.7541
Iter-5960, train loss-0.9835, acc-0.7800, valid loss-1.0193, acc-0.7468, test loss-1.0233, acc-0.7548
Iter-5970, train loss-0.9929, acc-0.8000, valid loss-1.0183, acc-0.7466, test loss-1.0222, acc-0.7550
Iter-5980, train loss-0.9905, acc-0.7400, valid loss-1.0172, acc-0.7468, test loss-1.0212, acc-0.7545
Iter-5990, train loss-1.1413, acc-0.7200, valid loss-1.0162, acc-0.7474, test loss-1.0202, acc-0.7550
Iter-6000, train loss-0.9886, acc-0.7600, valid loss-1.0152, acc-0.7468, test loss-1.0192, acc-0.7554
Iter-6010, train loss-1.0241, acc-0.7800, valid loss-1.0141, acc-0.7472, test loss-1.0181, acc-0.7556
Iter-6020, train loss-0.9625, acc-0.7800, valid loss-1.0130, acc-0.7474, test loss-1.0170, acc-0.7557
Iter-6030, train loss-1.1168, acc-0.7000, valid loss-1.0120, acc-0.7474, test loss-1.0159, acc-0.7558
Iter-6040, train loss-1.1291, acc-0.6800, valid loss-1.0109, acc-0.7482, test loss-1.0149, acc-0.7557
Iter-6050, train loss-1.0300, acc-0.7000, valid loss-1.0098, acc-0.7482, test loss-1.0138, acc-0.7563
Iter-6060, train loss-0.9107, acc-0.7600, valid loss-1.0088, acc-0.7488, test loss-1.0128, acc-0.7562
Iter-6070, train loss-0.9232, acc-0.8400, valid loss-1.0078, acc-0.7490, test loss-1.0117, acc-0.7568
Iter-6080, train loss-1.1697, acc-0.6600, valid loss-1.0067, acc-0.7488, test loss-1.0107, acc-0.7569
Iter-6090, train loss-0.9524, acc-0.8200, valid loss-1.0057, acc-0.7488, test loss-1.0097, acc-0.7571
Iter-6100, train loss-1.0499, acc-0.6600, valid loss-1.0047, acc-0.7494, test loss-1.0087, acc-0.7574
Iter-6110, train loss-0.9705, acc-0.8400, valid loss-1.0037, acc-0.7490, test loss-1.0077, acc-0.7574
Iter-6120, train loss-0.9786, acc-0.7200, valid loss-1.0028, acc-0.7488, test loss-1.0068, acc-0.7575
Iter-6130, train loss-1.2173, acc-0.7200, valid loss-1.0018, acc-0.7494, test loss-1.0057, acc-0.7573
Iter-6140, train loss-1.1881, acc-0.6600, valid loss-1.0007, acc-0.7504, test loss-1.0048, acc-0.7573
Iter-6150, train loss-1.0021, acc-0.7600, valid loss-0.9997, acc-0.7506, test loss-1.0037, acc-0.7573
Iter-6160, train loss-1.2650, acc-0.6000, valid loss-0.9986, acc-0.7504, test loss-1.0027, acc-0.7573
Iter-6170, train loss-1.0740, acc-0.6800, valid loss-0.9977, acc-0.7512, test loss-1.0017, acc-0.7575
Iter-6180, train loss-0.9596, acc-0.7800, valid loss-0.9967, acc-0.7512, test loss-1.0007, acc-0.7576
Iter-6190, train loss-0.9641, acc-0.7000, valid loss-0.9957, acc-0.7516, test loss-0.9997, acc-0.7574
Iter-6200, train loss-1.0013, acc-0.7200, valid loss-0.9948, acc-0.7514, test loss-0.9988, acc-0.7579
Iter-6210, train loss-1.1025, acc-0.7200, valid loss-0.9938, acc-0.7518, test loss-0.9978, acc-0.7585
Iter-6220, train loss-0.9480, acc-0.8200, valid loss-0.9928, acc-0.7518, test loss-0.9968, acc-0.7581
Iter-6230, train loss-1.1589, acc-0.6000, valid loss-0.9918, acc-0.7522, test loss-0.9959, acc-0.7584
Iter-6240, train loss-1.0905, acc-0.6800, valid loss-0.9909, acc-0.7518, test loss-0.9949, acc-0.7589
Iter-6250, train loss-1.0239, acc-0.8000, valid loss-0.9898, acc-0.7518, test loss-0.9939, acc-0.7594
Iter-6260, train loss-1.0575, acc-0.7400, valid loss-0.9889, acc-0.7514, test loss-0.9930, acc-0.7594
Iter-6270, train loss-1.0281, acc-0.7800, valid loss-0.9879, acc-0.7516, test loss-0.9920, acc-0.7594
Iter-6280, train loss-1.0948, acc-0.7000, valid loss-0.9870, acc-0.7520, test loss-0.9910, acc-0.7601
Iter-6290, train loss-0.8456, acc-0.8800, valid loss-0.9860, acc-0.7514, test loss-0.9900, acc-0.7601
Iter-6300, train loss-1.0729, acc-0.6600, valid loss-0.9851, acc-0.7516, test loss-0.9890, acc-0.7606
Iter-6310, train loss-1.1475, acc-0.6400, valid loss-0.9842, acc-0.7520, test loss-0.9881, acc-0.7609
Iter-6320, train loss-0.9097, acc-0.8000, valid loss-0.9832, acc-0.7524, test loss-0.9871, acc-0.7611
Iter-6330, train loss-1.0163, acc-0.7600, valid loss-0.9823, acc-0.7524, test loss-0.9861, acc-0.7611
Iter-6340, train loss-0.9207, acc-0.7800, valid loss-0.9813, acc-0.7530, test loss-0.9851, acc-0.7609
Iter-6350, train loss-1.0676, acc-0.7200, valid loss-0.9803, acc-0.7530, test loss-0.9842, acc-0.7611
Iter-6360, train loss-0.8324, acc-0.8000, valid loss-0.9793, acc-0.7530, test loss-0.9832, acc-0.7613
Iter-6370, train loss-1.0579, acc-0.7400, valid loss-0.9783, acc-0.7536, test loss-0.9822, acc-0.7615
Iter-6380, train loss-1.0889, acc-0.6800, valid loss-0.9774, acc-0.7530, test loss-0.9813, acc-0.7617
Iter-6390, train loss-0.9548, acc-0.8200, valid loss-0.9764, acc-0.7534, test loss-0.9803, acc-0.7617
Iter-6400, train loss-1.0564, acc-0.7000, valid loss-0.9754, acc-0.7540, test loss-0.9793, acc-0.7616
Iter-6410, train loss-1.0226, acc-0.7800, valid loss-0.9745, acc-0.7540, test loss-0.9784, acc-0.7616
Iter-6420, train loss-0.8577, acc-0.7400, valid loss-0.9736, acc-0.7544, test loss-0.9775, acc-0.7621
Iter-6430, train loss-0.9739, acc-0.7200, valid loss-0.9726, acc-0.7544, test loss-0.9766, acc-0.7623
Iter-6440, train loss-0.9895, acc-0.7200, valid loss-0.9717, acc-0.7552, test loss-0.9756, acc-0.7624
Iter-6450, train loss-0.9722, acc-0.7600, valid loss-0.9707, acc-0.7552, test loss-0.9747, acc-0.7629
Iter-6460, train loss-0.9987, acc-0.8000, valid loss-0.9698, acc-0.7554, test loss-0.9737, acc-0.7632
Iter-6470, train loss-0.9413, acc-0.8200, valid loss-0.9689, acc-0.7554, test loss-0.9728, acc-0.7631
Iter-6480, train loss-0.9385, acc-0.7400, valid loss-0.9680, acc-0.7560, test loss-0.9719, acc-0.7633
Iter-6490, train loss-1.0822, acc-0.6800, valid loss-0.9671, acc-0.7560, test loss-0.9710, acc-0.7636
Iter-6500, train loss-1.1024, acc-0.6400, valid loss-0.9661, acc-0.7558, test loss-0.9700, acc-0.7646
Iter-6510, train loss-0.9030, acc-0.8600, valid loss-0.9653, acc-0.7552, test loss-0.9691, acc-0.7647
Iter-6520, train loss-0.9940, acc-0.7800, valid loss-0.9644, acc-0.7552, test loss-0.9682, acc-0.7648
Iter-6530, train loss-1.2184, acc-0.6800, valid loss-0.9635, acc-0.7564, test loss-0.9673, acc-0.7654
Iter-6540, train loss-1.1395, acc-0.7400, valid loss-0.9626, acc-0.7564, test loss-0.9664, acc-0.7662
Iter-6550, train loss-0.8634, acc-0.8400, valid loss-0.9617, acc-0.7564, test loss-0.9655, acc-0.7664
Iter-6560, train loss-0.8834, acc-0.8000, valid loss-0.9608, acc-0.7570, test loss-0.9646, acc-0.7668
Iter-6570, train loss-1.0611, acc-0.7000, valid loss-0.9599, acc-0.7574, test loss-0.9637, acc-0.7670
Iter-6580, train loss-1.0063, acc-0.7200, valid loss-0.9590, acc-0.7576, test loss-0.9628, acc-0.7675
Iter-6590, train loss-0.9363, acc-0.7600, valid loss-0.9581, acc-0.7580, test loss-0.9619, acc-0.7677
Iter-6600, train loss-1.1962, acc-0.6600, valid loss-0.9572, acc-0.7584, test loss-0.9610, acc-0.7684
Iter-6610, train loss-0.8899, acc-0.8200, valid loss-0.9563, acc-0.7590, test loss-0.9601, acc-0.7682
Iter-6620, train loss-0.8723, acc-0.7600, valid loss-0.9555, acc-0.7588, test loss-0.9593, acc-0.7689
Iter-6630, train loss-1.0044, acc-0.7200, valid loss-0.9546, acc-0.7590, test loss-0.9584, acc-0.7692
Iter-6640, train loss-0.8985, acc-0.7000, valid loss-0.9538, acc-0.7590, test loss-0.9576, acc-0.7690
Iter-6650, train loss-1.0726, acc-0.7000, valid loss-0.9528, acc-0.7592, test loss-0.9567, acc-0.7689
Iter-6660, train loss-0.8161, acc-0.9000, valid loss-0.9519, acc-0.7592, test loss-0.9558, acc-0.7692
Iter-6670, train loss-0.9827, acc-0.6800, valid loss-0.9510, acc-0.7600, test loss-0.9549, acc-0.7699
Iter-6680, train loss-0.8773, acc-0.8000, valid loss-0.9502, acc-0.7602, test loss-0.9540, acc-0.7699
Iter-6690, train loss-0.9912, acc-0.7600, valid loss-0.9492, acc-0.7602, test loss-0.9531, acc-0.7704
Iter-6700, train loss-1.1330, acc-0.6600, valid loss-0.9484, acc-0.7602, test loss-0.9522, acc-0.7708
Iter-6710, train loss-0.8818, acc-0.7600, valid loss-0.9475, acc-0.7598, test loss-0.9513, acc-0.7709
Iter-6720, train loss-1.0247, acc-0.7000, valid loss-0.9466, acc-0.7604, test loss-0.9505, acc-0.7708
Iter-6730, train loss-0.8608, acc-0.8200, valid loss-0.9458, acc-0.7602, test loss-0.9496, acc-0.7707
Iter-6740, train loss-1.0451, acc-0.7200, valid loss-0.9450, acc-0.7600, test loss-0.9487, acc-0.7711
Iter-6750, train loss-0.8742, acc-0.7600, valid loss-0.9441, acc-0.7598, test loss-0.9478, acc-0.7713
Iter-6760, train loss-0.8782, acc-0.8000, valid loss-0.9432, acc-0.7602, test loss-0.9469, acc-0.7721
Iter-6770, train loss-1.1496, acc-0.6800, valid loss-0.9424, acc-0.7602, test loss-0.9461, acc-0.7730
Iter-6780, train loss-1.0605, acc-0.7000, valid loss-0.9416, acc-0.7604, test loss-0.9453, acc-0.7730
Iter-6790, train loss-1.1469, acc-0.6400, valid loss-0.9408, acc-0.7604, test loss-0.9445, acc-0.7725
Iter-6800, train loss-0.8637, acc-0.8600, valid loss-0.9398, acc-0.7606, test loss-0.9435, acc-0.7728
Iter-6810, train loss-1.0482, acc-0.7200, valid loss-0.9389, acc-0.7612, test loss-0.9426, acc-0.7731
Iter-6820, train loss-0.8619, acc-0.7400, valid loss-0.9381, acc-0.7612, test loss-0.9418, acc-0.7733
Iter-6830, train loss-0.9817, acc-0.7800, valid loss-0.9373, acc-0.7606, test loss-0.9410, acc-0.7736
Iter-6840, train loss-0.9209, acc-0.7400, valid loss-0.9364, acc-0.7612, test loss-0.9401, acc-0.7735
Iter-6850, train loss-1.0590, acc-0.7000, valid loss-0.9356, acc-0.7616, test loss-0.9393, acc-0.7734
Iter-6860, train loss-0.9307, acc-0.7000, valid loss-0.9347, acc-0.7618, test loss-0.9384, acc-0.7736
Iter-6870, train loss-0.9870, acc-0.8000, valid loss-0.9338, acc-0.7624, test loss-0.9375, acc-0.7735
Iter-6880, train loss-0.8668, acc-0.8000, valid loss-0.9330, acc-0.7626, test loss-0.9366, acc-0.7739
Iter-6890, train loss-1.1196, acc-0.7800, valid loss-0.9322, acc-0.7628, test loss-0.9358, acc-0.7739
Iter-6900, train loss-0.9681, acc-0.7400, valid loss-0.9313, acc-0.7632, test loss-0.9350, acc-0.7741
Iter-6910, train loss-1.1533, acc-0.7600, valid loss-0.9305, acc-0.7636, test loss-0.9342, acc-0.7740
Iter-6920, train loss-1.0226, acc-0.6600, valid loss-0.9297, acc-0.7636, test loss-0.9334, acc-0.7741
Iter-6930, train loss-0.8703, acc-0.8400, valid loss-0.9289, acc-0.7634, test loss-0.9326, acc-0.7747
Iter-6940, train loss-0.9108, acc-0.7600, valid loss-0.9280, acc-0.7630, test loss-0.9317, acc-0.7752
Iter-6950, train loss-1.0638, acc-0.7400, valid loss-0.9272, acc-0.7632, test loss-0.9309, acc-0.7756
Iter-6960, train loss-0.8979, acc-0.7000, valid loss-0.9264, acc-0.7632, test loss-0.9301, acc-0.7756
Iter-6970, train loss-0.9404, acc-0.7400, valid loss-0.9257, acc-0.7638, test loss-0.9293, acc-0.7758
Iter-6980, train loss-0.8151, acc-0.8800, valid loss-0.9248, acc-0.7634, test loss-0.9285, acc-0.7758
Iter-6990, train loss-0.8930, acc-0.8200, valid loss-0.9240, acc-0.7638, test loss-0.9277, acc-0.7755
Iter-7000, train loss-0.7995, acc-0.8000, valid loss-0.9233, acc-0.7646, test loss-0.9269, acc-0.7757
Iter-7010, train loss-0.7267, acc-0.8600, valid loss-0.9224, acc-0.7646, test loss-0.9260, acc-0.7759
Iter-7020, train loss-0.9156, acc-0.8000, valid loss-0.9216, acc-0.7650, test loss-0.9252, acc-0.7759
Iter-7030, train loss-0.9025, acc-0.9000, valid loss-0.9208, acc-0.7654, test loss-0.9243, acc-0.7760
Iter-7040, train loss-0.9060, acc-0.7800, valid loss-0.9200, acc-0.7656, test loss-0.9235, acc-0.7762
Iter-7050, train loss-1.0027, acc-0.8000, valid loss-0.9191, acc-0.7656, test loss-0.9227, acc-0.7765
Iter-7060, train loss-1.0797, acc-0.7400, valid loss-0.9184, acc-0.7660, test loss-0.9219, acc-0.7766
Iter-7070, train loss-0.8798, acc-0.8200, valid loss-0.9176, acc-0.7658, test loss-0.9211, acc-0.7763
Iter-7080, train loss-0.8442, acc-0.7200, valid loss-0.9168, acc-0.7652, test loss-0.9203, acc-0.7764
Iter-7090, train loss-0.8919, acc-0.8400, valid loss-0.9160, acc-0.7662, test loss-0.9195, acc-0.7764
Iter-7100, train loss-0.9232, acc-0.7000, valid loss-0.9151, acc-0.7666, test loss-0.9187, acc-0.7768
Iter-7110, train loss-0.7984, acc-0.8600, valid loss-0.9143, acc-0.7672, test loss-0.9180, acc-0.7768
Iter-7120, train loss-0.8369, acc-0.8400, valid loss-0.9135, acc-0.7674, test loss-0.9171, acc-0.7767
Iter-7130, train loss-0.8896, acc-0.8000, valid loss-0.9128, acc-0.7672, test loss-0.9164, acc-0.7768
Iter-7140, train loss-0.9158, acc-0.7000, valid loss-0.9119, acc-0.7676, test loss-0.9155, acc-0.7767
Iter-7150, train loss-0.9465, acc-0.8000, valid loss-0.9111, acc-0.7674, test loss-0.9148, acc-0.7767
Iter-7160, train loss-1.0159, acc-0.7200, valid loss-0.9103, acc-0.7676, test loss-0.9140, acc-0.7772
Iter-7170, train loss-0.9842, acc-0.7200, valid loss-0.9095, acc-0.7680, test loss-0.9132, acc-0.7775
Iter-7180, train loss-0.7518, acc-0.9000, valid loss-0.9087, acc-0.7682, test loss-0.9124, acc-0.7773
Iter-7190, train loss-1.1358, acc-0.7000, valid loss-0.9080, acc-0.7686, test loss-0.9117, acc-0.7775
Iter-7200, train loss-1.0254, acc-0.6800, valid loss-0.9072, acc-0.7688, test loss-0.9108, acc-0.7775
Iter-7210, train loss-0.9610, acc-0.7400, valid loss-0.9064, acc-0.7686, test loss-0.9101, acc-0.7779
Iter-7220, train loss-0.9680, acc-0.7200, valid loss-0.9057, acc-0.7684, test loss-0.9093, acc-0.7782
Iter-7230, train loss-0.8554, acc-0.7800, valid loss-0.9048, acc-0.7682, test loss-0.9084, acc-0.7783
Iter-7240, train loss-0.9430, acc-0.8000, valid loss-0.9040, acc-0.7682, test loss-0.9076, acc-0.7790
Iter-7250, train loss-0.9900, acc-0.7400, valid loss-0.9032, acc-0.7684, test loss-0.9068, acc-0.7789
Iter-7260, train loss-0.8777, acc-0.7800, valid loss-0.9025, acc-0.7684, test loss-0.9060, acc-0.7791
Iter-7270, train loss-0.8816, acc-0.7600, valid loss-0.9016, acc-0.7686, test loss-0.9052, acc-0.7793
Iter-7280, train loss-1.0295, acc-0.6400, valid loss-0.9008, acc-0.7692, test loss-0.9044, acc-0.7802
Iter-7290, train loss-0.9231, acc-0.7000, valid loss-0.9000, acc-0.7698, test loss-0.9036, acc-0.7802
Iter-7300, train loss-0.9975, acc-0.6800, valid loss-0.8992, acc-0.7696, test loss-0.9028, acc-0.7803
Iter-7310, train loss-0.9197, acc-0.7800, valid loss-0.8984, acc-0.7706, test loss-0.9021, acc-0.7804
Iter-7320, train loss-0.8854, acc-0.7600, valid loss-0.8976, acc-0.7708, test loss-0.9013, acc-0.7807
Iter-7330, train loss-0.9988, acc-0.8200, valid loss-0.8968, acc-0.7706, test loss-0.9005, acc-0.7808
Iter-7340, train loss-0.9621, acc-0.7400, valid loss-0.8961, acc-0.7710, test loss-0.8998, acc-0.7808
Iter-7350, train loss-0.8947, acc-0.7400, valid loss-0.8954, acc-0.7712, test loss-0.8991, acc-0.7808
Iter-7360, train loss-0.8439, acc-0.8000, valid loss-0.8945, acc-0.7714, test loss-0.8983, acc-0.7810
Iter-7370, train loss-0.8808, acc-0.7200, valid loss-0.8938, acc-0.7722, test loss-0.8975, acc-0.7813
Iter-7380, train loss-0.9282, acc-0.7800, valid loss-0.8930, acc-0.7720, test loss-0.8967, acc-0.7812
Iter-7390, train loss-0.9665, acc-0.7200, valid loss-0.8922, acc-0.7724, test loss-0.8960, acc-0.7817
Iter-7400, train loss-0.8983, acc-0.8000, valid loss-0.8914, acc-0.7726, test loss-0.8952, acc-0.7818
Iter-7410, train loss-0.9956, acc-0.7400, valid loss-0.8907, acc-0.7724, test loss-0.8945, acc-0.7819
Iter-7420, train loss-0.9501, acc-0.7400, valid loss-0.8899, acc-0.7730, test loss-0.8937, acc-0.7825
Iter-7430, train loss-0.9038, acc-0.8600, valid loss-0.8893, acc-0.7722, test loss-0.8930, acc-0.7826
Iter-7440, train loss-0.9580, acc-0.7400, valid loss-0.8885, acc-0.7732, test loss-0.8923, acc-0.7826
Iter-7450, train loss-0.8115, acc-0.8400, valid loss-0.8877, acc-0.7734, test loss-0.8915, acc-0.7827
Iter-7460, train loss-0.9120, acc-0.7400, valid loss-0.8870, acc-0.7738, test loss-0.8907, acc-0.7832
Iter-7470, train loss-0.8681, acc-0.8000, valid loss-0.8862, acc-0.7732, test loss-0.8900, acc-0.7834
Iter-7480, train loss-0.7998, acc-0.8400, valid loss-0.8855, acc-0.7736, test loss-0.8892, acc-0.7832
Iter-7490, train loss-0.9286, acc-0.7400, valid loss-0.8847, acc-0.7738, test loss-0.8885, acc-0.7831
Iter-7500, train loss-0.8943, acc-0.7000, valid loss-0.8840, acc-0.7752, test loss-0.8877, acc-0.7833
Iter-7510, train loss-0.8966, acc-0.7800, valid loss-0.8833, acc-0.7748, test loss-0.8870, acc-0.7832
Iter-7520, train loss-0.9324, acc-0.7400, valid loss-0.8826, acc-0.7754, test loss-0.8862, acc-0.7835
Iter-7530, train loss-0.9275, acc-0.7600, valid loss-0.8818, acc-0.7760, test loss-0.8855, acc-0.7833
Iter-7540, train loss-0.9833, acc-0.7400, valid loss-0.8811, acc-0.7766, test loss-0.8847, acc-0.7837
Iter-7550, train loss-0.8002, acc-0.8000, valid loss-0.8803, acc-0.7770, test loss-0.8840, acc-0.7838
Iter-7560, train loss-0.9298, acc-0.8000, valid loss-0.8795, acc-0.7772, test loss-0.8832, acc-0.7838
Iter-7570, train loss-0.8038, acc-0.8200, valid loss-0.8787, acc-0.7782, test loss-0.8824, acc-0.7838
Iter-7580, train loss-0.9374, acc-0.7000, valid loss-0.8780, acc-0.7778, test loss-0.8817, acc-0.7843
Iter-7590, train loss-0.7481, acc-0.7800, valid loss-0.8773, acc-0.7788, test loss-0.8810, acc-0.7846
Iter-7600, train loss-1.0278, acc-0.7200, valid loss-0.8765, acc-0.7788, test loss-0.8802, acc-0.7845
Iter-7610, train loss-0.8635, acc-0.7800, valid loss-0.8757, acc-0.7790, test loss-0.8795, acc-0.7844
Iter-7620, train loss-0.9221, acc-0.7600, valid loss-0.8750, acc-0.7794, test loss-0.8787, acc-0.7847
Iter-7630, train loss-0.7923, acc-0.8200, valid loss-0.8742, acc-0.7796, test loss-0.8779, acc-0.7847
Iter-7640, train loss-0.9575, acc-0.7800, valid loss-0.8735, acc-0.7800, test loss-0.8772, acc-0.7848
Iter-7650, train loss-0.8221, acc-0.7800, valid loss-0.8728, acc-0.7812, test loss-0.8765, acc-0.7851
Iter-7660, train loss-0.9694, acc-0.7000, valid loss-0.8721, acc-0.7802, test loss-0.8758, acc-0.7851
Iter-7670, train loss-0.8852, acc-0.7600, valid loss-0.8714, acc-0.7808, test loss-0.8751, acc-0.7854
Iter-7680, train loss-0.8844, acc-0.8600, valid loss-0.8706, acc-0.7808, test loss-0.8744, acc-0.7857
Iter-7690, train loss-0.8543, acc-0.7800, valid loss-0.8699, acc-0.7810, test loss-0.8737, acc-0.7861
Iter-7700, train loss-0.8812, acc-0.7600, valid loss-0.8692, acc-0.7808, test loss-0.8730, acc-0.7862
Iter-7710, train loss-0.9759, acc-0.7200, valid loss-0.8685, acc-0.7812, test loss-0.8723, acc-0.7866
Iter-7720, train loss-0.7686, acc-0.8400, valid loss-0.8678, acc-0.7812, test loss-0.8716, acc-0.7870
Iter-7730, train loss-0.8314, acc-0.7600, valid loss-0.8671, acc-0.7818, test loss-0.8709, acc-0.7867
Iter-7740, train loss-0.8840, acc-0.8200, valid loss-0.8663, acc-0.7820, test loss-0.8702, acc-0.7872
Iter-7750, train loss-0.8798, acc-0.7400, valid loss-0.8656, acc-0.7822, test loss-0.8694, acc-0.7869
Iter-7760, train loss-0.8203, acc-0.7800, valid loss-0.8649, acc-0.7818, test loss-0.8687, acc-0.7872
Iter-7770, train loss-0.7554, acc-0.7800, valid loss-0.8642, acc-0.7818, test loss-0.8680, acc-0.7873
Iter-7780, train loss-0.7139, acc-0.8600, valid loss-0.8635, acc-0.7818, test loss-0.8672, acc-0.7874
Iter-7790, train loss-0.6928, acc-0.8800, valid loss-0.8627, acc-0.7812, test loss-0.8666, acc-0.7876
Iter-7800, train loss-0.7207, acc-0.9000, valid loss-0.8620, acc-0.7818, test loss-0.8659, acc-0.7879
Iter-7810, train loss-0.7457, acc-0.8400, valid loss-0.8613, acc-0.7820, test loss-0.8652, acc-0.7881
Iter-7820, train loss-0.9269, acc-0.7400, valid loss-0.8606, acc-0.7818, test loss-0.8645, acc-0.7882
Iter-7830, train loss-0.7785, acc-0.8400, valid loss-0.8598, acc-0.7824, test loss-0.8638, acc-0.7882
Iter-7840, train loss-0.9781, acc-0.6800, valid loss-0.8591, acc-0.7822, test loss-0.8631, acc-0.7883
Iter-7850, train loss-0.6780, acc-0.9200, valid loss-0.8584, acc-0.7824, test loss-0.8624, acc-0.7885
Iter-7860, train loss-0.9898, acc-0.7200, valid loss-0.8577, acc-0.7824, test loss-0.8617, acc-0.7885
Iter-7870, train loss-0.9734, acc-0.7600, valid loss-0.8570, acc-0.7824, test loss-0.8610, acc-0.7887
Iter-7880, train loss-0.8761, acc-0.7600, valid loss-0.8563, acc-0.7826, test loss-0.8604, acc-0.7888
Iter-7890, train loss-0.8866, acc-0.7200, valid loss-0.8557, acc-0.7826, test loss-0.8597, acc-0.7894
Iter-7900, train loss-0.9155, acc-0.8000, valid loss-0.8550, acc-0.7832, test loss-0.8590, acc-0.7895
Iter-7910, train loss-0.6887, acc-0.9200, valid loss-0.8542, acc-0.7834, test loss-0.8582, acc-0.7896
Iter-7920, train loss-0.8414, acc-0.7200, valid loss-0.8535, acc-0.7840, test loss-0.8575, acc-0.7900
Iter-7930, train loss-0.7980, acc-0.8400, valid loss-0.8529, acc-0.7844, test loss-0.8569, acc-0.7903
Iter-7940, train loss-0.8822, acc-0.8000, valid loss-0.8522, acc-0.7842, test loss-0.8562, acc-0.7902
Iter-7950, train loss-0.7776, acc-0.8600, valid loss-0.8515, acc-0.7846, test loss-0.8555, acc-0.7900
Iter-7960, train loss-0.8212, acc-0.8000, valid loss-0.8508, acc-0.7850, test loss-0.8548, acc-0.7903
Iter-7970, train loss-0.9106, acc-0.7400, valid loss-0.8502, acc-0.7852, test loss-0.8542, acc-0.7906
Iter-7980, train loss-0.8280, acc-0.8200, valid loss-0.8495, acc-0.7848, test loss-0.8535, acc-0.7907
Iter-7990, train loss-0.6688, acc-0.8200, valid loss-0.8488, acc-0.7850, test loss-0.8528, acc-0.7907
Iter-8000, train loss-0.8947, acc-0.8200, valid loss-0.8482, acc-0.7856, test loss-0.8521, acc-0.7904
Iter-8010, train loss-0.7503, acc-0.8200, valid loss-0.8475, acc-0.7856, test loss-0.8514, acc-0.7908
Iter-8020, train loss-1.0128, acc-0.7200, valid loss-0.8468, acc-0.7860, test loss-0.8508, acc-0.7905
Iter-8030, train loss-0.7998, acc-0.8600, valid loss-0.8461, acc-0.7864, test loss-0.8501, acc-0.7910
Iter-8040, train loss-1.0654, acc-0.6600, valid loss-0.8455, acc-0.7868, test loss-0.8494, acc-0.7909
Iter-8050, train loss-0.9714, acc-0.7800, valid loss-0.8448, acc-0.7870, test loss-0.8487, acc-0.7917
Iter-8060, train loss-1.0564, acc-0.6600, valid loss-0.8442, acc-0.7868, test loss-0.8481, acc-0.7919
Iter-8070, train loss-0.7422, acc-0.8800, valid loss-0.8435, acc-0.7868, test loss-0.8474, acc-0.7918
Iter-8080, train loss-0.8985, acc-0.7000, valid loss-0.8429, acc-0.7874, test loss-0.8468, acc-0.7917
Iter-8090, train loss-0.7847, acc-0.8200, valid loss-0.8422, acc-0.7874, test loss-0.8461, acc-0.7919
Iter-8100, train loss-0.9294, acc-0.7000, valid loss-0.8416, acc-0.7872, test loss-0.8454, acc-0.7918
Iter-8110, train loss-0.9710, acc-0.7400, valid loss-0.8409, acc-0.7876, test loss-0.8448, acc-0.7921
Iter-8120, train loss-0.8266, acc-0.7800, valid loss-0.8402, acc-0.7876, test loss-0.8441, acc-0.7924
Iter-8130, train loss-0.7606, acc-0.8600, valid loss-0.8395, acc-0.7880, test loss-0.8434, acc-0.7924
Iter-8140, train loss-0.8562, acc-0.7600, valid loss-0.8389, acc-0.7886, test loss-0.8428, acc-0.7929
Iter-8150, train loss-0.8420, acc-0.8400, valid loss-0.8383, acc-0.7886, test loss-0.8422, acc-0.7928
Iter-8160, train loss-0.9050, acc-0.6400, valid loss-0.8376, acc-0.7894, test loss-0.8415, acc-0.7935
Iter-8170, train loss-0.7937, acc-0.8400, valid loss-0.8369, acc-0.7894, test loss-0.8409, acc-0.7937
Iter-8180, train loss-0.7798, acc-0.7800, valid loss-0.8363, acc-0.7896, test loss-0.8402, acc-0.7938
Iter-8190, train loss-0.9969, acc-0.7400, valid loss-0.8356, acc-0.7902, test loss-0.8395, acc-0.7942
Iter-8200, train loss-0.9093, acc-0.7800, valid loss-0.8349, acc-0.7904, test loss-0.8389, acc-0.7942
Iter-8210, train loss-0.9305, acc-0.7800, valid loss-0.8344, acc-0.7908, test loss-0.8383, acc-0.7944
Iter-8220, train loss-0.8478, acc-0.6800, valid loss-0.8337, acc-0.7910, test loss-0.8377, acc-0.7946
Iter-8230, train loss-0.7574, acc-0.8800, valid loss-0.8331, acc-0.7912, test loss-0.8371, acc-0.7944
Iter-8240, train loss-0.8527, acc-0.7400, valid loss-0.8325, acc-0.7914, test loss-0.8364, acc-0.7947
Iter-8250, train loss-1.1975, acc-0.6600, valid loss-0.8319, acc-0.7906, test loss-0.8358, acc-0.7953
Iter-8260, train loss-0.7729, acc-0.7600, valid loss-0.8313, acc-0.7908, test loss-0.8352, acc-0.7956
Iter-8270, train loss-0.9817, acc-0.7400, valid loss-0.8306, acc-0.7906, test loss-0.8345, acc-0.7956
Iter-8280, train loss-0.8242, acc-0.8000, valid loss-0.8300, acc-0.7906, test loss-0.8339, acc-0.7958
Iter-8290, train loss-0.9218, acc-0.7400, valid loss-0.8294, acc-0.7906, test loss-0.8333, acc-0.7957
Iter-8300, train loss-0.8319, acc-0.7400, valid loss-0.8287, acc-0.7904, test loss-0.8327, acc-0.7960
Iter-8310, train loss-0.8449, acc-0.8000, valid loss-0.8282, acc-0.7906, test loss-0.8321, acc-0.7960
Iter-8320, train loss-0.9212, acc-0.7200, valid loss-0.8276, acc-0.7908, test loss-0.8314, acc-0.7957
Iter-8330, train loss-0.8189, acc-0.8400, valid loss-0.8269, acc-0.7912, test loss-0.8308, acc-0.7961
Iter-8340, train loss-0.8140, acc-0.8200, valid loss-0.8263, acc-0.7908, test loss-0.8302, acc-0.7963
Iter-8350, train loss-0.9028, acc-0.7400, valid loss-0.8257, acc-0.7908, test loss-0.8296, acc-0.7964
Iter-8360, train loss-0.8676, acc-0.7600, valid loss-0.8251, acc-0.7906, test loss-0.8290, acc-0.7964
Iter-8370, train loss-0.8135, acc-0.8000, valid loss-0.8245, acc-0.7908, test loss-0.8283, acc-0.7964
Iter-8380, train loss-0.8796, acc-0.7800, valid loss-0.8238, acc-0.7908, test loss-0.8276, acc-0.7965
Iter-8390, train loss-0.7316, acc-0.8400, valid loss-0.8232, acc-0.7914, test loss-0.8270, acc-0.7967
Iter-8400, train loss-0.9603, acc-0.7400, valid loss-0.8225, acc-0.7918, test loss-0.8263, acc-0.7968
Iter-8410, train loss-0.7382, acc-0.8600, valid loss-0.8218, acc-0.7918, test loss-0.8257, acc-0.7969
Iter-8420, train loss-0.8152, acc-0.7800, valid loss-0.8212, acc-0.7918, test loss-0.8250, acc-0.7973
Iter-8430, train loss-0.8030, acc-0.7800, valid loss-0.8206, acc-0.7918, test loss-0.8244, acc-0.7974
Iter-8440, train loss-0.8261, acc-0.7800, valid loss-0.8200, acc-0.7920, test loss-0.8238, acc-0.7976
Iter-8450, train loss-0.8193, acc-0.8200, valid loss-0.8193, acc-0.7918, test loss-0.8232, acc-0.7982
Iter-8460, train loss-0.7499, acc-0.8800, valid loss-0.8187, acc-0.7920, test loss-0.8226, acc-0.7985
Iter-8470, train loss-0.7826, acc-0.8200, valid loss-0.8180, acc-0.7920, test loss-0.8220, acc-0.7983
Iter-8480, train loss-0.8430, acc-0.7800, valid loss-0.8174, acc-0.7926, test loss-0.8213, acc-0.7987
Iter-8490, train loss-0.8943, acc-0.7200, valid loss-0.8168, acc-0.7926, test loss-0.8207, acc-0.7986
Iter-8500, train loss-0.9416, acc-0.7200, valid loss-0.8162, acc-0.7922, test loss-0.8201, acc-0.7989
Iter-8510, train loss-0.7790, acc-0.8400, valid loss-0.8156, acc-0.7926, test loss-0.8194, acc-0.7991
Iter-8520, train loss-0.8552, acc-0.8000, valid loss-0.8150, acc-0.7926, test loss-0.8188, acc-0.7988
Iter-8530, train loss-0.9608, acc-0.7600, valid loss-0.8144, acc-0.7932, test loss-0.8182, acc-0.7993
Iter-8540, train loss-0.6814, acc-0.8600, valid loss-0.8138, acc-0.7932, test loss-0.8176, acc-0.7994
Iter-8550, train loss-0.7448, acc-0.8400, valid loss-0.8131, acc-0.7936, test loss-0.8170, acc-0.7993
Iter-8560, train loss-0.8890, acc-0.7600, valid loss-0.8126, acc-0.7938, test loss-0.8164, acc-0.7997
Iter-8570, train loss-0.8335, acc-0.8200, valid loss-0.8120, acc-0.7936, test loss-0.8158, acc-0.8000
Iter-8580, train loss-0.8497, acc-0.7800, valid loss-0.8114, acc-0.7936, test loss-0.8152, acc-0.7996
Iter-8590, train loss-0.8249, acc-0.8000, valid loss-0.8108, acc-0.7946, test loss-0.8146, acc-0.7997
Iter-8600, train loss-0.9217, acc-0.6600, valid loss-0.8101, acc-0.7950, test loss-0.8139, acc-0.7998
Iter-8610, train loss-0.7981, acc-0.8000, valid loss-0.8095, acc-0.7950, test loss-0.8133, acc-0.8001
Iter-8620, train loss-0.8125, acc-0.8400, valid loss-0.8089, acc-0.7948, test loss-0.8127, acc-0.8004
Iter-8630, train loss-0.6607, acc-0.9200, valid loss-0.8083, acc-0.7950, test loss-0.8121, acc-0.8003
Iter-8640, train loss-0.8690, acc-0.7400, valid loss-0.8077, acc-0.7952, test loss-0.8115, acc-0.8002
Iter-8650, train loss-0.8840, acc-0.8000, valid loss-0.8071, acc-0.7958, test loss-0.8109, acc-0.8003
Iter-8660, train loss-0.7601, acc-0.8200, valid loss-0.8064, acc-0.7966, test loss-0.8102, acc-0.8005
Iter-8670, train loss-0.7734, acc-0.8000, valid loss-0.8058, acc-0.7958, test loss-0.8096, acc-0.8010
Iter-8680, train loss-0.7748, acc-0.8200, valid loss-0.8052, acc-0.7964, test loss-0.8090, acc-0.8012
Iter-8690, train loss-0.8495, acc-0.7800, valid loss-0.8047, acc-0.7964, test loss-0.8084, acc-0.8011
Iter-8700, train loss-0.7230, acc-0.8600, valid loss-0.8041, acc-0.7968, test loss-0.8078, acc-0.8012
Iter-8710, train loss-0.8537, acc-0.7600, valid loss-0.8034, acc-0.7968, test loss-0.8072, acc-0.8013
Iter-8720, train loss-0.7984, acc-0.7800, valid loss-0.8029, acc-0.7964, test loss-0.8066, acc-0.8014
Iter-8730, train loss-0.9854, acc-0.6600, valid loss-0.8023, acc-0.7966, test loss-0.8060, acc-0.8018
Iter-8740, train loss-0.8405, acc-0.7200, valid loss-0.8017, acc-0.7974, test loss-0.8054, acc-0.8019
Iter-8750, train loss-1.0221, acc-0.7400, valid loss-0.8011, acc-0.7974, test loss-0.8047, acc-0.8022
Iter-8760, train loss-0.9107, acc-0.7400, valid loss-0.8006, acc-0.7970, test loss-0.8042, acc-0.8023
Iter-8770, train loss-0.9330, acc-0.7000, valid loss-0.8000, acc-0.7972, test loss-0.8036, acc-0.8023
Iter-8780, train loss-0.8054, acc-0.7400, valid loss-0.7994, acc-0.7988, test loss-0.8030, acc-0.8026
Iter-8790, train loss-0.8844, acc-0.8200, valid loss-0.7988, acc-0.7986, test loss-0.8024, acc-0.8027
Iter-8800, train loss-0.9159, acc-0.7800, valid loss-0.7982, acc-0.7990, test loss-0.8018, acc-0.8030
Iter-8810, train loss-0.7676, acc-0.8800, valid loss-0.7976, acc-0.7988, test loss-0.8012, acc-0.8032
Iter-8820, train loss-0.7710, acc-0.8200, valid loss-0.7970, acc-0.7988, test loss-0.8006, acc-0.8032
Iter-8830, train loss-0.7973, acc-0.7000, valid loss-0.7964, acc-0.7990, test loss-0.8000, acc-0.8037
Iter-8840, train loss-0.6872, acc-0.8000, valid loss-0.7959, acc-0.7988, test loss-0.7995, acc-0.8042
Iter-8850, train loss-0.8369, acc-0.8200, valid loss-0.7953, acc-0.7982, test loss-0.7989, acc-0.8043
Iter-8860, train loss-0.6361, acc-0.8200, valid loss-0.7948, acc-0.7986, test loss-0.7984, acc-0.8046
Iter-8870, train loss-0.8495, acc-0.7800, valid loss-0.7942, acc-0.7986, test loss-0.7978, acc-0.8047
Iter-8880, train loss-1.0472, acc-0.7600, valid loss-0.7936, acc-0.7986, test loss-0.7972, acc-0.8051
Iter-8890, train loss-0.7759, acc-0.8000, valid loss-0.7931, acc-0.7994, test loss-0.7966, acc-0.8051
Iter-8900, train loss-0.8178, acc-0.7800, valid loss-0.7925, acc-0.7996, test loss-0.7960, acc-0.8051
Iter-8910, train loss-0.7690, acc-0.8000, valid loss-0.7919, acc-0.7992, test loss-0.7955, acc-0.8054
Iter-8920, train loss-0.6872, acc-0.8200, valid loss-0.7913, acc-0.7998, test loss-0.7949, acc-0.8051
Iter-8930, train loss-0.7909, acc-0.7800, valid loss-0.7908, acc-0.7994, test loss-0.7943, acc-0.8057
Iter-8940, train loss-0.6907, acc-0.9000, valid loss-0.7902, acc-0.7998, test loss-0.7937, acc-0.8054
Iter-8950, train loss-0.8370, acc-0.8000, valid loss-0.7896, acc-0.7996, test loss-0.7932, acc-0.8057
Iter-8960, train loss-0.7007, acc-0.8600, valid loss-0.7891, acc-0.7996, test loss-0.7926, acc-0.8059
Iter-8970, train loss-0.7781, acc-0.8400, valid loss-0.7885, acc-0.7996, test loss-0.7921, acc-0.8060
Iter-8980, train loss-0.8662, acc-0.8200, valid loss-0.7880, acc-0.8002, test loss-0.7915, acc-0.8066
Iter-8990, train loss-0.7905, acc-0.7600, valid loss-0.7874, acc-0.8000, test loss-0.7909, acc-0.8062
Iter-9000, train loss-0.7781, acc-0.8200, valid loss-0.7868, acc-0.8004, test loss-0.7903, acc-0.8062
Iter-9010, train loss-0.8444, acc-0.7400, valid loss-0.7862, acc-0.8004, test loss-0.7898, acc-0.8067
Iter-9020, train loss-0.7388, acc-0.8200, valid loss-0.7856, acc-0.8006, test loss-0.7892, acc-0.8066
Iter-9030, train loss-0.7035, acc-0.8400, valid loss-0.7851, acc-0.8008, test loss-0.7886, acc-0.8067
Iter-9040, train loss-0.6643, acc-0.9000, valid loss-0.7845, acc-0.8012, test loss-0.7880, acc-0.8067
Iter-9050, train loss-0.8354, acc-0.7800, valid loss-0.7839, acc-0.8016, test loss-0.7874, acc-0.8070
Iter-9060, train loss-0.8135, acc-0.8400, valid loss-0.7833, acc-0.8020, test loss-0.7868, acc-0.8071
Iter-9070, train loss-0.7614, acc-0.9200, valid loss-0.7827, acc-0.8026, test loss-0.7863, acc-0.8069
Iter-9080, train loss-0.8871, acc-0.7800, valid loss-0.7822, acc-0.8020, test loss-0.7857, acc-0.8072
Iter-9090, train loss-0.8691, acc-0.7400, valid loss-0.7816, acc-0.8024, test loss-0.7851, acc-0.8078
Iter-9100, train loss-0.8621, acc-0.7200, valid loss-0.7811, acc-0.8024, test loss-0.7846, acc-0.8079
Iter-9110, train loss-0.7340, acc-0.8800, valid loss-0.7805, acc-0.8024, test loss-0.7840, acc-0.8077
Iter-9120, train loss-0.8399, acc-0.7400, valid loss-0.7800, acc-0.8020, test loss-0.7834, acc-0.8081
Iter-9130, train loss-0.7801, acc-0.7600, valid loss-0.7794, acc-0.8026, test loss-0.7829, acc-0.8085
Iter-9140, train loss-0.8156, acc-0.7800, valid loss-0.7789, acc-0.8024, test loss-0.7824, acc-0.8088
Iter-9150, train loss-0.8172, acc-0.7400, valid loss-0.7784, acc-0.8026, test loss-0.7819, acc-0.8088
Iter-9160, train loss-0.6431, acc-0.9000, valid loss-0.7779, acc-0.8038, test loss-0.7814, acc-0.8089
Iter-9170, train loss-0.6674, acc-0.8600, valid loss-0.7773, acc-0.8032, test loss-0.7808, acc-0.8093
Iter-9180, train loss-0.7430, acc-0.8000, valid loss-0.7767, acc-0.8034, test loss-0.7803, acc-0.8093
Iter-9190, train loss-0.8415, acc-0.8000, valid loss-0.7762, acc-0.8038, test loss-0.7797, acc-0.8089
Iter-9200, train loss-0.7413, acc-0.8800, valid loss-0.7756, acc-0.8048, test loss-0.7792, acc-0.8091
Iter-9210, train loss-0.8473, acc-0.7400, valid loss-0.7751, acc-0.8044, test loss-0.7786, acc-0.8091
Iter-9220, train loss-0.7342, acc-0.7800, valid loss-0.7746, acc-0.8044, test loss-0.7781, acc-0.8093
Iter-9230, train loss-0.8816, acc-0.8000, valid loss-0.7741, acc-0.8048, test loss-0.7775, acc-0.8102
Iter-9240, train loss-0.7176, acc-0.8400, valid loss-0.7736, acc-0.8050, test loss-0.7769, acc-0.8105
Iter-9250, train loss-0.7790, acc-0.7800, valid loss-0.7730, acc-0.8052, test loss-0.7764, acc-0.8105
Iter-9260, train loss-0.8858, acc-0.6600, valid loss-0.7725, acc-0.8052, test loss-0.7758, acc-0.8109
Iter-9270, train loss-0.8354, acc-0.8200, valid loss-0.7719, acc-0.8054, test loss-0.7752, acc-0.8108
Iter-9280, train loss-0.9629, acc-0.8000, valid loss-0.7714, acc-0.8054, test loss-0.7747, acc-0.8113
Iter-9290, train loss-0.6635, acc-0.8600, valid loss-0.7709, acc-0.8054, test loss-0.7741, acc-0.8116
Iter-9300, train loss-0.7299, acc-0.8200, valid loss-0.7703, acc-0.8054, test loss-0.7736, acc-0.8120
Iter-9310, train loss-0.7588, acc-0.7400, valid loss-0.7698, acc-0.8056, test loss-0.7731, acc-0.8121
Iter-9320, train loss-0.6175, acc-0.9000, valid loss-0.7693, acc-0.8060, test loss-0.7726, acc-0.8122
Iter-9330, train loss-0.9470, acc-0.7600, valid loss-0.7687, acc-0.8056, test loss-0.7720, acc-0.8119
Iter-9340, train loss-0.9555, acc-0.7400, valid loss-0.7682, acc-0.8054, test loss-0.7714, acc-0.8122
Iter-9350, train loss-0.9069, acc-0.6800, valid loss-0.7676, acc-0.8054, test loss-0.7709, acc-0.8125
Iter-9360, train loss-0.7405, acc-0.8000, valid loss-0.7670, acc-0.8058, test loss-0.7704, acc-0.8122
Iter-9370, train loss-0.7160, acc-0.8400, valid loss-0.7665, acc-0.8062, test loss-0.7699, acc-0.8126
Iter-9380, train loss-0.7799, acc-0.7800, valid loss-0.7659, acc-0.8060, test loss-0.7694, acc-0.8127
Iter-9390, train loss-0.7147, acc-0.8400, valid loss-0.7654, acc-0.8062, test loss-0.7688, acc-0.8131
Iter-9400, train loss-0.6430, acc-0.8000, valid loss-0.7649, acc-0.8068, test loss-0.7683, acc-0.8129
Iter-9410, train loss-0.6709, acc-0.8000, valid loss-0.7644, acc-0.8068, test loss-0.7678, acc-0.8133
Iter-9420, train loss-0.7023, acc-0.9200, valid loss-0.7638, acc-0.8070, test loss-0.7672, acc-0.8135
Iter-9430, train loss-0.7284, acc-0.7600, valid loss-0.7633, acc-0.8072, test loss-0.7667, acc-0.8137
Iter-9440, train loss-0.8629, acc-0.7400, valid loss-0.7628, acc-0.8072, test loss-0.7662, acc-0.8137
Iter-9450, train loss-0.6108, acc-0.8800, valid loss-0.7623, acc-0.8070, test loss-0.7657, acc-0.8137
Iter-9460, train loss-0.7126, acc-0.8600, valid loss-0.7618, acc-0.8076, test loss-0.7651, acc-0.8142
Iter-9470, train loss-0.6634, acc-0.8400, valid loss-0.7612, acc-0.8076, test loss-0.7646, acc-0.8145
Iter-9480, train loss-0.7636, acc-0.7600, valid loss-0.7607, acc-0.8082, test loss-0.7640, acc-0.8142
Iter-9490, train loss-0.6665, acc-0.8600, valid loss-0.7602, acc-0.8078, test loss-0.7635, acc-0.8142
Iter-9500, train loss-0.7612, acc-0.8000, valid loss-0.7596, acc-0.8078, test loss-0.7629, acc-0.8141
Iter-9510, train loss-0.7974, acc-0.8400, valid loss-0.7591, acc-0.8080, test loss-0.7624, acc-0.8142
Iter-9520, train loss-0.8455, acc-0.7200, valid loss-0.7586, acc-0.8086, test loss-0.7618, acc-0.8144
Iter-9530, train loss-0.8379, acc-0.7000, valid loss-0.7581, acc-0.8084, test loss-0.7613, acc-0.8146
Iter-9540, train loss-0.9388, acc-0.6800, valid loss-0.7575, acc-0.8086, test loss-0.7608, acc-0.8143
Iter-9550, train loss-0.7186, acc-0.9000, valid loss-0.7570, acc-0.8090, test loss-0.7603, acc-0.8145
Iter-9560, train loss-0.8034, acc-0.7600, valid loss-0.7565, acc-0.8094, test loss-0.7598, acc-0.8148
Iter-9570, train loss-0.6165, acc-0.8800, valid loss-0.7560, acc-0.8094, test loss-0.7593, acc-0.8149
Iter-9580, train loss-0.8442, acc-0.8000, valid loss-0.7554, acc-0.8092, test loss-0.7588, acc-0.8146
Iter-9590, train loss-0.8136, acc-0.7800, valid loss-0.7548, acc-0.8098, test loss-0.7583, acc-0.8143
Iter-9600, train loss-0.7802, acc-0.8000, valid loss-0.7543, acc-0.8102, test loss-0.7577, acc-0.8148
Iter-9610, train loss-0.5229, acc-0.9200, valid loss-0.7538, acc-0.8112, test loss-0.7572, acc-0.8151
Iter-9620, train loss-0.6964, acc-0.8400, valid loss-0.7533, acc-0.8114, test loss-0.7567, acc-0.8152
Iter-9630, train loss-0.7773, acc-0.8200, valid loss-0.7528, acc-0.8118, test loss-0.7561, acc-0.8156
Iter-9640, train loss-0.8426, acc-0.8000, valid loss-0.7522, acc-0.8118, test loss-0.7556, acc-0.8158
Iter-9650, train loss-0.7098, acc-0.8400, valid loss-0.7517, acc-0.8126, test loss-0.7551, acc-0.8162
Iter-9660, train loss-0.6041, acc-0.9000, valid loss-0.7512, acc-0.8130, test loss-0.7546, acc-0.8161
Iter-9670, train loss-0.7711, acc-0.8000, valid loss-0.7507, acc-0.8126, test loss-0.7541, acc-0.8162
Iter-9680, train loss-0.7451, acc-0.8000, valid loss-0.7502, acc-0.8124, test loss-0.7536, acc-0.8164
Iter-9690, train loss-0.7356, acc-0.9000, valid loss-0.7497, acc-0.8128, test loss-0.7531, acc-0.8161
Iter-9700, train loss-0.8038, acc-0.7600, valid loss-0.7491, acc-0.8128, test loss-0.7525, acc-0.8161
Iter-9710, train loss-0.8488, acc-0.8200, valid loss-0.7487, acc-0.8132, test loss-0.7520, acc-0.8163
Iter-9720, train loss-0.7808, acc-0.7600, valid loss-0.7482, acc-0.8130, test loss-0.7515, acc-0.8166
Iter-9730, train loss-0.8394, acc-0.8000, valid loss-0.7477, acc-0.8132, test loss-0.7510, acc-0.8169
Iter-9740, train loss-0.7930, acc-0.8000, valid loss-0.7472, acc-0.8132, test loss-0.7505, acc-0.8168
Iter-9750, train loss-0.8757, acc-0.6800, valid loss-0.7467, acc-0.8138, test loss-0.7500, acc-0.8167
Iter-9760, train loss-0.6576, acc-0.8200, valid loss-0.7461, acc-0.8138, test loss-0.7495, acc-0.8168
Iter-9770, train loss-0.9665, acc-0.7200, valid loss-0.7457, acc-0.8136, test loss-0.7490, acc-0.8171
Iter-9780, train loss-0.8037, acc-0.8000, valid loss-0.7452, acc-0.8136, test loss-0.7485, acc-0.8171
Iter-9790, train loss-0.7932, acc-0.8000, valid loss-0.7448, acc-0.8138, test loss-0.7481, acc-0.8170
Iter-9800, train loss-0.9688, acc-0.7600, valid loss-0.7443, acc-0.8138, test loss-0.7476, acc-0.8171
Iter-9810, train loss-0.8784, acc-0.7800, valid loss-0.7438, acc-0.8140, test loss-0.7471, acc-0.8174
Iter-9820, train loss-0.7472, acc-0.8600, valid loss-0.7433, acc-0.8144, test loss-0.7466, acc-0.8175
Iter-9830, train loss-0.7089, acc-0.8400, valid loss-0.7428, acc-0.8146, test loss-0.7462, acc-0.8176
Iter-9840, train loss-0.6341, acc-0.8800, valid loss-0.7423, acc-0.8148, test loss-0.7456, acc-0.8174
Iter-9850, train loss-0.6543, acc-0.8200, valid loss-0.7417, acc-0.8150, test loss-0.7451, acc-0.8175
Iter-9860, train loss-0.8322, acc-0.7000, valid loss-0.7412, acc-0.8154, test loss-0.7446, acc-0.8178
Iter-9870, train loss-0.6888, acc-0.8200, valid loss-0.7407, acc-0.8156, test loss-0.7441, acc-0.8180
Iter-9880, train loss-0.8359, acc-0.7000, valid loss-0.7402, acc-0.8160, test loss-0.7436, acc-0.8180
Iter-9890, train loss-0.7036, acc-0.8200, valid loss-0.7397, acc-0.8166, test loss-0.7431, acc-0.8182
Iter-9900, train loss-0.8258, acc-0.7000, valid loss-0.7392, acc-0.8166, test loss-0.7427, acc-0.8186
Iter-9910, train loss-0.6285, acc-0.8600, valid loss-0.7387, acc-0.8168, test loss-0.7422, acc-0.8187
Iter-9920, train loss-0.7049, acc-0.8400, valid loss-0.7382, acc-0.8172, test loss-0.7417, acc-0.8189
Iter-9930, train loss-0.6885, acc-0.8400, valid loss-0.7377, acc-0.8174, test loss-0.7411, acc-0.8187
Iter-9940, train loss-0.7470, acc-0.8200, valid loss-0.7372, acc-0.8178, test loss-0.7406, acc-0.8188
Iter-9950, train loss-0.7063, acc-0.8000, valid loss-0.7367, acc-0.8180, test loss-0.7401, acc-0.8191
Iter-9960, train loss-0.7617, acc-0.8000, valid loss-0.7362, acc-0.8178, test loss-0.7396, acc-0.8194
Iter-9970, train loss-0.8704, acc-0.7800, valid loss-0.7358, acc-0.8184, test loss-0.7391, acc-0.8191
Iter-9980, train loss-0.7599, acc-0.7600, valid loss-0.7353, acc-0.8178, test loss-0.7386, acc-0.8195
Iter-9990, train loss-0.7466, acc-0.8000, valid loss-0.7348, acc-0.8180, test loss-0.7381, acc-0.8193
Iter-10000, train loss-0.6326, acc-0.8600, valid loss-0.7343, acc-0.8180, test loss-0.7376, acc-0.8196
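
The run ends at iteration 10,000 with a test accuracy of about 0.82, up from roughly 0.69 at iteration 4,070. The per-minibatch train loss and accuracy are noisy (each line reflects a single small batch; the train accuracies are all multiples of 0.02, consistent with a 50-example minibatch), while the validation and test columns, evaluated on the full splits, improve smoothly. Before plotting, the noisy train series can be smoothed with a moving average; a minimal sketch, assuming nn is the trained FFNN instance whose losses dict is plotted below (the window k=50 is an arbitrary choice, not from the original code):

import numpy as np

def moving_average(xs, k=50):
    # Length-k boxcar average; 'valid' mode trims (k - 1) points at the edges
    xs = np.asarray(xs, dtype=float)
    return np.convolve(xs, np.ones(k) / k, mode='valid')

# Smoothed view of the noisy per-minibatch train loss
train_loss_smooth = moving_average(nn.losses['train'], k=50)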

In [8]:
# Display the learning curves (losses) for training, validation, and testing
%matplotlib inline
%config InlineBackend.figure_format = 'retina'
import matplotlib.pyplot as plt
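# nn is the FFNN instance trained above; nn.losses collects the metrics logged during training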

plt.plot(nn.losses['train'], label='Train loss')
plt.plot(nn.losses['valid'], label='Valid loss')
plt.plot(nn.losses['test'], label='Test loss')
plt.legend()
plt.show()

[Figure: train, validation, and test loss curves over the logged steps]

In [9]:
plt.plot(nn.losses['train_acc'], label='Train accuracy')
plt.plot(nn.losses['valid_acc'], label='Valid accuracy')
plt.plot(nn.losses['test_acc'], label='Test accuracy')
plt.legend()
plt.show()

[Figure: train, validation, and test accuracy curves over the logged steps]

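Since the test curve above is tracked purely for reporting, a cleaner protocol is to select the logged step by validation accuracy alone and then read off the test accuracy at that step. A minimal sketch, assuming the accuracy lists in nn.losses are index-aligned (one entry per logged step):

import numpy as np

# Pick the logged step with the highest validation accuracy
best = int(np.argmax(nn.losses['valid_acc']))
print('Best valid acc {:.4f} at logged step {}; test acc there {:.4f}'.format(
    nn.losses['valid_acc'][best], best, nn.losses['test_acc'][best]))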