In [8]:
# Data
import numpy as np
from tensorflow.examples.tutorials.mnist import input_data
import impl.layer as l
# Dataset preparation and pre-processing
# NOTE(review): tensorflow.examples.tutorials was removed in TF 2.x; this cell
# requires TF 1.x (tf.keras.datasets.mnist is the modern replacement).
# one_hot=False keeps labels as integer class ids (0..9).
mnist = input_data.read_data_sets('data/MNIST_data/', one_hot=False)
# Images arrive flattened: each row is a 28*28 = 784 float vector.
X_train, y_train = mnist.train.images, mnist.train.labels
X_val, y_val = mnist.validation.images, mnist.validation.labels
X_test, y_test = mnist.test.images, mnist.test.labels
# Bare last expression displays the split shapes (rich output of the cell).
X_train.shape, X_val.shape, X_test.shape
Extracting data/MNIST_data/train-images-idx3-ubyte.gz
Extracting data/MNIST_data/train-labels-idx1-ubyte.gz
Extracting data/MNIST_data/t10k-images-idx3-ubyte.gz
Extracting data/MNIST_data/t10k-labels-idx1-ubyte.gz
Out[8]:
((55000, 784), (5000, 784), (10000, 784))
In [9]:
# Pre-processing: centering each feature (pixel) at zero mean
def normalize(X, mean=None):
    """Center features by subtracting a per-feature mean.

    Parameters
    ----------
    X : ndarray, shape (n_samples, n_features)
        Data to center.
    mean : ndarray or None, optional
        Per-feature mean to subtract. Defaults to ``X.mean(axis=0)``
        (the original behavior). Pass the *training-set* mean here when
        normalizing validation/test data to avoid leaking their statistics.

    Returns
    -------
    ndarray
        Centered copy of ``X`` (same shape).

    Notes
    -----
    Dividing by the per-feature std (the commented-out original variant)
    is deliberately skipped: constant pixels have std 0 and would divide
    by zero.
    """
    if mean is None:
        mean = X.mean(axis=0)
    return X - mean  # / X.std(axis=0)
X_train, X_val, X_test = normalize(X=X_train), normalize(X=X_val), normalize(X=X_test)
In [31]:
from sklearn.utils import shuffle as skshuffle
class FFNN:
    """Feed-forward network trained with feedback-alignment-style updates.

    The backward pass propagates the error through *fixed random* matrices
    (``W_fixed``) instead of the transposed forward weights, and replaces the
    derivative of the nonlinearity with a temporal difference of successive
    error signals (``dy_t - dy_{t-1}``).

    Layer bookkeeping: ``model``/``W_fixed``/``grads`` each hold three
    entries — [0] input layer (dict / array), [1] list over hidden layers,
    [2] output layer.
    """

    def __init__(self, D, C, H, L):
        """Build parameters.

        D: input dimensionality; C: number of classes;
        H: hidden width; L: number of hidden layers.
        """
        self.L = L  # number of hidden layers
        self.C = C  # number of classes
        self.losses = {'train': [], 'train_acc': [],
                       'valid': [], 'valid_acc': [],
                       'test': [], 'test_acc': []}
        self.model = []    # trainable weights/biases
        self.W_fixed = []  # fixed random feedback weights (never updated)
        self.grads = []    # gradient buffers mirroring `model`'s structure
        # Previous error signal for the temporal-difference trick in
        # train_backward; one row of zeros broadcasts on first use.
        self.dy_prev = np.zeros((1, C))
        self.y_prev = np.zeros((1, C))
        low, high = -1, 1
        # Input layer: weights/biases, uniform init scaled by sqrt(fan_in/2)
        m = dict(W=np.random.uniform(size=(D, H), low=low, high=high) / np.sqrt(D / 2.), b=np.zeros((1, H)))
        self.model.append(m)
        # Fixed feedback weight for the input layer
        m = np.random.uniform(size=(D, H), low=low, high=high) / np.sqrt(D / 2.)
        self.W_fixed.append(m)
        # Input layer: gradient buffers
        self.grads.append({key: np.zeros_like(val) for key, val in self.model[0].items()})
        # Hidden layers: weights/biases
        m_L = []
        for _ in range(L):
            m = dict(W=np.random.uniform(size=(H, H), low=low, high=high) / np.sqrt(H / 2.), b=np.zeros((1, H)))
            m_L.append(m)
        self.model.append(m_L)
        # Fixed feedback weights for the hidden layers
        m_L = []
        for _ in range(L):
            m = np.random.uniform(size=(H, H), low=low, high=high) / np.sqrt(H / 2.)
            m_L.append(m)
        self.W_fixed.append(m_L)
        # Hidden layers: gradient buffers
        grad_L = []
        for _ in range(L):
            grad_L.append({key: np.zeros_like(val) for key, val in self.model[1][0].items()})
        self.grads.append(grad_L)
        # Output layer: weights/biases
        m = dict(W=np.random.uniform(size=(H, C), low=low, high=high) / np.sqrt(H / 2.), b=np.zeros((1, C)))
        self.model.append(m)
        # Fixed feedback weight for the output layer
        m = np.random.uniform(size=(H, C), low=low, high=high) / np.sqrt(H / 2.)
        self.W_fixed.append(m)
        # Output layer: gradient buffers
        self.grads.append({key: np.zeros_like(val) for key, val in self.model[2].items()})

    def fc_forward(self, X, W, b):
        """Affine layer forward: out = X @ W + b. Returns (out, cache)."""
        out = (X @ W) + b
        cache = (W, X)  # saved for the backward pass
        return out, cache

    def fc_backward(self, dout, cache, W_fixed):
        """Affine layer backward using a fixed feedback matrix.

        Returns (dX, dW, db). dX is computed with ``W_fixed`` (feedback
        alignment), not the forward weight W from the cache.
        """
        W, X = cache
        dW = X.T @ dout
        db = np.sum(dout, axis=0).reshape(1, -1)  # keep bias grad shape (1, n)
        # dX = dout @ W.T would be vanilla backprop; FA uses the fixed matrix:
        dX = dout @ W_fixed.T
        return dX, dW, db

    def train_forward(self, X, train):
        """Forward pass: affine+tanh input layer, L affine+tanh hidden
        layers, affine+softmax output layer.

        Returns (class probabilities, caches); caches are collected only
        when ``train`` is True and are indexed [0]=input, [1]=hidden, [2]=output.
        """
        caches = []
        # Input layer
        y, fc_cache = self.fc_forward(X=X, W=self.model[0]['W'], b=self.model[0]['b'])
        y, nl_cache = l.tanh_forward(X=y)
        if train:
            caches.append((fc_cache, nl_cache))
        X = y.copy()  # activations feed the next layer
        # Hidden layers
        fc_caches, nl_caches = [], []
        for layer in range(self.L):
            y, fc_cache = self.fc_forward(X=X, W=self.model[1][layer]['W'], b=self.model[1][layer]['b'])
            y, nl_cache = l.tanh_forward(X=y)
            X = y.copy()
            if train:
                fc_caches.append(fc_cache)
                nl_caches.append(nl_cache)
        if train:
            caches.append((fc_caches, nl_caches))  # caches[1]
        # Output layer
        y, fc_cache = self.fc_forward(X=X, W=self.model[2]['W'], b=self.model[2]['b'])
        y_prob = l.softmax(X=y)
        if train:
            caches.append(fc_cache)  # caches[2]
        return y_prob, caches

    def onehot(self, labels):
        """Convert integer labels, shape (m,), to one-hot, shape (m, C)."""
        y = np.zeros([labels.size, self.C])
        y[range(labels.size), labels] = 1.
        return y

    def squared_loss(self, y_pred, y_train):
        """Mean (over samples) half squared error against one-hot targets."""
        m = y_pred.shape[0]
        data_loss = 0.5 * np.sum((y_pred - self.onehot(y_train))**2) / m
        return data_loss

    def dsquared_loss(self, y_pred, y_train):
        """Gradient of squared_loss w.r.t. y_pred, shape (m, C)."""
        m = y_pred.shape[0]
        grad_y = (y_pred - self.onehot(y_train)) / m
        return grad_y

    def loss_function(self, y_prob, y_train):
        """Return (loss value, gradient of loss w.r.t. probabilities)."""
        loss = self.squared_loss(y_pred=y_prob, y_train=y_train)
        dy = self.dsquared_loss(y_pred=y_prob, y_train=y_train)
        return loss, dy

    def train_backward(self, dy, caches):
        """Backward pass through fixed feedback weights.

        The nonlinearity derivative is replaced by a temporal difference of
        error signals between consecutive iterations (dy - dy_prev).
        Returns gradient structure matching ``self.grads``.
        """
        # NOTE(review): .copy() is shallow — the nested dicts are shared with
        # self.grads. Harmless here because every entry is overwritten below,
        # but this is NOT a zero re-initialization.
        grads = self.grads.copy()
        dy_prev = self.dy_prev.copy()  # error signal from the previous iteration
        self.dy_prev = dy.copy()       # stored for the next iteration
        # Output layer
        fc_cache = caches[2]
        dy = dy - dy_prev  # temporal diff instead of a differentiable softmax backward
        dX, dW, db = self.fc_backward(dout=dy, cache=fc_cache, W_fixed=self.W_fixed[2])
        dy = dX.copy()
        # Propagate the previous error through the same fixed feedback weights
        dy_prev = dy_prev @ self.W_fixed[2].T
        grads[2]['W'] = dW
        grads[2]['b'] = db
        # Hidden layers, deepest first
        fc_caches, nl_caches = caches[1]
        for layer in reversed(range(self.L)):
            dy = dy - dy_prev  # temporal diff instead of tanh derivative
            dX, dW, db = self.fc_backward(dout=dy, cache=fc_caches[layer], W_fixed=self.W_fixed[1][layer])
            dy = dX.copy()
            dy_prev = dy_prev @ self.W_fixed[1][layer].T
            grads[1][layer]['W'] = dW
            grads[1][layer]['b'] = db
        # Input layer (dX is discarded — nothing below to propagate to)
        fc_cache, nl_cache = caches[0]
        dy = dy - dy_prev  # temporal diff instead of tanh derivative
        dX, dW, db = self.fc_backward(dout=dy, cache=fc_cache, W_fixed=self.W_fixed[0])
        grads[0]['W'] = dW
        grads[0]['b'] = db
        return grads

    def test(self, X):
        """Inference: return (argmax class predictions, class probabilities)."""
        y_prob, _ = self.train_forward(X, train=False)
        y_pred = np.argmax(y_prob, axis=1)
        return y_pred, y_prob

    def get_minibatch(self, X, y, minibatch_size, shuffle):
        """Split (X, y) into a list of (X_mini, y_mini) minibatch tuples,
        optionally shuffling first (sklearn keeps X/y rows aligned)."""
        minibatches = []
        if shuffle:
            X, y = skshuffle(X, y)
        for i in range(0, X.shape[0], minibatch_size):
            X_mini = X[i:i + minibatch_size]
            y_mini = y[i:i + minibatch_size]
            minibatches.append((X_mini, y_mini))
        return minibatches

    def sgd(self, train_set, val_set, alpha, mb_size, n_iter, print_after):
        """SGD training loop: one random minibatch update per iteration.

        train_set/val_set: (X, y) tuples. alpha: learning rate.
        mb_size: minibatch size. n_iter: number of update steps.
        print_after: logging period (iterations).

        NOTE(review): the test-set evaluation below reads X_test/y_test from
        the enclosing (notebook) scope — presumably the globals from the data
        cell; consider passing a test_set argument instead.
        """
        X_train, y_train = train_set
        X_val, y_val = val_set
        for iter in range(1, n_iter + 1):
            # Reshuffle, partition, then use a single random minibatch.
            # NOTE(review): building every minibatch to use only one is
            # wasteful but preserved as-is.
            minibatches = self.get_minibatch(X_train, y_train, mb_size, shuffle=True)
            idx = np.random.randint(0, len(minibatches))
            X_mini, y_mini = minibatches[idx]
            # Forward, loss gradient, backward
            y_prob, caches = self.train_forward(X_mini, train=True)
            _, dy = self.loss_function(y_prob, y_mini)
            grads = self.train_backward(dy, caches)
            # Update the input layer
            for key in grads[0].keys():
                self.model[0][key] -= alpha * grads[0][key]
            # Update the hidden layers
            for layer in range(self.L):
                for key in grads[1][layer].keys():
                    self.model[1][layer][key] -= alpha * grads[1][layer][key]
            # Update the output layer
            for key in grads[2].keys():
                self.model[2][key] -= alpha * grads[2][key]
            # Training loss/accuracy on the current minibatch
            y_pred, y_prob = self.test(X_mini)
            loss, _ = self.loss_function(y_prob, y_mini)
            self.losses['train'].append(loss)
            acc = np.mean(y_pred == y_mini)
            self.losses['train_acc'].append(acc)
            # Validation loss/accuracy
            y_pred, y_prob = self.test(X_val)
            valid_loss, _ = self.loss_function(y_prob, y_val)
            self.losses['valid'].append(valid_loss)
            valid_acc = np.mean(y_pred == y_val)
            self.losses['valid_acc'].append(valid_acc)
            # Test loss/accuracy.
            # BUG FIX: the original called `nn.test(...)` — the *global*
            # instance — which breaks when the object is bound to any other
            # name; it must use `self`.
            y_pred, y_prob = self.test(X_test)
            test_loss, _ = self.loss_function(y_prob, y_test)
            self.losses['test'].append(test_loss)
            test_acc = np.mean(y_pred == y_test)
            self.losses['test_acc'].append(test_acc)
            # Periodic progress report
            if iter % print_after == 0:
                print('Iter-{}, train loss-{:.4f}, acc-{:.4f}, valid loss-{:.4f}, acc-{:.4f}, test loss-{:.4f}, acc-{:.4f}'.format(
                    iter, loss, acc, valid_loss, valid_acc, test_loss, test_acc))
In [32]:
# Hyper-parameters
n_iter = 10000 # number of SGD iterations (one random minibatch each; not full epochs)
alpha = 1e-2 # learning rate
mb_size = 100 # minibatch size (rows per SGD update)
print_after = 10 # log train/valid/test metrics every this many iterations
num_hidden_units = 32 # width H of each hidden layer
num_input_units = X_train.shape[1] # input dimensionality (784 for flattened 28x28 MNIST)
num_output_units = y_train.max() + 1 # number of classes; assumes labels are 0..C-1
num_layers = 2 # number of hidden layers (depth)
# Build the model and train it.
# NOTE(review): the instance must be named `nn` — FFNN.sgd's test-set
# evaluation references the global `nn` (and X_test/y_test) directly.
nn = FFNN(C=num_output_units, D=num_input_units, H=num_hidden_units, L=num_layers)
nn.sgd(train_set=(X_train, y_train), val_set=(X_val, y_val), mb_size=mb_size, alpha=alpha,
n_iter=n_iter, print_after=print_after)
Iter-10, train loss-0.4473, acc-0.1400, valid loss-0.4485, acc-0.1162, test loss-0.4485, acc-0.1156
Iter-20, train loss-0.4495, acc-0.1000, valid loss-0.4488, acc-0.1158, test loss-0.4488, acc-0.1141
Iter-30, train loss-0.4492, acc-0.1400, valid loss-0.4490, acc-0.1154, test loss-0.4489, acc-0.1181
Iter-40, train loss-0.4496, acc-0.1000, valid loss-0.4494, acc-0.1144, test loss-0.4493, acc-0.1188
Iter-50, train loss-0.4484, acc-0.1800, valid loss-0.4497, acc-0.1152, test loss-0.4495, acc-0.1182
Iter-60, train loss-0.4504, acc-0.0800, valid loss-0.4500, acc-0.1166, test loss-0.4498, acc-0.1214
Iter-70, train loss-0.4504, acc-0.0900, valid loss-0.4503, acc-0.1194, test loss-0.4500, acc-0.1242
Iter-80, train loss-0.4490, acc-0.1700, valid loss-0.4504, acc-0.1226, test loss-0.4501, acc-0.1295
Iter-90, train loss-0.4502, acc-0.0900, valid loss-0.4504, acc-0.1282, test loss-0.4501, acc-0.1374
Iter-100, train loss-0.4489, acc-0.1600, valid loss-0.4503, acc-0.1306, test loss-0.4500, acc-0.1431
Iter-110, train loss-0.4492, acc-0.1200, valid loss-0.4503, acc-0.1334, test loss-0.4499, acc-0.1469
Iter-120, train loss-0.4486, acc-0.1200, valid loss-0.4500, acc-0.1372, test loss-0.4496, acc-0.1510
Iter-130, train loss-0.4494, acc-0.0900, valid loss-0.4499, acc-0.1388, test loss-0.4495, acc-0.1520
Iter-140, train loss-0.4501, acc-0.1200, valid loss-0.4497, acc-0.1418, test loss-0.4492, acc-0.1550
Iter-150, train loss-0.4492, acc-0.2000, valid loss-0.4494, acc-0.1456, test loss-0.4489, acc-0.1573
Iter-160, train loss-0.4500, acc-0.1200, valid loss-0.4491, acc-0.1508, test loss-0.4486, acc-0.1624
Iter-170, train loss-0.4473, acc-0.1800, valid loss-0.4486, acc-0.1536, test loss-0.4481, acc-0.1670
Iter-180, train loss-0.4474, acc-0.1400, valid loss-0.4481, acc-0.1560, test loss-0.4475, acc-0.1696
Iter-190, train loss-0.4465, acc-0.1900, valid loss-0.4475, acc-0.1600, test loss-0.4469, acc-0.1747
Iter-200, train loss-0.4484, acc-0.1600, valid loss-0.4468, acc-0.1628, test loss-0.4462, acc-0.1779
Iter-210, train loss-0.4438, acc-0.2000, valid loss-0.4462, acc-0.1656, test loss-0.4456, acc-0.1785
Iter-220, train loss-0.4473, acc-0.1700, valid loss-0.4455, acc-0.1686, test loss-0.4449, acc-0.1803
Iter-230, train loss-0.4468, acc-0.1100, valid loss-0.4446, acc-0.1728, test loss-0.4440, acc-0.1863
Iter-240, train loss-0.4460, acc-0.1600, valid loss-0.4439, acc-0.1718, test loss-0.4432, acc-0.1885
Iter-250, train loss-0.4407, acc-0.2400, valid loss-0.4428, acc-0.1766, test loss-0.4422, acc-0.1924
Iter-260, train loss-0.4397, acc-0.2000, valid loss-0.4416, acc-0.1846, test loss-0.4410, acc-0.1997
Iter-270, train loss-0.4376, acc-0.2800, valid loss-0.4408, acc-0.1888, test loss-0.4401, acc-0.2044
Iter-280, train loss-0.4422, acc-0.1700, valid loss-0.4396, acc-0.1952, test loss-0.4389, acc-0.2091
Iter-290, train loss-0.4353, acc-0.1600, valid loss-0.4387, acc-0.1982, test loss-0.4380, acc-0.2103
Iter-300, train loss-0.4398, acc-0.1700, valid loss-0.4375, acc-0.1998, test loss-0.4367, acc-0.2151
Iter-310, train loss-0.4354, acc-0.2300, valid loss-0.4364, acc-0.2062, test loss-0.4356, acc-0.2189
Iter-320, train loss-0.4392, acc-0.2000, valid loss-0.4350, acc-0.2108, test loss-0.4343, acc-0.2214
Iter-330, train loss-0.4344, acc-0.1700, valid loss-0.4337, acc-0.2148, test loss-0.4329, acc-0.2231
Iter-340, train loss-0.4333, acc-0.2200, valid loss-0.4324, acc-0.2214, test loss-0.4316, acc-0.2280
Iter-350, train loss-0.4281, acc-0.2700, valid loss-0.4311, acc-0.2286, test loss-0.4304, acc-0.2362
Iter-360, train loss-0.4346, acc-0.2200, valid loss-0.4299, acc-0.2376, test loss-0.4291, acc-0.2446
Iter-370, train loss-0.4355, acc-0.1500, valid loss-0.4286, acc-0.2484, test loss-0.4278, acc-0.2568
Iter-380, train loss-0.4303, acc-0.2600, valid loss-0.4271, acc-0.2668, test loss-0.4264, acc-0.2763
Iter-390, train loss-0.4273, acc-0.3000, valid loss-0.4257, acc-0.2864, test loss-0.4250, acc-0.2966
Iter-400, train loss-0.4240, acc-0.2500, valid loss-0.4242, acc-0.3008, test loss-0.4234, acc-0.3093
Iter-410, train loss-0.4225, acc-0.3000, valid loss-0.4226, acc-0.3104, test loss-0.4218, acc-0.3182
Iter-420, train loss-0.4230, acc-0.3300, valid loss-0.4212, acc-0.3190, test loss-0.4204, acc-0.3267
Iter-430, train loss-0.4174, acc-0.3700, valid loss-0.4196, acc-0.3272, test loss-0.4189, acc-0.3365
Iter-440, train loss-0.4099, acc-0.4000, valid loss-0.4180, acc-0.3306, test loss-0.4173, acc-0.3428
Iter-450, train loss-0.4127, acc-0.3700, valid loss-0.4164, acc-0.3324, test loss-0.4157, acc-0.3473
Iter-460, train loss-0.4143, acc-0.3500, valid loss-0.4146, acc-0.3408, test loss-0.4139, acc-0.3522
Iter-470, train loss-0.4111, acc-0.4500, valid loss-0.4128, acc-0.3474, test loss-0.4122, acc-0.3568
Iter-480, train loss-0.4082, acc-0.3900, valid loss-0.4112, acc-0.3584, test loss-0.4105, acc-0.3668
Iter-490, train loss-0.4147, acc-0.2700, valid loss-0.4097, acc-0.3662, test loss-0.4091, acc-0.3755
Iter-500, train loss-0.4117, acc-0.3100, valid loss-0.4079, acc-0.3728, test loss-0.4073, acc-0.3823
Iter-510, train loss-0.4138, acc-0.3600, valid loss-0.4062, acc-0.3800, test loss-0.4056, acc-0.3838
Iter-520, train loss-0.4026, acc-0.4500, valid loss-0.4045, acc-0.3836, test loss-0.4039, acc-0.3901
Iter-530, train loss-0.3978, acc-0.4400, valid loss-0.4031, acc-0.3954, test loss-0.4024, acc-0.3967
Iter-540, train loss-0.4085, acc-0.3200, valid loss-0.4015, acc-0.4002, test loss-0.4008, acc-0.4002
Iter-550, train loss-0.4108, acc-0.2900, valid loss-0.3996, acc-0.4000, test loss-0.3990, acc-0.4006
Iter-560, train loss-0.4014, acc-0.3100, valid loss-0.3978, acc-0.4004, test loss-0.3972, acc-0.3995
Iter-570, train loss-0.3973, acc-0.4400, valid loss-0.3960, acc-0.3972, test loss-0.3954, acc-0.3960
Iter-580, train loss-0.4013, acc-0.3500, valid loss-0.3943, acc-0.3968, test loss-0.3937, acc-0.3957
Iter-590, train loss-0.4028, acc-0.3700, valid loss-0.3926, acc-0.3960, test loss-0.3921, acc-0.3949
Iter-600, train loss-0.3884, acc-0.3900, valid loss-0.3910, acc-0.3928, test loss-0.3905, acc-0.3912
Iter-610, train loss-0.3886, acc-0.4400, valid loss-0.3891, acc-0.3930, test loss-0.3885, acc-0.3927
Iter-620, train loss-0.3933, acc-0.3600, valid loss-0.3874, acc-0.3936, test loss-0.3869, acc-0.3932
Iter-630, train loss-0.3753, acc-0.4600, valid loss-0.3856, acc-0.3906, test loss-0.3851, acc-0.3899
Iter-640, train loss-0.3845, acc-0.4000, valid loss-0.3838, acc-0.3920, test loss-0.3834, acc-0.3905
Iter-650, train loss-0.3799, acc-0.4300, valid loss-0.3821, acc-0.3942, test loss-0.3817, acc-0.3893
Iter-660, train loss-0.3801, acc-0.4500, valid loss-0.3803, acc-0.4024, test loss-0.3800, acc-0.3937
Iter-670, train loss-0.3831, acc-0.3700, valid loss-0.3785, acc-0.4052, test loss-0.3783, acc-0.3955
Iter-680, train loss-0.3767, acc-0.4400, valid loss-0.3771, acc-0.4070, test loss-0.3769, acc-0.3967
Iter-690, train loss-0.3749, acc-0.4100, valid loss-0.3755, acc-0.4094, test loss-0.3753, acc-0.4021
Iter-700, train loss-0.3698, acc-0.4200, valid loss-0.3739, acc-0.4144, test loss-0.3737, acc-0.4066
Iter-710, train loss-0.3742, acc-0.4000, valid loss-0.3721, acc-0.4198, test loss-0.3720, acc-0.4108
Iter-720, train loss-0.3692, acc-0.4500, valid loss-0.3705, acc-0.4230, test loss-0.3704, acc-0.4129
Iter-730, train loss-0.3657, acc-0.4200, valid loss-0.3689, acc-0.4292, test loss-0.3689, acc-0.4204
Iter-740, train loss-0.3665, acc-0.4000, valid loss-0.3673, acc-0.4330, test loss-0.3673, acc-0.4229
Iter-750, train loss-0.3709, acc-0.4500, valid loss-0.3659, acc-0.4354, test loss-0.3659, acc-0.4237
Iter-760, train loss-0.3722, acc-0.4000, valid loss-0.3640, acc-0.4458, test loss-0.3642, acc-0.4301
Iter-770, train loss-0.3677, acc-0.4500, valid loss-0.3627, acc-0.4478, test loss-0.3628, acc-0.4321
Iter-780, train loss-0.3757, acc-0.3500, valid loss-0.3608, acc-0.4556, test loss-0.3610, acc-0.4379
Iter-790, train loss-0.3478, acc-0.5200, valid loss-0.3593, acc-0.4622, test loss-0.3595, acc-0.4436
Iter-800, train loss-0.3505, acc-0.5100, valid loss-0.3580, acc-0.4624, test loss-0.3583, acc-0.4438
Iter-810, train loss-0.3719, acc-0.4000, valid loss-0.3562, acc-0.4670, test loss-0.3564, acc-0.4471
Iter-820, train loss-0.3579, acc-0.4500, valid loss-0.3549, acc-0.4726, test loss-0.3551, acc-0.4525
Iter-830, train loss-0.3725, acc-0.3300, valid loss-0.3531, acc-0.4782, test loss-0.3534, acc-0.4602
Iter-840, train loss-0.3511, acc-0.4900, valid loss-0.3517, acc-0.4862, test loss-0.3521, acc-0.4695
Iter-850, train loss-0.3389, acc-0.5300, valid loss-0.3501, acc-0.4896, test loss-0.3505, acc-0.4773
Iter-860, train loss-0.3480, acc-0.4400, valid loss-0.3488, acc-0.4994, test loss-0.3492, acc-0.4869
Iter-870, train loss-0.3678, acc-0.4100, valid loss-0.3471, acc-0.5028, test loss-0.3476, acc-0.4907
Iter-880, train loss-0.3501, acc-0.5500, valid loss-0.3454, acc-0.5048, test loss-0.3460, acc-0.4932
Iter-890, train loss-0.3595, acc-0.4800, valid loss-0.3439, acc-0.5070, test loss-0.3444, acc-0.4933
Iter-900, train loss-0.3344, acc-0.5500, valid loss-0.3425, acc-0.5084, test loss-0.3431, acc-0.4988
Iter-910, train loss-0.3642, acc-0.4300, valid loss-0.3409, acc-0.5150, test loss-0.3415, acc-0.5061
Iter-920, train loss-0.3349, acc-0.5300, valid loss-0.3393, acc-0.5188, test loss-0.3399, acc-0.5094
Iter-930, train loss-0.3588, acc-0.4200, valid loss-0.3378, acc-0.5202, test loss-0.3385, acc-0.5139
Iter-940, train loss-0.3382, acc-0.5500, valid loss-0.3360, acc-0.5234, test loss-0.3368, acc-0.5179
Iter-950, train loss-0.3445, acc-0.5100, valid loss-0.3345, acc-0.5266, test loss-0.3353, acc-0.5262
Iter-960, train loss-0.3546, acc-0.4100, valid loss-0.3328, acc-0.5300, test loss-0.3336, acc-0.5290
Iter-970, train loss-0.3226, acc-0.5600, valid loss-0.3312, acc-0.5328, test loss-0.3321, acc-0.5318
Iter-980, train loss-0.3327, acc-0.5500, valid loss-0.3296, acc-0.5334, test loss-0.3305, acc-0.5310
Iter-990, train loss-0.3218, acc-0.6200, valid loss-0.3280, acc-0.5376, test loss-0.3288, acc-0.5356
Iter-1000, train loss-0.3228, acc-0.5900, valid loss-0.3263, acc-0.5416, test loss-0.3271, acc-0.5381
Iter-1010, train loss-0.3316, acc-0.5400, valid loss-0.3247, acc-0.5446, test loss-0.3255, acc-0.5425
Iter-1020, train loss-0.3128, acc-0.5600, valid loss-0.3233, acc-0.5458, test loss-0.3241, acc-0.5454
Iter-1030, train loss-0.3176, acc-0.5600, valid loss-0.3217, acc-0.5506, test loss-0.3224, acc-0.5499
Iter-1040, train loss-0.3343, acc-0.5100, valid loss-0.3201, acc-0.5542, test loss-0.3208, acc-0.5533
Iter-1050, train loss-0.3183, acc-0.5700, valid loss-0.3189, acc-0.5532, test loss-0.3196, acc-0.5525
Iter-1060, train loss-0.3114, acc-0.5500, valid loss-0.3173, acc-0.5554, test loss-0.3180, acc-0.5578
Iter-1070, train loss-0.3232, acc-0.5000, valid loss-0.3162, acc-0.5566, test loss-0.3168, acc-0.5595
Iter-1080, train loss-0.3156, acc-0.5700, valid loss-0.3149, acc-0.5582, test loss-0.3156, acc-0.5596
Iter-1090, train loss-0.3201, acc-0.5500, valid loss-0.3134, acc-0.5586, test loss-0.3142, acc-0.5625
Iter-1100, train loss-0.3223, acc-0.5500, valid loss-0.3120, acc-0.5616, test loss-0.3127, acc-0.5620
Iter-1110, train loss-0.3138, acc-0.5400, valid loss-0.3105, acc-0.5646, test loss-0.3111, acc-0.5660
Iter-1120, train loss-0.2741, acc-0.7100, valid loss-0.3090, acc-0.5666, test loss-0.3096, acc-0.5675
Iter-1130, train loss-0.3051, acc-0.5700, valid loss-0.3076, acc-0.5674, test loss-0.3081, acc-0.5693
Iter-1140, train loss-0.3026, acc-0.6000, valid loss-0.3062, acc-0.5696, test loss-0.3066, acc-0.5727
Iter-1150, train loss-0.3152, acc-0.5000, valid loss-0.3046, acc-0.5718, test loss-0.3050, acc-0.5757
Iter-1160, train loss-0.2929, acc-0.6200, valid loss-0.3034, acc-0.5736, test loss-0.3038, acc-0.5783
Iter-1170, train loss-0.3055, acc-0.5800, valid loss-0.3020, acc-0.5740, test loss-0.3024, acc-0.5793
Iter-1180, train loss-0.2949, acc-0.5900, valid loss-0.3003, acc-0.5784, test loss-0.3007, acc-0.5837
Iter-1190, train loss-0.3091, acc-0.5600, valid loss-0.2990, acc-0.5796, test loss-0.2994, acc-0.5850
Iter-1200, train loss-0.2935, acc-0.5700, valid loss-0.2977, acc-0.5800, test loss-0.2980, acc-0.5852
Iter-1210, train loss-0.2968, acc-0.5800, valid loss-0.2964, acc-0.5810, test loss-0.2967, acc-0.5871
Iter-1220, train loss-0.2829, acc-0.6300, valid loss-0.2952, acc-0.5836, test loss-0.2956, acc-0.5894
Iter-1230, train loss-0.2961, acc-0.5500, valid loss-0.2941, acc-0.5838, test loss-0.2943, acc-0.5902
Iter-1240, train loss-0.2988, acc-0.5600, valid loss-0.2929, acc-0.5848, test loss-0.2931, acc-0.5903
Iter-1250, train loss-0.3064, acc-0.5600, valid loss-0.2917, acc-0.5866, test loss-0.2919, acc-0.5926
Iter-1260, train loss-0.3028, acc-0.6000, valid loss-0.2906, acc-0.5898, test loss-0.2907, acc-0.5930
Iter-1270, train loss-0.2854, acc-0.5800, valid loss-0.2895, acc-0.5914, test loss-0.2896, acc-0.5953
Iter-1280, train loss-0.2781, acc-0.6500, valid loss-0.2883, acc-0.5916, test loss-0.2884, acc-0.5979
Iter-1290, train loss-0.2846, acc-0.5700, valid loss-0.2872, acc-0.5926, test loss-0.2871, acc-0.6003
Iter-1300, train loss-0.2992, acc-0.5400, valid loss-0.2859, acc-0.5944, test loss-0.2858, acc-0.6006
Iter-1310, train loss-0.2776, acc-0.5800, valid loss-0.2848, acc-0.5958, test loss-0.2847, acc-0.6017
Iter-1320, train loss-0.2771, acc-0.6400, valid loss-0.2835, acc-0.5966, test loss-0.2834, acc-0.6020
Iter-1330, train loss-0.2830, acc-0.6000, valid loss-0.2826, acc-0.5972, test loss-0.2824, acc-0.6016
Iter-1340, train loss-0.2614, acc-0.6500, valid loss-0.2814, acc-0.6016, test loss-0.2811, acc-0.6055
Iter-1350, train loss-0.2947, acc-0.5600, valid loss-0.2799, acc-0.6060, test loss-0.2796, acc-0.6082
Iter-1360, train loss-0.2867, acc-0.5700, valid loss-0.2788, acc-0.6048, test loss-0.2785, acc-0.6098
Iter-1370, train loss-0.2765, acc-0.6000, valid loss-0.2774, acc-0.6072, test loss-0.2772, acc-0.6110
Iter-1380, train loss-0.2785, acc-0.5800, valid loss-0.2765, acc-0.6076, test loss-0.2761, acc-0.6143
Iter-1390, train loss-0.2985, acc-0.5500, valid loss-0.2757, acc-0.6102, test loss-0.2753, acc-0.6155
Iter-1400, train loss-0.3167, acc-0.4500, valid loss-0.2742, acc-0.6108, test loss-0.2739, acc-0.6173
Iter-1410, train loss-0.2978, acc-0.5900, valid loss-0.2734, acc-0.6118, test loss-0.2729, acc-0.6172
Iter-1420, train loss-0.2392, acc-0.6600, valid loss-0.2722, acc-0.6152, test loss-0.2717, acc-0.6193
Iter-1430, train loss-0.2586, acc-0.6300, valid loss-0.2713, acc-0.6166, test loss-0.2708, acc-0.6221
Iter-1440, train loss-0.2445, acc-0.6900, valid loss-0.2699, acc-0.6180, test loss-0.2694, acc-0.6245
Iter-1450, train loss-0.2605, acc-0.6300, valid loss-0.2686, acc-0.6206, test loss-0.2681, acc-0.6263
Iter-1460, train loss-0.2675, acc-0.5900, valid loss-0.2675, acc-0.6230, test loss-0.2670, acc-0.6260
Iter-1470, train loss-0.2526, acc-0.6300, valid loss-0.2668, acc-0.6234, test loss-0.2662, acc-0.6262
Iter-1480, train loss-0.2779, acc-0.6000, valid loss-0.2657, acc-0.6248, test loss-0.2650, acc-0.6293
Iter-1490, train loss-0.2877, acc-0.6100, valid loss-0.2641, acc-0.6288, test loss-0.2635, acc-0.6314
Iter-1500, train loss-0.2650, acc-0.5800, valid loss-0.2630, acc-0.6310, test loss-0.2623, acc-0.6342
Iter-1510, train loss-0.2734, acc-0.5900, valid loss-0.2618, acc-0.6316, test loss-0.2611, acc-0.6370
Iter-1520, train loss-0.2516, acc-0.6500, valid loss-0.2604, acc-0.6354, test loss-0.2596, acc-0.6402
Iter-1530, train loss-0.2620, acc-0.6800, valid loss-0.2593, acc-0.6374, test loss-0.2585, acc-0.6421
Iter-1540, train loss-0.2576, acc-0.6500, valid loss-0.2581, acc-0.6388, test loss-0.2574, acc-0.6447
Iter-1550, train loss-0.2506, acc-0.6700, valid loss-0.2571, acc-0.6400, test loss-0.2563, acc-0.6460
Iter-1560, train loss-0.2383, acc-0.7100, valid loss-0.2561, acc-0.6414, test loss-0.2553, acc-0.6477
Iter-1570, train loss-0.2511, acc-0.6600, valid loss-0.2549, acc-0.6428, test loss-0.2540, acc-0.6480
Iter-1580, train loss-0.2346, acc-0.7000, valid loss-0.2540, acc-0.6440, test loss-0.2532, acc-0.6503
Iter-1590, train loss-0.2595, acc-0.6400, valid loss-0.2528, acc-0.6464, test loss-0.2520, acc-0.6512
Iter-1600, train loss-0.2278, acc-0.7200, valid loss-0.2517, acc-0.6462, test loss-0.2506, acc-0.6521
Iter-1610, train loss-0.2655, acc-0.6100, valid loss-0.2511, acc-0.6482, test loss-0.2499, acc-0.6534
Iter-1620, train loss-0.2310, acc-0.6500, valid loss-0.2501, acc-0.6504, test loss-0.2487, acc-0.6555
Iter-1630, train loss-0.2505, acc-0.6500, valid loss-0.2491, acc-0.6516, test loss-0.2476, acc-0.6575
Iter-1640, train loss-0.2666, acc-0.6100, valid loss-0.2486, acc-0.6516, test loss-0.2469, acc-0.6583
Iter-1650, train loss-0.2683, acc-0.6700, valid loss-0.2477, acc-0.6532, test loss-0.2460, acc-0.6626
Iter-1660, train loss-0.2467, acc-0.6600, valid loss-0.2468, acc-0.6554, test loss-0.2452, acc-0.6639
Iter-1670, train loss-0.2416, acc-0.6500, valid loss-0.2460, acc-0.6562, test loss-0.2444, acc-0.6668
Iter-1680, train loss-0.2767, acc-0.5900, valid loss-0.2450, acc-0.6590, test loss-0.2435, acc-0.6698
Iter-1690, train loss-0.2572, acc-0.6000, valid loss-0.2439, acc-0.6616, test loss-0.2422, acc-0.6717
Iter-1700, train loss-0.2312, acc-0.6700, valid loss-0.2428, acc-0.6648, test loss-0.2412, acc-0.6721
Iter-1710, train loss-0.2221, acc-0.6800, valid loss-0.2416, acc-0.6658, test loss-0.2400, acc-0.6746
Iter-1720, train loss-0.2419, acc-0.6700, valid loss-0.2405, acc-0.6684, test loss-0.2388, acc-0.6770
Iter-1730, train loss-0.2659, acc-0.6300, valid loss-0.2392, acc-0.6696, test loss-0.2375, acc-0.6811
Iter-1740, train loss-0.2359, acc-0.6700, valid loss-0.2382, acc-0.6710, test loss-0.2365, acc-0.6824
Iter-1750, train loss-0.2504, acc-0.6000, valid loss-0.2374, acc-0.6726, test loss-0.2356, acc-0.6828
Iter-1760, train loss-0.2222, acc-0.7000, valid loss-0.2368, acc-0.6732, test loss-0.2349, acc-0.6833
Iter-1770, train loss-0.2310, acc-0.7100, valid loss-0.2361, acc-0.6732, test loss-0.2343, acc-0.6839
Iter-1780, train loss-0.2312, acc-0.6600, valid loss-0.2353, acc-0.6764, test loss-0.2334, acc-0.6876
Iter-1790, train loss-0.2139, acc-0.7000, valid loss-0.2342, acc-0.6764, test loss-0.2323, acc-0.6889
Iter-1800, train loss-0.2425, acc-0.6800, valid loss-0.2331, acc-0.6768, test loss-0.2313, acc-0.6905
Iter-1810, train loss-0.2091, acc-0.6800, valid loss-0.2324, acc-0.6786, test loss-0.2304, acc-0.6930
Iter-1820, train loss-0.1954, acc-0.7600, valid loss-0.2316, acc-0.6782, test loss-0.2295, acc-0.6937
Iter-1830, train loss-0.2105, acc-0.7000, valid loss-0.2308, acc-0.6808, test loss-0.2288, acc-0.6945
Iter-1840, train loss-0.1744, acc-0.7900, valid loss-0.2301, acc-0.6826, test loss-0.2280, acc-0.6959
Iter-1850, train loss-0.2409, acc-0.7000, valid loss-0.2295, acc-0.6830, test loss-0.2273, acc-0.6962
Iter-1860, train loss-0.2135, acc-0.7300, valid loss-0.2290, acc-0.6836, test loss-0.2269, acc-0.6967
Iter-1870, train loss-0.2587, acc-0.5800, valid loss-0.2282, acc-0.6856, test loss-0.2261, acc-0.6983
Iter-1880, train loss-0.2502, acc-0.6600, valid loss-0.2273, acc-0.6870, test loss-0.2253, acc-0.7004
Iter-1890, train loss-0.2185, acc-0.7100, valid loss-0.2268, acc-0.6874, test loss-0.2247, acc-0.7006
Iter-1900, train loss-0.2136, acc-0.7200, valid loss-0.2258, acc-0.6896, test loss-0.2236, acc-0.7026
Iter-1910, train loss-0.2201, acc-0.6800, valid loss-0.2251, acc-0.6902, test loss-0.2230, acc-0.7053
Iter-1920, train loss-0.2344, acc-0.6400, valid loss-0.2246, acc-0.6922, test loss-0.2225, acc-0.7066
Iter-1930, train loss-0.2057, acc-0.7200, valid loss-0.2239, acc-0.6944, test loss-0.2217, acc-0.7078
Iter-1940, train loss-0.2321, acc-0.6900, valid loss-0.2235, acc-0.6980, test loss-0.2213, acc-0.7094
Iter-1950, train loss-0.2207, acc-0.7100, valid loss-0.2226, acc-0.6990, test loss-0.2205, acc-0.7111
Iter-1960, train loss-0.1980, acc-0.7500, valid loss-0.2221, acc-0.7000, test loss-0.2199, acc-0.7121
Iter-1970, train loss-0.2123, acc-0.7400, valid loss-0.2213, acc-0.7004, test loss-0.2191, acc-0.7129
Iter-1980, train loss-0.2143, acc-0.7300, valid loss-0.2211, acc-0.7012, test loss-0.2188, acc-0.7126
Iter-1990, train loss-0.1963, acc-0.7200, valid loss-0.2205, acc-0.7024, test loss-0.2182, acc-0.7139
Iter-2000, train loss-0.2307, acc-0.6700, valid loss-0.2202, acc-0.7034, test loss-0.2179, acc-0.7140
Iter-2010, train loss-0.2228, acc-0.6700, valid loss-0.2197, acc-0.7038, test loss-0.2174, acc-0.7144
Iter-2020, train loss-0.2356, acc-0.6600, valid loss-0.2190, acc-0.7066, test loss-0.2168, acc-0.7153
Iter-2030, train loss-0.2521, acc-0.6200, valid loss-0.2184, acc-0.7088, test loss-0.2162, acc-0.7155
Iter-2040, train loss-0.2275, acc-0.6600, valid loss-0.2180, acc-0.7076, test loss-0.2157, acc-0.7165
Iter-2050, train loss-0.1954, acc-0.7800, valid loss-0.2179, acc-0.7086, test loss-0.2155, acc-0.7170
Iter-2060, train loss-0.2029, acc-0.7100, valid loss-0.2171, acc-0.7090, test loss-0.2148, acc-0.7182
Iter-2070, train loss-0.2188, acc-0.7400, valid loss-0.2166, acc-0.7092, test loss-0.2143, acc-0.7197
Iter-2080, train loss-0.2127, acc-0.7000, valid loss-0.2162, acc-0.7110, test loss-0.2140, acc-0.7201
Iter-2090, train loss-0.2107, acc-0.7700, valid loss-0.2156, acc-0.7142, test loss-0.2135, acc-0.7199
Iter-2100, train loss-0.2287, acc-0.6700, valid loss-0.2152, acc-0.7132, test loss-0.2131, acc-0.7192
Iter-2110, train loss-0.2383, acc-0.6500, valid loss-0.2149, acc-0.7122, test loss-0.2128, acc-0.7196
Iter-2120, train loss-0.2071, acc-0.6900, valid loss-0.2146, acc-0.7132, test loss-0.2124, acc-0.7191
Iter-2130, train loss-0.2315, acc-0.6900, valid loss-0.2143, acc-0.7130, test loss-0.2121, acc-0.7202
Iter-2140, train loss-0.2276, acc-0.6700, valid loss-0.2139, acc-0.7140, test loss-0.2119, acc-0.7192
Iter-2150, train loss-0.2690, acc-0.5300, valid loss-0.2136, acc-0.7124, test loss-0.2114, acc-0.7192
Iter-2160, train loss-0.2427, acc-0.7000, valid loss-0.2132, acc-0.7114, test loss-0.2110, acc-0.7200
Iter-2170, train loss-0.2192, acc-0.6700, valid loss-0.2127, acc-0.7122, test loss-0.2105, acc-0.7197
Iter-2180, train loss-0.2144, acc-0.7400, valid loss-0.2122, acc-0.7126, test loss-0.2100, acc-0.7202
Iter-2190, train loss-0.1869, acc-0.7500, valid loss-0.2119, acc-0.7130, test loss-0.2098, acc-0.7191
Iter-2200, train loss-0.2157, acc-0.6900, valid loss-0.2115, acc-0.7142, test loss-0.2093, acc-0.7188
Iter-2210, train loss-0.1641, acc-0.8000, valid loss-0.2111, acc-0.7140, test loss-0.2089, acc-0.7194
Iter-2220, train loss-0.1866, acc-0.7500, valid loss-0.2109, acc-0.7146, test loss-0.2087, acc-0.7212
Iter-2230, train loss-0.2419, acc-0.6700, valid loss-0.2104, acc-0.7146, test loss-0.2083, acc-0.7230
Iter-2240, train loss-0.1806, acc-0.7600, valid loss-0.2104, acc-0.7132, test loss-0.2082, acc-0.7222
Iter-2250, train loss-0.2184, acc-0.6900, valid loss-0.2100, acc-0.7158, test loss-0.2079, acc-0.7222
Iter-2260, train loss-0.2053, acc-0.7400, valid loss-0.2095, acc-0.7166, test loss-0.2074, acc-0.7236
Iter-2270, train loss-0.1971, acc-0.7400, valid loss-0.2092, acc-0.7162, test loss-0.2070, acc-0.7240
Iter-2280, train loss-0.1946, acc-0.7400, valid loss-0.2089, acc-0.7182, test loss-0.2068, acc-0.7246
Iter-2290, train loss-0.2229, acc-0.6900, valid loss-0.2090, acc-0.7174, test loss-0.2067, acc-0.7241
Iter-2300, train loss-0.1840, acc-0.7900, valid loss-0.2084, acc-0.7182, test loss-0.2062, acc-0.7259
Iter-2310, train loss-0.2126, acc-0.7200, valid loss-0.2082, acc-0.7194, test loss-0.2060, acc-0.7260
Iter-2320, train loss-0.2122, acc-0.7300, valid loss-0.2076, acc-0.7208, test loss-0.2055, acc-0.7262
Iter-2330, train loss-0.2064, acc-0.7600, valid loss-0.2073, acc-0.7198, test loss-0.2052, acc-0.7254
Iter-2340, train loss-0.1999, acc-0.7400, valid loss-0.2069, acc-0.7210, test loss-0.2047, acc-0.7265
Iter-2350, train loss-0.2030, acc-0.7500, valid loss-0.2065, acc-0.7216, test loss-0.2043, acc-0.7268
Iter-2360, train loss-0.1946, acc-0.7200, valid loss-0.2064, acc-0.7214, test loss-0.2042, acc-0.7267
Iter-2370, train loss-0.2177, acc-0.7000, valid loss-0.2061, acc-0.7212, test loss-0.2039, acc-0.7276
Iter-2380, train loss-0.2098, acc-0.6700, valid loss-0.2059, acc-0.7236, test loss-0.2036, acc-0.7284
Iter-2390, train loss-0.1699, acc-0.7800, valid loss-0.2058, acc-0.7234, test loss-0.2036, acc-0.7284
Iter-2400, train loss-0.2307, acc-0.6700, valid loss-0.2054, acc-0.7246, test loss-0.2032, acc-0.7277
Iter-2410, train loss-0.1746, acc-0.7700, valid loss-0.2053, acc-0.7238, test loss-0.2030, acc-0.7281
Iter-2420, train loss-0.2013, acc-0.7100, valid loss-0.2049, acc-0.7230, test loss-0.2027, acc-0.7288
Iter-2430, train loss-0.2077, acc-0.7300, valid loss-0.2049, acc-0.7234, test loss-0.2026, acc-0.7285
Iter-2440, train loss-0.2225, acc-0.6900, valid loss-0.2047, acc-0.7228, test loss-0.2025, acc-0.7280
Iter-2450, train loss-0.2212, acc-0.7300, valid loss-0.2044, acc-0.7234, test loss-0.2023, acc-0.7282
Iter-2460, train loss-0.2193, acc-0.7400, valid loss-0.2042, acc-0.7246, test loss-0.2022, acc-0.7292
Iter-2470, train loss-0.1657, acc-0.8300, valid loss-0.2039, acc-0.7252, test loss-0.2020, acc-0.7291
Iter-2480, train loss-0.2295, acc-0.6700, valid loss-0.2040, acc-0.7230, test loss-0.2021, acc-0.7271
Iter-2490, train loss-0.2094, acc-0.7100, valid loss-0.2036, acc-0.7242, test loss-0.2017, acc-0.7272
Iter-2500, train loss-0.1915, acc-0.7500, valid loss-0.2031, acc-0.7250, test loss-0.2014, acc-0.7279
Iter-2510, train loss-0.1904, acc-0.7100, valid loss-0.2027, acc-0.7260, test loss-0.2011, acc-0.7288
Iter-2520, train loss-0.2504, acc-0.6600, valid loss-0.2025, acc-0.7268, test loss-0.2010, acc-0.7291
Iter-2530, train loss-0.2162, acc-0.7000, valid loss-0.2020, acc-0.7280, test loss-0.2005, acc-0.7302
Iter-2540, train loss-0.1942, acc-0.7400, valid loss-0.2017, acc-0.7284, test loss-0.2002, acc-0.7306
Iter-2550, train loss-0.2174, acc-0.7100, valid loss-0.2013, acc-0.7306, test loss-0.1999, acc-0.7306
Iter-2560, train loss-0.2041, acc-0.6900, valid loss-0.2011, acc-0.7306, test loss-0.1997, acc-0.7306
Iter-2570, train loss-0.2354, acc-0.6400, valid loss-0.2009, acc-0.7290, test loss-0.1993, acc-0.7316
Iter-2580, train loss-0.2102, acc-0.7000, valid loss-0.2006, acc-0.7308, test loss-0.1991, acc-0.7328
Iter-2590, train loss-0.1955, acc-0.7100, valid loss-0.2004, acc-0.7292, test loss-0.1987, acc-0.7344
Iter-2600, train loss-0.1921, acc-0.7000, valid loss-0.2003, acc-0.7314, test loss-0.1985, acc-0.7338
Iter-2610, train loss-0.2146, acc-0.7100, valid loss-0.2000, acc-0.7320, test loss-0.1983, acc-0.7356
Iter-2620, train loss-0.2193, acc-0.6800, valid loss-0.1998, acc-0.7322, test loss-0.1980, acc-0.7355
Iter-2630, train loss-0.2049, acc-0.7600, valid loss-0.1996, acc-0.7314, test loss-0.1979, acc-0.7358
Iter-2640, train loss-0.1879, acc-0.7700, valid loss-0.1993, acc-0.7324, test loss-0.1977, acc-0.7364
Iter-2650, train loss-0.1856, acc-0.7500, valid loss-0.1990, acc-0.7324, test loss-0.1974, acc-0.7368
Iter-2660, train loss-0.2084, acc-0.6800, valid loss-0.1990, acc-0.7328, test loss-0.1973, acc-0.7372
Iter-2670, train loss-0.2229, acc-0.6700, valid loss-0.1989, acc-0.7316, test loss-0.1973, acc-0.7376
Iter-2680, train loss-0.2011, acc-0.7600, valid loss-0.1986, acc-0.7326, test loss-0.1969, acc-0.7383
Iter-2690, train loss-0.1596, acc-0.8300, valid loss-0.1987, acc-0.7316, test loss-0.1969, acc-0.7367
Iter-2700, train loss-0.1822, acc-0.8000, valid loss-0.1988, acc-0.7308, test loss-0.1970, acc-0.7362
Iter-2710, train loss-0.1661, acc-0.8000, valid loss-0.1985, acc-0.7308, test loss-0.1967, acc-0.7380
Iter-2720, train loss-0.2375, acc-0.6500, valid loss-0.1982, acc-0.7310, test loss-0.1964, acc-0.7384
Iter-2730, train loss-0.1797, acc-0.7500, valid loss-0.1980, acc-0.7298, test loss-0.1962, acc-0.7392
Iter-2740, train loss-0.2276, acc-0.6800, valid loss-0.1977, acc-0.7310, test loss-0.1960, acc-0.7391
Iter-2750, train loss-0.1920, acc-0.7100, valid loss-0.1974, acc-0.7328, test loss-0.1959, acc-0.7389
Iter-2760, train loss-0.2252, acc-0.6700, valid loss-0.1970, acc-0.7326, test loss-0.1955, acc-0.7400
Iter-2770, train loss-0.1955, acc-0.7400, valid loss-0.1967, acc-0.7358, test loss-0.1954, acc-0.7390
Iter-2780, train loss-0.2123, acc-0.7100, valid loss-0.1966, acc-0.7354, test loss-0.1952, acc-0.7397
Iter-2790, train loss-0.1630, acc-0.7800, valid loss-0.1966, acc-0.7362, test loss-0.1953, acc-0.7395
Iter-2800, train loss-0.1989, acc-0.7300, valid loss-0.1964, acc-0.7370, test loss-0.1951, acc-0.7400
Iter-2810, train loss-0.1999, acc-0.7300, valid loss-0.1962, acc-0.7368, test loss-0.1950, acc-0.7408
Iter-2820, train loss-0.1837, acc-0.7400, valid loss-0.1960, acc-0.7382, test loss-0.1949, acc-0.7399
Iter-2830, train loss-0.1662, acc-0.8200, valid loss-0.1957, acc-0.7388, test loss-0.1946, acc-0.7412
Iter-2840, train loss-0.1950, acc-0.7300, valid loss-0.1956, acc-0.7376, test loss-0.1945, acc-0.7413
Iter-2850, train loss-0.2047, acc-0.7100, valid loss-0.1953, acc-0.7360, test loss-0.1941, acc-0.7415
Iter-2860, train loss-0.2035, acc-0.7300, valid loss-0.1953, acc-0.7390, test loss-0.1942, acc-0.7418
Iter-2870, train loss-0.2279, acc-0.7000, valid loss-0.1950, acc-0.7388, test loss-0.1941, acc-0.7412
Iter-2880, train loss-0.2158, acc-0.7000, valid loss-0.1950, acc-0.7392, test loss-0.1940, acc-0.7430
Iter-2890, train loss-0.1923, acc-0.7500, valid loss-0.1946, acc-0.7420, test loss-0.1938, acc-0.7430
Iter-2900, train loss-0.2194, acc-0.6700, valid loss-0.1944, acc-0.7414, test loss-0.1937, acc-0.7430
Iter-2910, train loss-0.2030, acc-0.7400, valid loss-0.1943, acc-0.7414, test loss-0.1937, acc-0.7431
Iter-2920, train loss-0.1813, acc-0.7800, valid loss-0.1942, acc-0.7416, test loss-0.1935, acc-0.7437
Iter-2930, train loss-0.2156, acc-0.6900, valid loss-0.1939, acc-0.7424, test loss-0.1933, acc-0.7433
Iter-2940, train loss-0.2024, acc-0.7200, valid loss-0.1940, acc-0.7420, test loss-0.1934, acc-0.7429
Iter-2950, train loss-0.2380, acc-0.6500, valid loss-0.1939, acc-0.7418, test loss-0.1932, acc-0.7440
Iter-2960, train loss-0.1960, acc-0.7500, valid loss-0.1938, acc-0.7426, test loss-0.1931, acc-0.7443
Iter-2970, train loss-0.1892, acc-0.7500, valid loss-0.1934, acc-0.7434, test loss-0.1927, acc-0.7446
Iter-2980, train loss-0.1566, acc-0.8000, valid loss-0.1934, acc-0.7418, test loss-0.1926, acc-0.7437
Iter-2990, train loss-0.1968, acc-0.7000, valid loss-0.1933, acc-0.7428, test loss-0.1925, acc-0.7438
Iter-3000, train loss-0.1707, acc-0.7900, valid loss-0.1932, acc-0.7414, test loss-0.1924, acc-0.7444
Iter-3010, train loss-0.1910, acc-0.7400, valid loss-0.1930, acc-0.7422, test loss-0.1922, acc-0.7451
Iter-3020, train loss-0.2205, acc-0.7000, valid loss-0.1929, acc-0.7436, test loss-0.1921, acc-0.7446
Iter-3030, train loss-0.1939, acc-0.7400, valid loss-0.1925, acc-0.7444, test loss-0.1918, acc-0.7452
Iter-3040, train loss-0.2025, acc-0.7400, valid loss-0.1923, acc-0.7448, test loss-0.1917, acc-0.7455
Iter-3050, train loss-0.1953, acc-0.7100, valid loss-0.1920, acc-0.7456, test loss-0.1915, acc-0.7463
Iter-3060, train loss-0.2222, acc-0.7000, valid loss-0.1919, acc-0.7464, test loss-0.1914, acc-0.7467
Iter-3070, train loss-0.2131, acc-0.7400, valid loss-0.1920, acc-0.7446, test loss-0.1915, acc-0.7468
Iter-3080, train loss-0.2093, acc-0.7200, valid loss-0.1917, acc-0.7468, test loss-0.1914, acc-0.7469
Iter-3090, train loss-0.2101, acc-0.6700, valid loss-0.1916, acc-0.7472, test loss-0.1913, acc-0.7474
Iter-3100, train loss-0.1974, acc-0.7200, valid loss-0.1915, acc-0.7480, test loss-0.1913, acc-0.7467
Iter-3110, train loss-0.1668, acc-0.7800, valid loss-0.1916, acc-0.7490, test loss-0.1915, acc-0.7464
Iter-3120, train loss-0.2034, acc-0.7300, valid loss-0.1914, acc-0.7482, test loss-0.1914, acc-0.7469
Iter-3130, train loss-0.1766, acc-0.7900, valid loss-0.1914, acc-0.7488, test loss-0.1915, acc-0.7461
Iter-3140, train loss-0.1915, acc-0.7100, valid loss-0.1912, acc-0.7492, test loss-0.1914, acc-0.7454
Iter-3150, train loss-0.2101, acc-0.6800, valid loss-0.1913, acc-0.7488, test loss-0.1914, acc-0.7451
Iter-3160, train loss-0.2228, acc-0.7100, valid loss-0.1912, acc-0.7470, test loss-0.1913, acc-0.7451
Iter-3170, train loss-0.2049, acc-0.7000, valid loss-0.1908, acc-0.7498, test loss-0.1911, acc-0.7465
Iter-3180, train loss-0.1665, acc-0.7900, valid loss-0.1909, acc-0.7478, test loss-0.1911, acc-0.7461
Iter-3190, train loss-0.1577, acc-0.7900, valid loss-0.1907, acc-0.7496, test loss-0.1909, acc-0.7467
Iter-3200, train loss-0.1758, acc-0.8100, valid loss-0.1906, acc-0.7494, test loss-0.1907, acc-0.7469
Iter-3210, train loss-0.1968, acc-0.7100, valid loss-0.1904, acc-0.7498, test loss-0.1906, acc-0.7476
Iter-3220, train loss-0.2334, acc-0.6300, valid loss-0.1903, acc-0.7484, test loss-0.1905, acc-0.7480
Iter-3230, train loss-0.1387, acc-0.8200, valid loss-0.1902, acc-0.7496, test loss-0.1903, acc-0.7477
Iter-3240, train loss-0.1740, acc-0.7500, valid loss-0.1899, acc-0.7502, test loss-0.1902, acc-0.7489
Iter-3250, train loss-0.2156, acc-0.7000, valid loss-0.1898, acc-0.7502, test loss-0.1900, acc-0.7491
Iter-3260, train loss-0.1968, acc-0.7300, valid loss-0.1897, acc-0.7492, test loss-0.1898, acc-0.7495
Iter-3270, train loss-0.1652, acc-0.8000, valid loss-0.1897, acc-0.7494, test loss-0.1899, acc-0.7492
Iter-3280, train loss-0.1918, acc-0.7400, valid loss-0.1899, acc-0.7488, test loss-0.1900, acc-0.7483
Iter-3290, train loss-0.1625, acc-0.7900, valid loss-0.1898, acc-0.7500, test loss-0.1901, acc-0.7475
Iter-3300, train loss-0.2045, acc-0.7200, valid loss-0.1895, acc-0.7508, test loss-0.1898, acc-0.7479
Iter-3310, train loss-0.1645, acc-0.7700, valid loss-0.1897, acc-0.7486, test loss-0.1899, acc-0.7480
Iter-3320, train loss-0.2314, acc-0.6700, valid loss-0.1896, acc-0.7500, test loss-0.1898, acc-0.7485
Iter-3330, train loss-0.2071, acc-0.7100, valid loss-0.1895, acc-0.7492, test loss-0.1896, acc-0.7495
Iter-3340, train loss-0.2140, acc-0.7400, valid loss-0.1896, acc-0.7488, test loss-0.1896, acc-0.7490
Iter-3350, train loss-0.1697, acc-0.8000, valid loss-0.1895, acc-0.7476, test loss-0.1896, acc-0.7488
Iter-3360, train loss-0.1533, acc-0.8200, valid loss-0.1894, acc-0.7486, test loss-0.1896, acc-0.7492
Iter-3370, train loss-0.1641, acc-0.7600, valid loss-0.1896, acc-0.7484, test loss-0.1898, acc-0.7487
Iter-3380, train loss-0.2297, acc-0.6900, valid loss-0.1894, acc-0.7480, test loss-0.1897, acc-0.7482
Iter-3390, train loss-0.2082, acc-0.7100, valid loss-0.1891, acc-0.7486, test loss-0.1894, acc-0.7489
Iter-3400, train loss-0.1833, acc-0.7700, valid loss-0.1889, acc-0.7492, test loss-0.1894, acc-0.7491
Iter-3410, train loss-0.1586, acc-0.7900, valid loss-0.1889, acc-0.7494, test loss-0.1894, acc-0.7495
Iter-3420, train loss-0.2000, acc-0.7500, valid loss-0.1887, acc-0.7500, test loss-0.1891, acc-0.7502
Iter-3430, train loss-0.2042, acc-0.7000, valid loss-0.1884, acc-0.7508, test loss-0.1889, acc-0.7503
Iter-3440, train loss-0.2110, acc-0.6900, valid loss-0.1883, acc-0.7506, test loss-0.1888, acc-0.7509
Iter-3450, train loss-0.2122, acc-0.7500, valid loss-0.1883, acc-0.7514, test loss-0.1889, acc-0.7504
Iter-3460, train loss-0.1978, acc-0.7200, valid loss-0.1883, acc-0.7508, test loss-0.1888, acc-0.7503
Iter-3470, train loss-0.1791, acc-0.7600, valid loss-0.1882, acc-0.7518, test loss-0.1888, acc-0.7506
Iter-3480, train loss-0.1860, acc-0.7500, valid loss-0.1880, acc-0.7532, test loss-0.1887, acc-0.7510
Iter-3490, train loss-0.2018, acc-0.7200, valid loss-0.1880, acc-0.7530, test loss-0.1888, acc-0.7508
Iter-3500, train loss-0.1584, acc-0.8200, valid loss-0.1880, acc-0.7522, test loss-0.1888, acc-0.7511
Iter-3510, train loss-0.1764, acc-0.7400, valid loss-0.1881, acc-0.7516, test loss-0.1889, acc-0.7504
Iter-3520, train loss-0.1785, acc-0.7700, valid loss-0.1878, acc-0.7526, test loss-0.1885, acc-0.7508
Iter-3530, train loss-0.1803, acc-0.7600, valid loss-0.1879, acc-0.7524, test loss-0.1886, acc-0.7519
Iter-3540, train loss-0.2253, acc-0.6900, valid loss-0.1876, acc-0.7522, test loss-0.1884, acc-0.7517
Iter-3550, train loss-0.1869, acc-0.7700, valid loss-0.1877, acc-0.7520, test loss-0.1887, acc-0.7513
Iter-3560, train loss-0.2234, acc-0.7000, valid loss-0.1877, acc-0.7522, test loss-0.1887, acc-0.7512
Iter-3570, train loss-0.1531, acc-0.8400, valid loss-0.1881, acc-0.7518, test loss-0.1891, acc-0.7517
Iter-3580, train loss-0.1825, acc-0.7500, valid loss-0.1880, acc-0.7522, test loss-0.1890, acc-0.7515
Iter-3590, train loss-0.1782, acc-0.7400, valid loss-0.1883, acc-0.7532, test loss-0.1892, acc-0.7516
Iter-3600, train loss-0.1887, acc-0.7900, valid loss-0.1885, acc-0.7532, test loss-0.1894, acc-0.7509
Iter-3610, train loss-0.2025, acc-0.7300, valid loss-0.1886, acc-0.7520, test loss-0.1896, acc-0.7497
Iter-3620, train loss-0.1958, acc-0.7400, valid loss-0.1883, acc-0.7528, test loss-0.1894, acc-0.7503
Iter-3630, train loss-0.2052, acc-0.7200, valid loss-0.1885, acc-0.7538, test loss-0.1895, acc-0.7506
Iter-3640, train loss-0.1928, acc-0.6900, valid loss-0.1883, acc-0.7530, test loss-0.1893, acc-0.7499
Iter-3650, train loss-0.1743, acc-0.7600, valid loss-0.1882, acc-0.7516, test loss-0.1893, acc-0.7505
Iter-3660, train loss-0.1857, acc-0.7800, valid loss-0.1881, acc-0.7516, test loss-0.1892, acc-0.7505
Iter-3670, train loss-0.1860, acc-0.7700, valid loss-0.1881, acc-0.7516, test loss-0.1893, acc-0.7504
Iter-3680, train loss-0.1762, acc-0.8100, valid loss-0.1882, acc-0.7512, test loss-0.1894, acc-0.7505
Iter-3690, train loss-0.1605, acc-0.8000, valid loss-0.1881, acc-0.7512, test loss-0.1893, acc-0.7506
Iter-3700, train loss-0.1822, acc-0.7800, valid loss-0.1880, acc-0.7532, test loss-0.1894, acc-0.7496
Iter-3710, train loss-0.2171, acc-0.7000, valid loss-0.1879, acc-0.7536, test loss-0.1894, acc-0.7503
Iter-3720, train loss-0.1833, acc-0.7400, valid loss-0.1881, acc-0.7532, test loss-0.1896, acc-0.7497
Iter-3730, train loss-0.2001, acc-0.7300, valid loss-0.1884, acc-0.7534, test loss-0.1898, acc-0.7486
Iter-3740, train loss-0.1749, acc-0.7800, valid loss-0.1887, acc-0.7522, test loss-0.1902, acc-0.7484
Iter-3750, train loss-0.1469, acc-0.8100, valid loss-0.1884, acc-0.7540, test loss-0.1899, acc-0.7493
Iter-3760, train loss-0.1835, acc-0.7700, valid loss-0.1884, acc-0.7532, test loss-0.1898, acc-0.7507
Iter-3770, train loss-0.1987, acc-0.7600, valid loss-0.1883, acc-0.7544, test loss-0.1897, acc-0.7506
Iter-3780, train loss-0.1444, acc-0.8500, valid loss-0.1883, acc-0.7538, test loss-0.1899, acc-0.7499
Iter-3790, train loss-0.1895, acc-0.7500, valid loss-0.1882, acc-0.7540, test loss-0.1898, acc-0.7499
Iter-3800, train loss-0.1873, acc-0.7600, valid loss-0.1882, acc-0.7548, test loss-0.1899, acc-0.7500
Iter-3810, train loss-0.2097, acc-0.6900, valid loss-0.1882, acc-0.7554, test loss-0.1902, acc-0.7502
Iter-3820, train loss-0.1776, acc-0.7600, valid loss-0.1880, acc-0.7564, test loss-0.1901, acc-0.7501
Iter-3830, train loss-0.2019, acc-0.7600, valid loss-0.1879, acc-0.7556, test loss-0.1901, acc-0.7500
Iter-3840, train loss-0.2290, acc-0.6700, valid loss-0.1879, acc-0.7554, test loss-0.1900, acc-0.7500
Iter-3850, train loss-0.1709, acc-0.8000, valid loss-0.1881, acc-0.7546, test loss-0.1902, acc-0.7505
Iter-3860, train loss-0.2032, acc-0.7200, valid loss-0.1879, acc-0.7554, test loss-0.1901, acc-0.7514
Iter-3870, train loss-0.1990, acc-0.7400, valid loss-0.1880, acc-0.7550, test loss-0.1902, acc-0.7510
Iter-3880, train loss-0.1854, acc-0.7300, valid loss-0.1879, acc-0.7552, test loss-0.1901, acc-0.7513
Iter-3890, train loss-0.1619, acc-0.8100, valid loss-0.1880, acc-0.7558, test loss-0.1902, acc-0.7501
Iter-3900, train loss-0.1820, acc-0.7700, valid loss-0.1877, acc-0.7558, test loss-0.1899, acc-0.7501
Iter-3910, train loss-0.1990, acc-0.7300, valid loss-0.1876, acc-0.7564, test loss-0.1898, acc-0.7513
Iter-3920, train loss-0.2013, acc-0.7300, valid loss-0.1876, acc-0.7568, test loss-0.1901, acc-0.7509
Iter-3930, train loss-0.1760, acc-0.7900, valid loss-0.1878, acc-0.7562, test loss-0.1902, acc-0.7513
Iter-3940, train loss-0.2147, acc-0.6800, valid loss-0.1881, acc-0.7544, test loss-0.1903, acc-0.7520
Iter-3950, train loss-0.1900, acc-0.7900, valid loss-0.1881, acc-0.7550, test loss-0.1903, acc-0.7517
Iter-3960, train loss-0.2267, acc-0.6700, valid loss-0.1883, acc-0.7544, test loss-0.1906, acc-0.7520
Iter-3970, train loss-0.2231, acc-0.7100, valid loss-0.1883, acc-0.7568, test loss-0.1907, acc-0.7516
Iter-3980, train loss-0.1797, acc-0.7900, valid loss-0.1885, acc-0.7556, test loss-0.1910, acc-0.7511
Iter-3990, train loss-0.1673, acc-0.8000, valid loss-0.1884, acc-0.7562, test loss-0.1910, acc-0.7515
Iter-4000, train loss-0.2007, acc-0.7200, valid loss-0.1885, acc-0.7566, test loss-0.1911, acc-0.7512
Iter-4010, train loss-0.1672, acc-0.7800, valid loss-0.1883, acc-0.7580, test loss-0.1910, acc-0.7510
Iter-4020, train loss-0.2367, acc-0.6900, valid loss-0.1883, acc-0.7570, test loss-0.1910, acc-0.7510
Iter-4030, train loss-0.1574, acc-0.8100, valid loss-0.1881, acc-0.7574, test loss-0.1909, acc-0.7521
Iter-4040, train loss-0.1627, acc-0.7900, valid loss-0.1882, acc-0.7566, test loss-0.1910, acc-0.7517
Iter-4050, train loss-0.1752, acc-0.7600, valid loss-0.1883, acc-0.7568, test loss-0.1909, acc-0.7522
Iter-4060, train loss-0.1990, acc-0.7500, valid loss-0.1883, acc-0.7580, test loss-0.1911, acc-0.7520
Iter-4070, train loss-0.1708, acc-0.7600, valid loss-0.1883, acc-0.7584, test loss-0.1912, acc-0.7525
Iter-4080, train loss-0.1712, acc-0.8100, valid loss-0.1885, acc-0.7564, test loss-0.1914, acc-0.7517
Iter-4090, train loss-0.2161, acc-0.7200, valid loss-0.1887, acc-0.7562, test loss-0.1917, acc-0.7508
Iter-4100, train loss-0.1727, acc-0.7400, valid loss-0.1887, acc-0.7568, test loss-0.1917, acc-0.7504
Iter-4110, train loss-0.1551, acc-0.8300, valid loss-0.1889, acc-0.7570, test loss-0.1920, acc-0.7500
Iter-4120, train loss-0.1827, acc-0.7600, valid loss-0.1888, acc-0.7580, test loss-0.1919, acc-0.7506
Iter-4130, train loss-0.1976, acc-0.7500, valid loss-0.1891, acc-0.7562, test loss-0.1923, acc-0.7507
Iter-4140, train loss-0.1851, acc-0.7400, valid loss-0.1891, acc-0.7556, test loss-0.1923, acc-0.7502
Iter-4150, train loss-0.1964, acc-0.7600, valid loss-0.1894, acc-0.7564, test loss-0.1925, acc-0.7505
Iter-4160, train loss-0.2393, acc-0.6500, valid loss-0.1894, acc-0.7556, test loss-0.1926, acc-0.7502
Iter-4170, train loss-0.1907, acc-0.7500, valid loss-0.1892, acc-0.7564, test loss-0.1924, acc-0.7507
Iter-4180, train loss-0.2056, acc-0.7400, valid loss-0.1896, acc-0.7564, test loss-0.1929, acc-0.7509
Iter-4190, train loss-0.1554, acc-0.8400, valid loss-0.1902, acc-0.7546, test loss-0.1935, acc-0.7491
Iter-4200, train loss-0.2143, acc-0.6700, valid loss-0.1902, acc-0.7552, test loss-0.1937, acc-0.7490
Iter-4210, train loss-0.1911, acc-0.7500, valid loss-0.1904, acc-0.7568, test loss-0.1939, acc-0.7492
Iter-4220, train loss-0.1894, acc-0.7300, valid loss-0.1903, acc-0.7556, test loss-0.1938, acc-0.7489
Iter-4230, train loss-0.1891, acc-0.7500, valid loss-0.1903, acc-0.7554, test loss-0.1938, acc-0.7486
Iter-4240, train loss-0.2272, acc-0.7100, valid loss-0.1903, acc-0.7550, test loss-0.1938, acc-0.7489
Iter-4250, train loss-0.1948, acc-0.7700, valid loss-0.1904, acc-0.7562, test loss-0.1940, acc-0.7492
Iter-4260, train loss-0.2143, acc-0.7200, valid loss-0.1905, acc-0.7566, test loss-0.1941, acc-0.7488
Iter-4270, train loss-0.1736, acc-0.7900, valid loss-0.1905, acc-0.7560, test loss-0.1942, acc-0.7485
Iter-4280, train loss-0.1866, acc-0.7200, valid loss-0.1906, acc-0.7552, test loss-0.1941, acc-0.7484
Iter-4290, train loss-0.1626, acc-0.8100, valid loss-0.1906, acc-0.7556, test loss-0.1942, acc-0.7471
Iter-4300, train loss-0.2315, acc-0.6900, valid loss-0.1909, acc-0.7552, test loss-0.1944, acc-0.7471
Iter-4310, train loss-0.1829, acc-0.7700, valid loss-0.1913, acc-0.7540, test loss-0.1948, acc-0.7482
Iter-4320, train loss-0.1865, acc-0.7100, valid loss-0.1913, acc-0.7552, test loss-0.1949, acc-0.7488
Iter-4330, train loss-0.1784, acc-0.7700, valid loss-0.1915, acc-0.7532, test loss-0.1949, acc-0.7483
Iter-4340, train loss-0.1801, acc-0.7800, valid loss-0.1918, acc-0.7532, test loss-0.1953, acc-0.7477
Iter-4350, train loss-0.1879, acc-0.7400, valid loss-0.1920, acc-0.7554, test loss-0.1954, acc-0.7478
Iter-4360, train loss-0.2155, acc-0.6700, valid loss-0.1917, acc-0.7548, test loss-0.1952, acc-0.7482
Iter-4370, train loss-0.1935, acc-0.7900, valid loss-0.1921, acc-0.7554, test loss-0.1955, acc-0.7479
Iter-4380, train loss-0.1804, acc-0.8000, valid loss-0.1923, acc-0.7534, test loss-0.1956, acc-0.7482
Iter-4390, train loss-0.1872, acc-0.7300, valid loss-0.1926, acc-0.7536, test loss-0.1958, acc-0.7476
Iter-4400, train loss-0.1897, acc-0.7600, valid loss-0.1926, acc-0.7542, test loss-0.1960, acc-0.7475
Iter-4410, train loss-0.2115, acc-0.7600, valid loss-0.1927, acc-0.7564, test loss-0.1962, acc-0.7476
Iter-4420, train loss-0.1772, acc-0.7800, valid loss-0.1927, acc-0.7552, test loss-0.1963, acc-0.7483
Iter-4430, train loss-0.1935, acc-0.7400, valid loss-0.1928, acc-0.7536, test loss-0.1962, acc-0.7486
Iter-4440, train loss-0.1598, acc-0.8300, valid loss-0.1930, acc-0.7546, test loss-0.1965, acc-0.7491
Iter-4450, train loss-0.1748, acc-0.7600, valid loss-0.1934, acc-0.7546, test loss-0.1968, acc-0.7481
Iter-4460, train loss-0.2331, acc-0.6800, valid loss-0.1935, acc-0.7564, test loss-0.1970, acc-0.7483
Iter-4470, train loss-0.1666, acc-0.7800, valid loss-0.1934, acc-0.7540, test loss-0.1968, acc-0.7483
Iter-4480, train loss-0.2670, acc-0.6400, valid loss-0.1937, acc-0.7556, test loss-0.1972, acc-0.7487
Iter-4490, train loss-0.2342, acc-0.7100, valid loss-0.1935, acc-0.7576, test loss-0.1969, acc-0.7495
Iter-4500, train loss-0.1942, acc-0.7600, valid loss-0.1938, acc-0.7578, test loss-0.1971, acc-0.7499
Iter-4510, train loss-0.2192, acc-0.6900, valid loss-0.1940, acc-0.7586, test loss-0.1973, acc-0.7498
Iter-4520, train loss-0.1974, acc-0.8000, valid loss-0.1941, acc-0.7592, test loss-0.1975, acc-0.7501
Iter-4530, train loss-0.1702, acc-0.7800, valid loss-0.1944, acc-0.7580, test loss-0.1978, acc-0.7499
Iter-4540, train loss-0.2493, acc-0.6600, valid loss-0.1945, acc-0.7584, test loss-0.1978, acc-0.7504
Iter-4550, train loss-0.1845, acc-0.7500, valid loss-0.1945, acc-0.7570, test loss-0.1979, acc-0.7502
Iter-4560, train loss-0.2081, acc-0.7100, valid loss-0.1943, acc-0.7588, test loss-0.1978, acc-0.7510
Iter-4570, train loss-0.1953, acc-0.7200, valid loss-0.1944, acc-0.7592, test loss-0.1980, acc-0.7516
Iter-4580, train loss-0.1762, acc-0.7800, valid loss-0.1949, acc-0.7578, test loss-0.1985, acc-0.7493
Iter-4590, train loss-0.1939, acc-0.7700, valid loss-0.1951, acc-0.7558, test loss-0.1985, acc-0.7486
Iter-4600, train loss-0.2206, acc-0.6800, valid loss-0.1954, acc-0.7574, test loss-0.1990, acc-0.7487
Iter-4610, train loss-0.2055, acc-0.7300, valid loss-0.1956, acc-0.7552, test loss-0.1993, acc-0.7475
Iter-4620, train loss-0.2178, acc-0.7200, valid loss-0.1956, acc-0.7564, test loss-0.1995, acc-0.7473
Iter-4630, train loss-0.2137, acc-0.7400, valid loss-0.1960, acc-0.7548, test loss-0.1999, acc-0.7473
Iter-4640, train loss-0.2079, acc-0.7300, valid loss-0.1958, acc-0.7556, test loss-0.1997, acc-0.7486
Iter-4650, train loss-0.2203, acc-0.6800, valid loss-0.1957, acc-0.7578, test loss-0.1998, acc-0.7489
Iter-4660, train loss-0.1938, acc-0.7500, valid loss-0.1959, acc-0.7564, test loss-0.1999, acc-0.7485
Iter-4670, train loss-0.1909, acc-0.7600, valid loss-0.1966, acc-0.7566, test loss-0.2007, acc-0.7477
Iter-4680, train loss-0.2496, acc-0.6900, valid loss-0.1970, acc-0.7546, test loss-0.2011, acc-0.7466
Iter-4690, train loss-0.1813, acc-0.7600, valid loss-0.1969, acc-0.7566, test loss-0.2011, acc-0.7477
Iter-4700, train loss-0.1722, acc-0.7700, valid loss-0.1971, acc-0.7578, test loss-0.2014, acc-0.7476
Iter-4710, train loss-0.1889, acc-0.7800, valid loss-0.1973, acc-0.7578, test loss-0.2014, acc-0.7475
Iter-4720, train loss-0.1884, acc-0.7700, valid loss-0.1976, acc-0.7570, test loss-0.2017, acc-0.7470
Iter-4730, train loss-0.2146, acc-0.7600, valid loss-0.1981, acc-0.7570, test loss-0.2023, acc-0.7463
Iter-4740, train loss-0.1446, acc-0.8700, valid loss-0.1982, acc-0.7572, test loss-0.2023, acc-0.7463
Iter-4750, train loss-0.1994, acc-0.7700, valid loss-0.1985, acc-0.7564, test loss-0.2026, acc-0.7463
Iter-4760, train loss-0.1892, acc-0.7600, valid loss-0.1988, acc-0.7558, test loss-0.2028, acc-0.7467
Iter-4770, train loss-0.2121, acc-0.7600, valid loss-0.1994, acc-0.7550, test loss-0.2034, acc-0.7461
Iter-4780, train loss-0.2441, acc-0.7000, valid loss-0.1994, acc-0.7562, test loss-0.2034, acc-0.7456
Iter-4790, train loss-0.2083, acc-0.7000, valid loss-0.2001, acc-0.7550, test loss-0.2041, acc-0.7450
Iter-4800, train loss-0.2229, acc-0.7300, valid loss-0.2001, acc-0.7560, test loss-0.2042, acc-0.7448
Iter-4810, train loss-0.2657, acc-0.6400, valid loss-0.2005, acc-0.7558, test loss-0.2046, acc-0.7452
Iter-4820, train loss-0.1651, acc-0.7700, valid loss-0.2003, acc-0.7550, test loss-0.2045, acc-0.7464
Iter-4830, train loss-0.2226, acc-0.7200, valid loss-0.2005, acc-0.7552, test loss-0.2047, acc-0.7458
Iter-4840, train loss-0.2183, acc-0.7100, valid loss-0.2007, acc-0.7558, test loss-0.2049, acc-0.7462
Iter-4850, train loss-0.2050, acc-0.7100, valid loss-0.2013, acc-0.7552, test loss-0.2056, acc-0.7447
Iter-4860, train loss-0.2102, acc-0.7800, valid loss-0.2015, acc-0.7548, test loss-0.2058, acc-0.7436
Iter-4870, train loss-0.2441, acc-0.6600, valid loss-0.2017, acc-0.7552, test loss-0.2060, acc-0.7451
Iter-4880, train loss-0.2245, acc-0.7100, valid loss-0.2019, acc-0.7540, test loss-0.2062, acc-0.7450
Iter-4890, train loss-0.1904, acc-0.7900, valid loss-0.2021, acc-0.7544, test loss-0.2064, acc-0.7441
Iter-4900, train loss-0.2066, acc-0.7300, valid loss-0.2026, acc-0.7516, test loss-0.2069, acc-0.7426
Iter-4910, train loss-0.1923, acc-0.7400, valid loss-0.2029, acc-0.7514, test loss-0.2073, acc-0.7433
Iter-4920, train loss-0.1678, acc-0.7900, valid loss-0.2026, acc-0.7530, test loss-0.2071, acc-0.7438
Iter-4930, train loss-0.2370, acc-0.6900, valid loss-0.2028, acc-0.7524, test loss-0.2072, acc-0.7419
Iter-4940, train loss-0.1992, acc-0.7100, valid loss-0.2031, acc-0.7520, test loss-0.2076, acc-0.7419
Iter-4950, train loss-0.2109, acc-0.7200, valid loss-0.2029, acc-0.7532, test loss-0.2075, acc-0.7440
Iter-4960, train loss-0.2407, acc-0.6800, valid loss-0.2026, acc-0.7544, test loss-0.2072, acc-0.7448
Iter-4970, train loss-0.2387, acc-0.6900, valid loss-0.2028, acc-0.7536, test loss-0.2076, acc-0.7454
Iter-4980, train loss-0.1976, acc-0.7600, valid loss-0.2028, acc-0.7538, test loss-0.2074, acc-0.7457
Iter-4990, train loss-0.1683, acc-0.8200, valid loss-0.2029, acc-0.7544, test loss-0.2077, acc-0.7461
Iter-5000, train loss-0.2307, acc-0.6900, valid loss-0.2032, acc-0.7548, test loss-0.2080, acc-0.7463
Iter-5010, train loss-0.2031, acc-0.7400, valid loss-0.2033, acc-0.7550, test loss-0.2081, acc-0.7460
Iter-5020, train loss-0.2079, acc-0.7700, valid loss-0.2032, acc-0.7542, test loss-0.2082, acc-0.7463
Iter-5030, train loss-0.2087, acc-0.7400, valid loss-0.2031, acc-0.7566, test loss-0.2082, acc-0.7450
Iter-5040, train loss-0.2309, acc-0.7300, valid loss-0.2033, acc-0.7558, test loss-0.2083, acc-0.7450
Iter-5050, train loss-0.1932, acc-0.7600, valid loss-0.2036, acc-0.7554, test loss-0.2087, acc-0.7445
Iter-5060, train loss-0.1845, acc-0.7800, valid loss-0.2040, acc-0.7554, test loss-0.2090, acc-0.7443
Iter-5070, train loss-0.1955, acc-0.8100, valid loss-0.2040, acc-0.7554, test loss-0.2091, acc-0.7451
Iter-5080, train loss-0.2434, acc-0.7000, valid loss-0.2045, acc-0.7546, test loss-0.2097, acc-0.7442
Iter-5090, train loss-0.2002, acc-0.7700, valid loss-0.2045, acc-0.7544, test loss-0.2096, acc-0.7453
Iter-5100, train loss-0.1800, acc-0.7500, valid loss-0.2043, acc-0.7548, test loss-0.2095, acc-0.7459
Iter-5110, train loss-0.2514, acc-0.6100, valid loss-0.2043, acc-0.7542, test loss-0.2095, acc-0.7440
Iter-5120, train loss-0.2485, acc-0.6900, valid loss-0.2050, acc-0.7526, test loss-0.2100, acc-0.7426
Iter-5130, train loss-0.2627, acc-0.6600, valid loss-0.2052, acc-0.7522, test loss-0.2103, acc-0.7436
Iter-5140, train loss-0.2134, acc-0.7300, valid loss-0.2050, acc-0.7530, test loss-0.2101, acc-0.7441
Iter-5150, train loss-0.2312, acc-0.7300, valid loss-0.2054, acc-0.7522, test loss-0.2106, acc-0.7435
Iter-5160, train loss-0.2170, acc-0.7200, valid loss-0.2054, acc-0.7530, test loss-0.2106, acc-0.7422
Iter-5170, train loss-0.2178, acc-0.7400, valid loss-0.2056, acc-0.7514, test loss-0.2109, acc-0.7410
Iter-5180, train loss-0.2023, acc-0.7500, valid loss-0.2059, acc-0.7498, test loss-0.2112, acc-0.7405
Iter-5190, train loss-0.2231, acc-0.7100, valid loss-0.2062, acc-0.7498, test loss-0.2114, acc-0.7405
Iter-5200, train loss-0.2270, acc-0.7000, valid loss-0.2063, acc-0.7506, test loss-0.2114, acc-0.7408
Iter-5210, train loss-0.2275, acc-0.7100, valid loss-0.2067, acc-0.7500, test loss-0.2119, acc-0.7409
Iter-5220, train loss-0.2119, acc-0.7300, valid loss-0.2064, acc-0.7506, test loss-0.2117, acc-0.7416
Iter-5230, train loss-0.1653, acc-0.8100, valid loss-0.2066, acc-0.7490, test loss-0.2119, acc-0.7411
Iter-5240, train loss-0.2216, acc-0.7700, valid loss-0.2071, acc-0.7490, test loss-0.2122, acc-0.7396
Iter-5250, train loss-0.1982, acc-0.7500, valid loss-0.2069, acc-0.7498, test loss-0.2120, acc-0.7395
Iter-5260, train loss-0.2032, acc-0.7700, valid loss-0.2069, acc-0.7498, test loss-0.2121, acc-0.7395
Iter-5270, train loss-0.1940, acc-0.7700, valid loss-0.2070, acc-0.7496, test loss-0.2122, acc-0.7390
Iter-5280, train loss-0.1730, acc-0.8200, valid loss-0.2077, acc-0.7490, test loss-0.2128, acc-0.7382
Iter-5290, train loss-0.1973, acc-0.7700, valid loss-0.2076, acc-0.7490, test loss-0.2128, acc-0.7394
Iter-5300, train loss-0.2167, acc-0.7400, valid loss-0.2080, acc-0.7472, test loss-0.2132, acc-0.7387
Iter-5310, train loss-0.1954, acc-0.7900, valid loss-0.2082, acc-0.7484, test loss-0.2136, acc-0.7396
Iter-5320, train loss-0.2141, acc-0.7100, valid loss-0.2082, acc-0.7492, test loss-0.2135, acc-0.7396
Iter-5330, train loss-0.2266, acc-0.7000, valid loss-0.2083, acc-0.7502, test loss-0.2137, acc-0.7393
Iter-5340, train loss-0.2480, acc-0.6900, valid loss-0.2086, acc-0.7512, test loss-0.2140, acc-0.7378
Iter-5350, train loss-0.2109, acc-0.7400, valid loss-0.2091, acc-0.7490, test loss-0.2144, acc-0.7369
Iter-5360, train loss-0.2151, acc-0.7100, valid loss-0.2091, acc-0.7486, test loss-0.2142, acc-0.7367
Iter-5370, train loss-0.2610, acc-0.6200, valid loss-0.2094, acc-0.7492, test loss-0.2147, acc-0.7365
Iter-5380, train loss-0.2461, acc-0.7200, valid loss-0.2101, acc-0.7488, test loss-0.2154, acc-0.7367
Iter-5390, train loss-0.2270, acc-0.7400, valid loss-0.2103, acc-0.7458, test loss-0.2157, acc-0.7355
Iter-5400, train loss-0.2084, acc-0.7500, valid loss-0.2102, acc-0.7464, test loss-0.2155, acc-0.7362
Iter-5410, train loss-0.2262, acc-0.7300, valid loss-0.2108, acc-0.7452, test loss-0.2163, acc-0.7357
Iter-5420, train loss-0.2310, acc-0.7100, valid loss-0.2113, acc-0.7436, test loss-0.2167, acc-0.7351
Iter-5430, train loss-0.2235, acc-0.7500, valid loss-0.2115, acc-0.7430, test loss-0.2169, acc-0.7350
Iter-5440, train loss-0.2343, acc-0.7200, valid loss-0.2119, acc-0.7418, test loss-0.2173, acc-0.7336
Iter-5450, train loss-0.2353, acc-0.7600, valid loss-0.2122, acc-0.7418, test loss-0.2175, acc-0.7337
Iter-5460, train loss-0.2075, acc-0.7500, valid loss-0.2127, acc-0.7418, test loss-0.2181, acc-0.7335
Iter-5470, train loss-0.2261, acc-0.7500, valid loss-0.2126, acc-0.7422, test loss-0.2179, acc-0.7338
Iter-5480, train loss-0.2180, acc-0.7500, valid loss-0.2128, acc-0.7416, test loss-0.2181, acc-0.7332
Iter-5490, train loss-0.2136, acc-0.7500, valid loss-0.2131, acc-0.7414, test loss-0.2184, acc-0.7333
Iter-5500, train loss-0.2133, acc-0.7300, valid loss-0.2133, acc-0.7404, test loss-0.2187, acc-0.7333
Iter-5510, train loss-0.2556, acc-0.7000, valid loss-0.2137, acc-0.7412, test loss-0.2190, acc-0.7316
Iter-5520, train loss-0.2435, acc-0.6300, valid loss-0.2141, acc-0.7406, test loss-0.2194, acc-0.7319
Iter-5530, train loss-0.1852, acc-0.7400, valid loss-0.2139, acc-0.7410, test loss-0.2192, acc-0.7328
Iter-5540, train loss-0.2086, acc-0.7500, valid loss-0.2139, acc-0.7410, test loss-0.2192, acc-0.7322
Iter-5550, train loss-0.2262, acc-0.7400, valid loss-0.2142, acc-0.7414, test loss-0.2196, acc-0.7322
Iter-5560, train loss-0.2707, acc-0.6600, valid loss-0.2144, acc-0.7406, test loss-0.2199, acc-0.7316
Iter-5570, train loss-0.2379, acc-0.7000, valid loss-0.2140, acc-0.7428, test loss-0.2195, acc-0.7328
Iter-5580, train loss-0.2212, acc-0.7100, valid loss-0.2145, acc-0.7420, test loss-0.2198, acc-0.7329
Iter-5590, train loss-0.2005, acc-0.7400, valid loss-0.2147, acc-0.7404, test loss-0.2200, acc-0.7316
Iter-5600, train loss-0.2312, acc-0.7300, valid loss-0.2152, acc-0.7384, test loss-0.2205, acc-0.7311
Iter-5610, train loss-0.2516, acc-0.6900, valid loss-0.2155, acc-0.7374, test loss-0.2208, acc-0.7309
Iter-5620, train loss-0.2368, acc-0.7400, valid loss-0.2162, acc-0.7352, test loss-0.2214, acc-0.7293
Iter-5630, train loss-0.2380, acc-0.7200, valid loss-0.2163, acc-0.7354, test loss-0.2217, acc-0.7275
Iter-5640, train loss-0.1960, acc-0.7700, valid loss-0.2166, acc-0.7354, test loss-0.2220, acc-0.7289
Iter-5650, train loss-0.2319, acc-0.7300, valid loss-0.2169, acc-0.7370, test loss-0.2222, acc-0.7297
Iter-5660, train loss-0.2584, acc-0.6400, valid loss-0.2170, acc-0.7386, test loss-0.2222, acc-0.7296
Iter-5670, train loss-0.2705, acc-0.6300, valid loss-0.2171, acc-0.7374, test loss-0.2222, acc-0.7288
Iter-5680, train loss-0.2149, acc-0.7200, valid loss-0.2175, acc-0.7390, test loss-0.2226, acc-0.7289
Iter-5690, train loss-0.2181, acc-0.7500, valid loss-0.2181, acc-0.7386, test loss-0.2231, acc-0.7284
Iter-5700, train loss-0.2226, acc-0.7000, valid loss-0.2184, acc-0.7374, test loss-0.2235, acc-0.7282
Iter-5710, train loss-0.2161, acc-0.7500, valid loss-0.2183, acc-0.7384, test loss-0.2234, acc-0.7293
Iter-5720, train loss-0.2018, acc-0.7500, valid loss-0.2184, acc-0.7366, test loss-0.2234, acc-0.7286
Iter-5730, train loss-0.1959, acc-0.8000, valid loss-0.2183, acc-0.7374, test loss-0.2232, acc-0.7289
Iter-5740, train loss-0.2653, acc-0.6700, valid loss-0.2185, acc-0.7358, test loss-0.2233, acc-0.7281
Iter-5750, train loss-0.2703, acc-0.6300, valid loss-0.2184, acc-0.7372, test loss-0.2232, acc-0.7295
Iter-5760, train loss-0.2620, acc-0.6900, valid loss-0.2185, acc-0.7378, test loss-0.2234, acc-0.7307
Iter-5770, train loss-0.2524, acc-0.6800, valid loss-0.2186, acc-0.7370, test loss-0.2234, acc-0.7300
Iter-5780, train loss-0.2335, acc-0.6800, valid loss-0.2185, acc-0.7344, test loss-0.2232, acc-0.7294
Iter-5790, train loss-0.2440, acc-0.6400, valid loss-0.2188, acc-0.7348, test loss-0.2234, acc-0.7290
Iter-5800, train loss-0.2544, acc-0.6600, valid loss-0.2185, acc-0.7340, test loss-0.2231, acc-0.7286
Iter-5810, train loss-0.2341, acc-0.7000, valid loss-0.2189, acc-0.7344, test loss-0.2233, acc-0.7285
Iter-5820, train loss-0.2292, acc-0.7200, valid loss-0.2192, acc-0.7332, test loss-0.2236, acc-0.7282
Iter-5830, train loss-0.2427, acc-0.7200, valid loss-0.2195, acc-0.7314, test loss-0.2238, acc-0.7266
Iter-5840, train loss-0.2212, acc-0.7600, valid loss-0.2195, acc-0.7308, test loss-0.2240, acc-0.7259
Iter-5850, train loss-0.2251, acc-0.7400, valid loss-0.2197, acc-0.7318, test loss-0.2241, acc-0.7259
Iter-5860, train loss-0.2381, acc-0.7000, valid loss-0.2197, acc-0.7314, test loss-0.2241, acc-0.7272
Iter-5870, train loss-0.2606, acc-0.6600, valid loss-0.2197, acc-0.7312, test loss-0.2240, acc-0.7275
Iter-5880, train loss-0.2103, acc-0.7900, valid loss-0.2201, acc-0.7306, test loss-0.2244, acc-0.7274
Iter-5890, train loss-0.1988, acc-0.7700, valid loss-0.2199, acc-0.7320, test loss-0.2242, acc-0.7282
Iter-5900, train loss-0.2296, acc-0.7400, valid loss-0.2202, acc-0.7318, test loss-0.2243, acc-0.7271
Iter-5910, train loss-0.1983, acc-0.8100, valid loss-0.2202, acc-0.7316, test loss-0.2243, acc-0.7273
Iter-5920, train loss-0.2251, acc-0.6900, valid loss-0.2203, acc-0.7314, test loss-0.2243, acc-0.7266
Iter-5930, train loss-0.2152, acc-0.7600, valid loss-0.2206, acc-0.7306, test loss-0.2247, acc-0.7261
Iter-5940, train loss-0.1924, acc-0.7700, valid loss-0.2207, acc-0.7310, test loss-0.2249, acc-0.7259
Iter-5950, train loss-0.1923, acc-0.8000, valid loss-0.2204, acc-0.7312, test loss-0.2246, acc-0.7262
Iter-5960, train loss-0.2286, acc-0.7200, valid loss-0.2205, acc-0.7314, test loss-0.2248, acc-0.7254
Iter-5970, train loss-0.2076, acc-0.7600, valid loss-0.2205, acc-0.7304, test loss-0.2248, acc-0.7262
Iter-5980, train loss-0.2401, acc-0.7200, valid loss-0.2209, acc-0.7302, test loss-0.2251, acc-0.7264
Iter-5990, train loss-0.2451, acc-0.6500, valid loss-0.2211, acc-0.7306, test loss-0.2253, acc-0.7271
Iter-6000, train loss-0.2464, acc-0.7000, valid loss-0.2208, acc-0.7308, test loss-0.2251, acc-0.7278
Iter-6010, train loss-0.2064, acc-0.7700, valid loss-0.2213, acc-0.7304, test loss-0.2257, acc-0.7268
Iter-6020, train loss-0.2285, acc-0.7000, valid loss-0.2210, acc-0.7316, test loss-0.2253, acc-0.7269
Iter-6030, train loss-0.2167, acc-0.7400, valid loss-0.2209, acc-0.7316, test loss-0.2252, acc-0.7267
Iter-6040, train loss-0.2330, acc-0.7500, valid loss-0.2211, acc-0.7318, test loss-0.2254, acc-0.7272
Iter-6050, train loss-0.1953, acc-0.8300, valid loss-0.2215, acc-0.7310, test loss-0.2258, acc-0.7251
Iter-6060, train loss-0.2594, acc-0.6700, valid loss-0.2215, acc-0.7320, test loss-0.2259, acc-0.7253
Iter-6070, train loss-0.2131, acc-0.7800, valid loss-0.2216, acc-0.7320, test loss-0.2260, acc-0.7250
Iter-6080, train loss-0.2229, acc-0.7000, valid loss-0.2213, acc-0.7316, test loss-0.2256, acc-0.7243
Iter-6090, train loss-0.2092, acc-0.6700, valid loss-0.2212, acc-0.7338, test loss-0.2256, acc-0.7266
Iter-6100, train loss-0.2527, acc-0.6500, valid loss-0.2213, acc-0.7316, test loss-0.2257, acc-0.7253
Iter-6110, train loss-0.2238, acc-0.7500, valid loss-0.2217, acc-0.7304, test loss-0.2259, acc-0.7240
Iter-6120, train loss-0.2424, acc-0.6800, valid loss-0.2219, acc-0.7306, test loss-0.2261, acc-0.7239
Iter-6130, train loss-0.2073, acc-0.7400, valid loss-0.2222, acc-0.7298, test loss-0.2263, acc-0.7216
Iter-6140, train loss-0.1903, acc-0.8000, valid loss-0.2225, acc-0.7294, test loss-0.2266, acc-0.7206
Iter-6150, train loss-0.2057, acc-0.7600, valid loss-0.2224, acc-0.7298, test loss-0.2266, acc-0.7236
Iter-6160, train loss-0.1841, acc-0.7700, valid loss-0.2224, acc-0.7304, test loss-0.2264, acc-0.7237
Iter-6170, train loss-0.2146, acc-0.7500, valid loss-0.2224, acc-0.7332, test loss-0.2264, acc-0.7253
Iter-6180, train loss-0.2155, acc-0.7800, valid loss-0.2226, acc-0.7322, test loss-0.2266, acc-0.7249
Iter-6190, train loss-0.2289, acc-0.7000, valid loss-0.2228, acc-0.7320, test loss-0.2267, acc-0.7250
Iter-6200, train loss-0.2846, acc-0.6600, valid loss-0.2226, acc-0.7318, test loss-0.2265, acc-0.7266
Iter-6210, train loss-0.2190, acc-0.7400, valid loss-0.2229, acc-0.7328, test loss-0.2267, acc-0.7260
Iter-6220, train loss-0.2183, acc-0.7400, valid loss-0.2232, acc-0.7314, test loss-0.2269, acc-0.7238
Iter-6230, train loss-0.2839, acc-0.5900, valid loss-0.2231, acc-0.7326, test loss-0.2268, acc-0.7242
Iter-6240, train loss-0.2403, acc-0.7000, valid loss-0.2230, acc-0.7302, test loss-0.2266, acc-0.7225
Iter-6250, train loss-0.2163, acc-0.7000, valid loss-0.2229, acc-0.7312, test loss-0.2264, acc-0.7238
Iter-6260, train loss-0.1932, acc-0.7600, valid loss-0.2229, acc-0.7300, test loss-0.2263, acc-0.7226
Iter-6270, train loss-0.2168, acc-0.7500, valid loss-0.2226, acc-0.7320, test loss-0.2261, acc-0.7232
Iter-6280, train loss-0.2143, acc-0.7400, valid loss-0.2226, acc-0.7322, test loss-0.2261, acc-0.7246
Iter-6290, train loss-0.2606, acc-0.6600, valid loss-0.2226, acc-0.7312, test loss-0.2260, acc-0.7249
Iter-6300, train loss-0.2260, acc-0.7500, valid loss-0.2226, acc-0.7320, test loss-0.2260, acc-0.7245
Iter-6310, train loss-0.2195, acc-0.7300, valid loss-0.2226, acc-0.7324, test loss-0.2260, acc-0.7251
Iter-6320, train loss-0.2276, acc-0.7500, valid loss-0.2222, acc-0.7326, test loss-0.2255, acc-0.7268
Iter-6330, train loss-0.2332, acc-0.7000, valid loss-0.2223, acc-0.7328, test loss-0.2255, acc-0.7269
Iter-6340, train loss-0.2408, acc-0.6700, valid loss-0.2220, acc-0.7334, test loss-0.2252, acc-0.7278
Iter-6350, train loss-0.2033, acc-0.7600, valid loss-0.2223, acc-0.7332, test loss-0.2254, acc-0.7268
Iter-6360, train loss-0.2616, acc-0.7100, valid loss-0.2225, acc-0.7348, test loss-0.2256, acc-0.7285
Iter-6370, train loss-0.2311, acc-0.7200, valid loss-0.2226, acc-0.7346, test loss-0.2258, acc-0.7274
Iter-6380, train loss-0.2080, acc-0.7900, valid loss-0.2229, acc-0.7352, test loss-0.2260, acc-0.7280
Iter-6390, train loss-0.1930, acc-0.7400, valid loss-0.2229, acc-0.7360, test loss-0.2260, acc-0.7278
Iter-6400, train loss-0.1920, acc-0.8100, valid loss-0.2228, acc-0.7354, test loss-0.2258, acc-0.7276
Iter-6410, train loss-0.2062, acc-0.7200, valid loss-0.2230, acc-0.7344, test loss-0.2259, acc-0.7278
Iter-6420, train loss-0.2122, acc-0.7100, valid loss-0.2227, acc-0.7352, test loss-0.2257, acc-0.7265
Iter-6430, train loss-0.2442, acc-0.7200, valid loss-0.2229, acc-0.7348, test loss-0.2258, acc-0.7256
Iter-6440, train loss-0.1796, acc-0.7500, valid loss-0.2227, acc-0.7340, test loss-0.2257, acc-0.7253
Iter-6450, train loss-0.2297, acc-0.7000, valid loss-0.2224, acc-0.7338, test loss-0.2253, acc-0.7262
Iter-6460, train loss-0.2359, acc-0.7200, valid loss-0.2223, acc-0.7346, test loss-0.2252, acc-0.7256
Iter-6470, train loss-0.2166, acc-0.7100, valid loss-0.2227, acc-0.7362, test loss-0.2256, acc-0.7254
Iter-6480, train loss-0.2187, acc-0.7500, valid loss-0.2230, acc-0.7332, test loss-0.2259, acc-0.7245
Iter-6490, train loss-0.2091, acc-0.7700, valid loss-0.2225, acc-0.7356, test loss-0.2254, acc-0.7261
Iter-6500, train loss-0.1927, acc-0.7400, valid loss-0.2224, acc-0.7356, test loss-0.2253, acc-0.7259
Iter-6510, train loss-0.2015, acc-0.7700, valid loss-0.2223, acc-0.7348, test loss-0.2254, acc-0.7260
Iter-6520, train loss-0.2428, acc-0.6400, valid loss-0.2223, acc-0.7388, test loss-0.2253, acc-0.7288
Iter-6530, train loss-0.1943, acc-0.8000, valid loss-0.2222, acc-0.7382, test loss-0.2252, acc-0.7290
Iter-6540, train loss-0.2212, acc-0.7400, valid loss-0.2222, acc-0.7372, test loss-0.2250, acc-0.7286
Iter-6550, train loss-0.2341, acc-0.6800, valid loss-0.2222, acc-0.7378, test loss-0.2250, acc-0.7288
Iter-6560, train loss-0.1829, acc-0.7600, valid loss-0.2219, acc-0.7388, test loss-0.2247, acc-0.7299
Iter-6570, train loss-0.2603, acc-0.6800, valid loss-0.2217, acc-0.7386, test loss-0.2245, acc-0.7300
Iter-6580, train loss-0.2298, acc-0.7200, valid loss-0.2216, acc-0.7392, test loss-0.2242, acc-0.7312
Iter-6590, train loss-0.2601, acc-0.6900, valid loss-0.2217, acc-0.7384, test loss-0.2244, acc-0.7298
Iter-6600, train loss-0.2574, acc-0.6400, valid loss-0.2215, acc-0.7392, test loss-0.2242, acc-0.7302
Iter-6610, train loss-0.2417, acc-0.6700, valid loss-0.2213, acc-0.7398, test loss-0.2240, acc-0.7310
Iter-6620, train loss-0.2238, acc-0.7300, valid loss-0.2211, acc-0.7404, test loss-0.2237, acc-0.7326
Iter-6630, train loss-0.1908, acc-0.7600, valid loss-0.2210, acc-0.7380, test loss-0.2237, acc-0.7316
Iter-6640, train loss-0.2143, acc-0.7100, valid loss-0.2211, acc-0.7392, test loss-0.2238, acc-0.7320
Iter-6650, train loss-0.2254, acc-0.7200, valid loss-0.2208, acc-0.7392, test loss-0.2236, acc-0.7320
Iter-6660, train loss-0.2282, acc-0.7100, valid loss-0.2209, acc-0.7380, test loss-0.2236, acc-0.7318
Iter-6670, train loss-0.2373, acc-0.6600, valid loss-0.2208, acc-0.7372, test loss-0.2235, acc-0.7310
Iter-6680, train loss-0.2145, acc-0.7100, valid loss-0.2208, acc-0.7366, test loss-0.2234, acc-0.7305
Iter-6690, train loss-0.2041, acc-0.7500, valid loss-0.2205, acc-0.7338, test loss-0.2232, acc-0.7290
Iter-6700, train loss-0.2116, acc-0.7300, valid loss-0.2205, acc-0.7380, test loss-0.2231, acc-0.7321
Iter-6710, train loss-0.2328, acc-0.7200, valid loss-0.2206, acc-0.7376, test loss-0.2233, acc-0.7312
Iter-6720, train loss-0.2302, acc-0.7300, valid loss-0.2210, acc-0.7380, test loss-0.2237, acc-0.7305
Iter-6730, train loss-0.1867, acc-0.7600, valid loss-0.2209, acc-0.7376, test loss-0.2236, acc-0.7304
Iter-6740, train loss-0.2148, acc-0.7700, valid loss-0.2207, acc-0.7370, test loss-0.2234, acc-0.7296
Iter-6750, train loss-0.1892, acc-0.8200, valid loss-0.2203, acc-0.7372, test loss-0.2228, acc-0.7306
Iter-6760, train loss-0.1687, acc-0.8000, valid loss-0.2200, acc-0.7368, test loss-0.2227, acc-0.7299
Iter-6770, train loss-0.2061, acc-0.7500, valid loss-0.2201, acc-0.7384, test loss-0.2227, acc-0.7298
Iter-6780, train loss-0.2290, acc-0.7200, valid loss-0.2201, acc-0.7372, test loss-0.2226, acc-0.7304
Iter-6790, train loss-0.2331, acc-0.7300, valid loss-0.2199, acc-0.7372, test loss-0.2224, acc-0.7281
Iter-6800, train loss-0.2609, acc-0.7000, valid loss-0.2199, acc-0.7362, test loss-0.2223, acc-0.7287
Iter-6810, train loss-0.2131, acc-0.7600, valid loss-0.2199, acc-0.7366, test loss-0.2222, acc-0.7301
Iter-6820, train loss-0.2297, acc-0.7200, valid loss-0.2198, acc-0.7360, test loss-0.2220, acc-0.7306
Iter-6830, train loss-0.2055, acc-0.7200, valid loss-0.2198, acc-0.7376, test loss-0.2218, acc-0.7309
Iter-6840, train loss-0.2160, acc-0.7000, valid loss-0.2197, acc-0.7382, test loss-0.2218, acc-0.7302
Iter-6850, train loss-0.2186, acc-0.7100, valid loss-0.2195, acc-0.7384, test loss-0.2215, acc-0.7313
Iter-6860, train loss-0.2376, acc-0.7100, valid loss-0.2196, acc-0.7384, test loss-0.2215, acc-0.7313
Iter-6870, train loss-0.2043, acc-0.7700, valid loss-0.2194, acc-0.7382, test loss-0.2212, acc-0.7306
Iter-6880, train loss-0.2109, acc-0.7300, valid loss-0.2190, acc-0.7380, test loss-0.2208, acc-0.7305
Iter-6890, train loss-0.1709, acc-0.7900, valid loss-0.2191, acc-0.7392, test loss-0.2210, acc-0.7298
Iter-6900, train loss-0.1951, acc-0.7900, valid loss-0.2192, acc-0.7376, test loss-0.2211, acc-0.7288
Iter-6910, train loss-0.2301, acc-0.7200, valid loss-0.2194, acc-0.7382, test loss-0.2212, acc-0.7288
Iter-6920, train loss-0.2676, acc-0.6500, valid loss-0.2193, acc-0.7362, test loss-0.2211, acc-0.7270
Iter-6930, train loss-0.2083, acc-0.7700, valid loss-0.2194, acc-0.7366, test loss-0.2211, acc-0.7288
Iter-6940, train loss-0.2106, acc-0.7800, valid loss-0.2193, acc-0.7384, test loss-0.2209, acc-0.7302
Iter-6950, train loss-0.2549, acc-0.6700, valid loss-0.2191, acc-0.7374, test loss-0.2207, acc-0.7301
Iter-6960, train loss-0.2074, acc-0.7500, valid loss-0.2191, acc-0.7370, test loss-0.2206, acc-0.7288
Iter-6970, train loss-0.2592, acc-0.6900, valid loss-0.2190, acc-0.7378, test loss-0.2206, acc-0.7305
Iter-6980, train loss-0.2249, acc-0.7000, valid loss-0.2190, acc-0.7390, test loss-0.2206, acc-0.7314
Iter-6990, train loss-0.2116, acc-0.7400, valid loss-0.2188, acc-0.7368, test loss-0.2203, acc-0.7303
Iter-7000, train loss-0.2065, acc-0.7100, valid loss-0.2188, acc-0.7350, test loss-0.2203, acc-0.7308
Iter-7010, train loss-0.2575, acc-0.6700, valid loss-0.2187, acc-0.7362, test loss-0.2201, acc-0.7312
Iter-7020, train loss-0.2272, acc-0.7500, valid loss-0.2186, acc-0.7334, test loss-0.2199, acc-0.7291
Iter-7030, train loss-0.2378, acc-0.7200, valid loss-0.2184, acc-0.7336, test loss-0.2196, acc-0.7296
Iter-7040, train loss-0.2298, acc-0.6900, valid loss-0.2185, acc-0.7324, test loss-0.2198, acc-0.7285
Iter-7050, train loss-0.1858, acc-0.7600, valid loss-0.2187, acc-0.7290, test loss-0.2199, acc-0.7274
Iter-7060, train loss-0.2080, acc-0.7200, valid loss-0.2184, acc-0.7308, test loss-0.2194, acc-0.7287
Iter-7070, train loss-0.2512, acc-0.6200, valid loss-0.2184, acc-0.7316, test loss-0.2192, acc-0.7304
Iter-7080, train loss-0.2218, acc-0.7300, valid loss-0.2184, acc-0.7320, test loss-0.2191, acc-0.7307
Iter-7090, train loss-0.1809, acc-0.8100, valid loss-0.2186, acc-0.7324, test loss-0.2193, acc-0.7302
Iter-7100, train loss-0.2262, acc-0.6700, valid loss-0.2181, acc-0.7326, test loss-0.2188, acc-0.7311
Iter-7110, train loss-0.2576, acc-0.6500, valid loss-0.2182, acc-0.7354, test loss-0.2188, acc-0.7327
Iter-7120, train loss-0.2223, acc-0.7500, valid loss-0.2183, acc-0.7336, test loss-0.2189, acc-0.7307
Iter-7130, train loss-0.2147, acc-0.7500, valid loss-0.2182, acc-0.7336, test loss-0.2187, acc-0.7312
Iter-7140, train loss-0.2151, acc-0.7100, valid loss-0.2178, acc-0.7354, test loss-0.2182, acc-0.7338
Iter-7150, train loss-0.2233, acc-0.6800, valid loss-0.2177, acc-0.7348, test loss-0.2183, acc-0.7334
Iter-7160, train loss-0.2256, acc-0.7600, valid loss-0.2174, acc-0.7374, test loss-0.2178, acc-0.7345
Iter-7170, train loss-0.1786, acc-0.8300, valid loss-0.2175, acc-0.7386, test loss-0.2178, acc-0.7352
Iter-7180, train loss-0.2243, acc-0.6900, valid loss-0.2171, acc-0.7396, test loss-0.2174, acc-0.7356
Iter-7190, train loss-0.2182, acc-0.7400, valid loss-0.2169, acc-0.7358, test loss-0.2171, acc-0.7348
Iter-7200, train loss-0.2138, acc-0.7400, valid loss-0.2169, acc-0.7366, test loss-0.2169, acc-0.7360
Iter-7210, train loss-0.2621, acc-0.6600, valid loss-0.2171, acc-0.7348, test loss-0.2171, acc-0.7357
Iter-7220, train loss-0.2310, acc-0.7200, valid loss-0.2172, acc-0.7334, test loss-0.2170, acc-0.7355
Iter-7230, train loss-0.2100, acc-0.7800, valid loss-0.2172, acc-0.7330, test loss-0.2168, acc-0.7361
Iter-7240, train loss-0.2009, acc-0.7900, valid loss-0.2174, acc-0.7360, test loss-0.2169, acc-0.7375
Iter-7250, train loss-0.2314, acc-0.6500, valid loss-0.2173, acc-0.7338, test loss-0.2168, acc-0.7357
Iter-7260, train loss-0.2104, acc-0.7200, valid loss-0.2176, acc-0.7340, test loss-0.2170, acc-0.7356
Iter-7270, train loss-0.2422, acc-0.6500, valid loss-0.2174, acc-0.7326, test loss-0.2168, acc-0.7338
Iter-7280, train loss-0.2202, acc-0.6900, valid loss-0.2175, acc-0.7324, test loss-0.2169, acc-0.7345
Iter-7290, train loss-0.2197, acc-0.7600, valid loss-0.2172, acc-0.7348, test loss-0.2166, acc-0.7357
Iter-7300, train loss-0.1934, acc-0.7800, valid loss-0.2171, acc-0.7322, test loss-0.2165, acc-0.7350
Iter-7310, train loss-0.2055, acc-0.7400, valid loss-0.2168, acc-0.7360, test loss-0.2160, acc-0.7368
Iter-7320, train loss-0.1902, acc-0.7500, valid loss-0.2170, acc-0.7336, test loss-0.2161, acc-0.7361
Iter-7330, train loss-0.2388, acc-0.6800, valid loss-0.2168, acc-0.7364, test loss-0.2158, acc-0.7381
Iter-7340, train loss-0.2419, acc-0.7200, valid loss-0.2170, acc-0.7358, test loss-0.2159, acc-0.7380
Iter-7350, train loss-0.2405, acc-0.6700, valid loss-0.2169, acc-0.7358, test loss-0.2157, acc-0.7394
Iter-7360, train loss-0.2338, acc-0.7000, valid loss-0.2169, acc-0.7338, test loss-0.2156, acc-0.7380
Iter-7370, train loss-0.2125, acc-0.7300, valid loss-0.2171, acc-0.7358, test loss-0.2157, acc-0.7376
Iter-7380, train loss-0.2040, acc-0.7600, valid loss-0.2173, acc-0.7330, test loss-0.2160, acc-0.7380
Iter-7390, train loss-0.2117, acc-0.7400, valid loss-0.2174, acc-0.7338, test loss-0.2160, acc-0.7370
Iter-7400, train loss-0.2503, acc-0.7000, valid loss-0.2171, acc-0.7342, test loss-0.2157, acc-0.7383
Iter-7410, train loss-0.1856, acc-0.7700, valid loss-0.2177, acc-0.7348, test loss-0.2162, acc-0.7367
Iter-7420, train loss-0.2109, acc-0.7200, valid loss-0.2180, acc-0.7316, test loss-0.2164, acc-0.7354
Iter-7430, train loss-0.2237, acc-0.7600, valid loss-0.2179, acc-0.7312, test loss-0.2162, acc-0.7366
Iter-7440, train loss-0.2297, acc-0.6700, valid loss-0.2183, acc-0.7318, test loss-0.2165, acc-0.7362
Iter-7450, train loss-0.2129, acc-0.7000, valid loss-0.2183, acc-0.7314, test loss-0.2164, acc-0.7367
Iter-7460, train loss-0.2260, acc-0.7300, valid loss-0.2187, acc-0.7306, test loss-0.2167, acc-0.7355
Iter-7470, train loss-0.2309, acc-0.6800, valid loss-0.2193, acc-0.7292, test loss-0.2173, acc-0.7333
Iter-7480, train loss-0.2562, acc-0.6800, valid loss-0.2192, acc-0.7272, test loss-0.2172, acc-0.7324
Iter-7490, train loss-0.2828, acc-0.5900, valid loss-0.2195, acc-0.7274, test loss-0.2174, acc-0.7333
Iter-7500, train loss-0.2335, acc-0.7000, valid loss-0.2202, acc-0.7252, test loss-0.2181, acc-0.7317
Iter-7510, train loss-0.2127, acc-0.7500, valid loss-0.2202, acc-0.7254, test loss-0.2180, acc-0.7316
Iter-7520, train loss-0.2337, acc-0.6900, valid loss-0.2207, acc-0.7232, test loss-0.2184, acc-0.7302
Iter-7530, train loss-0.2039, acc-0.7600, valid loss-0.2210, acc-0.7248, test loss-0.2185, acc-0.7298
Iter-7540, train loss-0.2392, acc-0.6700, valid loss-0.2219, acc-0.7238, test loss-0.2191, acc-0.7287
Iter-7550, train loss-0.2531, acc-0.6400, valid loss-0.2220, acc-0.7248, test loss-0.2192, acc-0.7288
Iter-7560, train loss-0.2368, acc-0.7000, valid loss-0.2221, acc-0.7228, test loss-0.2192, acc-0.7276
Iter-7570, train loss-0.2506, acc-0.6500, valid loss-0.2222, acc-0.7232, test loss-0.2192, acc-0.7292
Iter-7580, train loss-0.2378, acc-0.7000, valid loss-0.2227, acc-0.7232, test loss-0.2196, acc-0.7274
Iter-7590, train loss-0.2190, acc-0.6900, valid loss-0.2230, acc-0.7236, test loss-0.2197, acc-0.7282
Iter-7600, train loss-0.2608, acc-0.6500, valid loss-0.2229, acc-0.7228, test loss-0.2197, acc-0.7283
Iter-7610, train loss-0.2485, acc-0.7300, valid loss-0.2230, acc-0.7220, test loss-0.2197, acc-0.7277
Iter-7620, train loss-0.2118, acc-0.7600, valid loss-0.2236, acc-0.7206, test loss-0.2202, acc-0.7268
Iter-7630, train loss-0.1910, acc-0.7400, valid loss-0.2238, acc-0.7202, test loss-0.2204, acc-0.7262
Iter-7640, train loss-0.2099, acc-0.7200, valid loss-0.2246, acc-0.7194, test loss-0.2209, acc-0.7261
Iter-7650, train loss-0.2323, acc-0.7400, valid loss-0.2243, acc-0.7202, test loss-0.2206, acc-0.7253
Iter-7660, train loss-0.1961, acc-0.7300, valid loss-0.2247, acc-0.7190, test loss-0.2210, acc-0.7235
Iter-7670, train loss-0.2429, acc-0.6900, valid loss-0.2243, acc-0.7180, test loss-0.2206, acc-0.7239
Iter-7680, train loss-0.2270, acc-0.7100, valid loss-0.2246, acc-0.7178, test loss-0.2209, acc-0.7240
Iter-7690, train loss-0.2061, acc-0.7700, valid loss-0.2236, acc-0.7190, test loss-0.2200, acc-0.7255
Iter-7700, train loss-0.2333, acc-0.7000, valid loss-0.2242, acc-0.7190, test loss-0.2203, acc-0.7248
Iter-7710, train loss-0.1982, acc-0.7900, valid loss-0.2244, acc-0.7182, test loss-0.2206, acc-0.7239
Iter-7720, train loss-0.2074, acc-0.7700, valid loss-0.2247, acc-0.7178, test loss-0.2209, acc-0.7236
Iter-7730, train loss-0.1929, acc-0.7700, valid loss-0.2251, acc-0.7170, test loss-0.2213, acc-0.7225
Iter-7740, train loss-0.2437, acc-0.6900, valid loss-0.2251, acc-0.7170, test loss-0.2214, acc-0.7223
Iter-7750, train loss-0.1904, acc-0.7500, valid loss-0.2255, acc-0.7156, test loss-0.2219, acc-0.7231
Iter-7760, train loss-0.2027, acc-0.7400, valid loss-0.2246, acc-0.7180, test loss-0.2211, acc-0.7232
Iter-7770, train loss-0.2294, acc-0.7100, valid loss-0.2247, acc-0.7182, test loss-0.2211, acc-0.7236
Iter-7780, train loss-0.2505, acc-0.7000, valid loss-0.2249, acc-0.7180, test loss-0.2214, acc-0.7220
Iter-7790, train loss-0.2312, acc-0.6900, valid loss-0.2252, acc-0.7180, test loss-0.2216, acc-0.7226
Iter-7800, train loss-0.2069, acc-0.7200, valid loss-0.2259, acc-0.7140, test loss-0.2224, acc-0.7211
Iter-7810, train loss-0.2577, acc-0.6400, valid loss-0.2260, acc-0.7132, test loss-0.2225, acc-0.7207
Iter-7820, train loss-0.2195, acc-0.6900, valid loss-0.2263, acc-0.7118, test loss-0.2228, acc-0.7199
Iter-7830, train loss-0.2125, acc-0.7400, valid loss-0.2267, acc-0.7104, test loss-0.2233, acc-0.7168
Iter-7840, train loss-0.2492, acc-0.6900, valid loss-0.2266, acc-0.7096, test loss-0.2233, acc-0.7154
Iter-7850, train loss-0.2389, acc-0.6900, valid loss-0.2268, acc-0.7108, test loss-0.2235, acc-0.7157
Iter-7860, train loss-0.2265, acc-0.6900, valid loss-0.2270, acc-0.7106, test loss-0.2237, acc-0.7143
Iter-7870, train loss-0.2372, acc-0.7300, valid loss-0.2272, acc-0.7088, test loss-0.2240, acc-0.7142
Iter-7880, train loss-0.2277, acc-0.6900, valid loss-0.2265, acc-0.7112, test loss-0.2234, acc-0.7170
Iter-7890, train loss-0.2295, acc-0.6600, valid loss-0.2269, acc-0.7098, test loss-0.2237, acc-0.7164
Iter-7900, train loss-0.1847, acc-0.8100, valid loss-0.2275, acc-0.7070, test loss-0.2243, acc-0.7135
Iter-7910, train loss-0.2449, acc-0.6600, valid loss-0.2270, acc-0.7088, test loss-0.2239, acc-0.7156
Iter-7920, train loss-0.2578, acc-0.6400, valid loss-0.2271, acc-0.7084, test loss-0.2242, acc-0.7148
Iter-7930, train loss-0.2317, acc-0.7100, valid loss-0.2277, acc-0.7072, test loss-0.2247, acc-0.7145
Iter-7940, train loss-0.2054, acc-0.7200, valid loss-0.2279, acc-0.7050, test loss-0.2249, acc-0.7149
Iter-7950, train loss-0.2401, acc-0.6800, valid loss-0.2282, acc-0.7048, test loss-0.2253, acc-0.7132
Iter-7960, train loss-0.2048, acc-0.7200, valid loss-0.2285, acc-0.7038, test loss-0.2256, acc-0.7126
Iter-7970, train loss-0.2072, acc-0.7700, valid loss-0.2285, acc-0.7038, test loss-0.2256, acc-0.7134
Iter-7980, train loss-0.2237, acc-0.7300, valid loss-0.2285, acc-0.7038, test loss-0.2257, acc-0.7139
Iter-7990, train loss-0.2761, acc-0.6700, valid loss-0.2287, acc-0.7044, test loss-0.2260, acc-0.7139
Iter-8000, train loss-0.2919, acc-0.5800, valid loss-0.2289, acc-0.7018, test loss-0.2263, acc-0.7136
Iter-8010, train loss-0.2410, acc-0.7400, valid loss-0.2293, acc-0.7010, test loss-0.2267, acc-0.7117
Iter-8020, train loss-0.2166, acc-0.7300, valid loss-0.2292, acc-0.7008, test loss-0.2267, acc-0.7101
Iter-8030, train loss-0.1922, acc-0.7600, valid loss-0.2293, acc-0.7016, test loss-0.2268, acc-0.7097
Iter-8040, train loss-0.2040, acc-0.7400, valid loss-0.2293, acc-0.7008, test loss-0.2268, acc-0.7096
Iter-8050, train loss-0.2530, acc-0.6500, valid loss-0.2298, acc-0.7008, test loss-0.2273, acc-0.7086
Iter-8060, train loss-0.2497, acc-0.7000, valid loss-0.2299, acc-0.7006, test loss-0.2275, acc-0.7086
Iter-8070, train loss-0.1986, acc-0.7600, valid loss-0.2303, acc-0.6996, test loss-0.2279, acc-0.7065
Iter-8080, train loss-0.2035, acc-0.7400, valid loss-0.2302, acc-0.7002, test loss-0.2277, acc-0.7073
Iter-8090, train loss-0.2448, acc-0.6700, valid loss-0.2304, acc-0.6994, test loss-0.2280, acc-0.7054
Iter-8100, train loss-0.2422, acc-0.6700, valid loss-0.2305, acc-0.6982, test loss-0.2282, acc-0.7052
Iter-8110, train loss-0.2782, acc-0.6300, valid loss-0.2298, acc-0.6996, test loss-0.2275, acc-0.7061
Iter-8120, train loss-0.2431, acc-0.7100, valid loss-0.2298, acc-0.6988, test loss-0.2275, acc-0.7043
Iter-8130, train loss-0.2287, acc-0.7100, valid loss-0.2295, acc-0.6994, test loss-0.2273, acc-0.7049
Iter-8140, train loss-0.2307, acc-0.7300, valid loss-0.2300, acc-0.6982, test loss-0.2279, acc-0.7040
Iter-8150, train loss-0.1824, acc-0.7900, valid loss-0.2302, acc-0.6982, test loss-0.2279, acc-0.7045
Iter-8160, train loss-0.2299, acc-0.7200, valid loss-0.2297, acc-0.6986, test loss-0.2276, acc-0.7036
Iter-8170, train loss-0.1988, acc-0.7800, valid loss-0.2297, acc-0.6988, test loss-0.2277, acc-0.7016
Iter-8180, train loss-0.2423, acc-0.6400, valid loss-0.2288, acc-0.7010, test loss-0.2269, acc-0.7041
Iter-8190, train loss-0.2570, acc-0.6700, valid loss-0.2291, acc-0.6998, test loss-0.2273, acc-0.7044
Iter-8200, train loss-0.2241, acc-0.7000, valid loss-0.2286, acc-0.6988, test loss-0.2268, acc-0.7043
Iter-8210, train loss-0.2318, acc-0.7100, valid loss-0.2284, acc-0.7014, test loss-0.2265, acc-0.7049
Iter-8220, train loss-0.2259, acc-0.6900, valid loss-0.2279, acc-0.6996, test loss-0.2261, acc-0.7051
Iter-8230, train loss-0.2305, acc-0.6900, valid loss-0.2277, acc-0.6996, test loss-0.2260, acc-0.7039
Iter-8240, train loss-0.2455, acc-0.7000, valid loss-0.2274, acc-0.6990, test loss-0.2258, acc-0.7042
Iter-8250, train loss-0.2222, acc-0.7500, valid loss-0.2278, acc-0.6976, test loss-0.2262, acc-0.7042
Iter-8260, train loss-0.1856, acc-0.7600, valid loss-0.2282, acc-0.6964, test loss-0.2266, acc-0.7029
Iter-8270, train loss-0.2171, acc-0.7100, valid loss-0.2279, acc-0.6972, test loss-0.2264, acc-0.7021
Iter-8280, train loss-0.1917, acc-0.8000, valid loss-0.2278, acc-0.6976, test loss-0.2263, acc-0.7006
Iter-8290, train loss-0.2482, acc-0.6000, valid loss-0.2278, acc-0.6986, test loss-0.2263, acc-0.7012
Iter-8300, train loss-0.2511, acc-0.6800, valid loss-0.2276, acc-0.6988, test loss-0.2260, acc-0.7014
Iter-8310, train loss-0.2362, acc-0.7300, valid loss-0.2282, acc-0.6976, test loss-0.2267, acc-0.6989
Iter-8320, train loss-0.2471, acc-0.6500, valid loss-0.2282, acc-0.6968, test loss-0.2267, acc-0.7004
Iter-8330, train loss-0.2312, acc-0.7400, valid loss-0.2282, acc-0.6966, test loss-0.2267, acc-0.7001
Iter-8340, train loss-0.1915, acc-0.7800, valid loss-0.2279, acc-0.7006, test loss-0.2263, acc-0.7043
Iter-8350, train loss-0.2323, acc-0.6900, valid loss-0.2277, acc-0.7008, test loss-0.2262, acc-0.7056
Iter-8360, train loss-0.2606, acc-0.6700, valid loss-0.2271, acc-0.7034, test loss-0.2257, acc-0.7074
Iter-8370, train loss-0.2563, acc-0.6800, valid loss-0.2273, acc-0.6998, test loss-0.2259, acc-0.7038
Iter-8380, train loss-0.2419, acc-0.6700, valid loss-0.2271, acc-0.7026, test loss-0.2257, acc-0.7073
Iter-8390, train loss-0.1886, acc-0.7800, valid loss-0.2271, acc-0.7032, test loss-0.2257, acc-0.7070
Iter-8400, train loss-0.2021, acc-0.7500, valid loss-0.2271, acc-0.7032, test loss-0.2257, acc-0.7069
Iter-8410, train loss-0.2355, acc-0.6900, valid loss-0.2267, acc-0.7060, test loss-0.2253, acc-0.7093
Iter-8420, train loss-0.2162, acc-0.7200, valid loss-0.2267, acc-0.7062, test loss-0.2254, acc-0.7088
Iter-8430, train loss-0.1869, acc-0.8000, valid loss-0.2265, acc-0.7054, test loss-0.2252, acc-0.7092
Iter-8440, train loss-0.2391, acc-0.6500, valid loss-0.2270, acc-0.7060, test loss-0.2257, acc-0.7093
Iter-8450, train loss-0.2470, acc-0.6500, valid loss-0.2269, acc-0.7040, test loss-0.2256, acc-0.7084
Iter-8460, train loss-0.2115, acc-0.7400, valid loss-0.2271, acc-0.7052, test loss-0.2258, acc-0.7085
Iter-8470, train loss-0.1696, acc-0.8000, valid loss-0.2271, acc-0.7060, test loss-0.2258, acc-0.7080
Iter-8480, train loss-0.1985, acc-0.7700, valid loss-0.2262, acc-0.7070, test loss-0.2250, acc-0.7085
Iter-8490, train loss-0.2529, acc-0.6500, valid loss-0.2258, acc-0.7088, test loss-0.2246, acc-0.7113
Iter-8500, train loss-0.2413, acc-0.6900, valid loss-0.2260, acc-0.7090, test loss-0.2248, acc-0.7113
Iter-8510, train loss-0.1760, acc-0.8000, valid loss-0.2266, acc-0.7072, test loss-0.2254, acc-0.7116
Iter-8520, train loss-0.2205, acc-0.7700, valid loss-0.2259, acc-0.7078, test loss-0.2248, acc-0.7126
Iter-8530, train loss-0.2320, acc-0.6900, valid loss-0.2263, acc-0.7070, test loss-0.2251, acc-0.7113
Iter-8540, train loss-0.2143, acc-0.7100, valid loss-0.2259, acc-0.7086, test loss-0.2248, acc-0.7118
Iter-8550, train loss-0.2487, acc-0.6700, valid loss-0.2262, acc-0.7086, test loss-0.2251, acc-0.7112
Iter-8560, train loss-0.2514, acc-0.6800, valid loss-0.2266, acc-0.7074, test loss-0.2256, acc-0.7094
Iter-8570, train loss-0.2074, acc-0.7700, valid loss-0.2267, acc-0.7082, test loss-0.2257, acc-0.7097
Iter-8580, train loss-0.2077, acc-0.7500, valid loss-0.2266, acc-0.7074, test loss-0.2257, acc-0.7093
Iter-8590, train loss-0.2543, acc-0.6400, valid loss-0.2263, acc-0.7082, test loss-0.2254, acc-0.7107
Iter-8600, train loss-0.2122, acc-0.6900, valid loss-0.2263, acc-0.7076, test loss-0.2256, acc-0.7106
Iter-8610, train loss-0.2167, acc-0.7400, valid loss-0.2262, acc-0.7062, test loss-0.2255, acc-0.7097
Iter-8620, train loss-0.3037, acc-0.5700, valid loss-0.2260, acc-0.7058, test loss-0.2254, acc-0.7119
Iter-8630, train loss-0.2526, acc-0.6800, valid loss-0.2260, acc-0.7056, test loss-0.2253, acc-0.7120
Iter-8640, train loss-0.1974, acc-0.7700, valid loss-0.2257, acc-0.7058, test loss-0.2250, acc-0.7118
Iter-8650, train loss-0.2361, acc-0.7100, valid loss-0.2257, acc-0.7036, test loss-0.2251, acc-0.7118
Iter-8660, train loss-0.2411, acc-0.7300, valid loss-0.2254, acc-0.7066, test loss-0.2247, acc-0.7141
Iter-8670, train loss-0.2475, acc-0.6800, valid loss-0.2258, acc-0.7044, test loss-0.2250, acc-0.7110
Iter-8680, train loss-0.2450, acc-0.6600, valid loss-0.2260, acc-0.7034, test loss-0.2251, acc-0.7116
Iter-8690, train loss-0.2549, acc-0.6800, valid loss-0.2262, acc-0.7032, test loss-0.2255, acc-0.7102
Iter-8700, train loss-0.2477, acc-0.6700, valid loss-0.2262, acc-0.7024, test loss-0.2256, acc-0.7104
Iter-8710, train loss-0.2533, acc-0.6700, valid loss-0.2259, acc-0.7016, test loss-0.2252, acc-0.7105
Iter-8720, train loss-0.2298, acc-0.7100, valid loss-0.2256, acc-0.7014, test loss-0.2248, acc-0.7105
Iter-8730, train loss-0.2401, acc-0.6900, valid loss-0.2251, acc-0.7008, test loss-0.2244, acc-0.7107
Iter-8740, train loss-0.1776, acc-0.8000, valid loss-0.2251, acc-0.7004, test loss-0.2243, acc-0.7119
Iter-8750, train loss-0.2078, acc-0.7300, valid loss-0.2249, acc-0.7018, test loss-0.2242, acc-0.7122
Iter-8760, train loss-0.2345, acc-0.7000, valid loss-0.2246, acc-0.7014, test loss-0.2238, acc-0.7125
Iter-8770, train loss-0.2366, acc-0.7100, valid loss-0.2245, acc-0.7006, test loss-0.2237, acc-0.7130
Iter-8780, train loss-0.2276, acc-0.7100, valid loss-0.2246, acc-0.7016, test loss-0.2238, acc-0.7116
Iter-8790, train loss-0.2101, acc-0.8000, valid loss-0.2243, acc-0.7016, test loss-0.2236, acc-0.7115
Iter-8800, train loss-0.2832, acc-0.5700, valid loss-0.2242, acc-0.7020, test loss-0.2235, acc-0.7123
Iter-8810, train loss-0.2271, acc-0.7100, valid loss-0.2238, acc-0.7018, test loss-0.2231, acc-0.7118
Iter-8820, train loss-0.2251, acc-0.6800, valid loss-0.2237, acc-0.7022, test loss-0.2230, acc-0.7120
Iter-8830, train loss-0.2266, acc-0.6900, valid loss-0.2236, acc-0.7020, test loss-0.2228, acc-0.7127
Iter-8840, train loss-0.2264, acc-0.7000, valid loss-0.2239, acc-0.7006, test loss-0.2232, acc-0.7108
Iter-8850, train loss-0.2276, acc-0.6500, valid loss-0.2238, acc-0.6994, test loss-0.2231, acc-0.7106
Iter-8860, train loss-0.2799, acc-0.6000, valid loss-0.2238, acc-0.7012, test loss-0.2230, acc-0.7106
Iter-8870, train loss-0.2701, acc-0.6400, valid loss-0.2238, acc-0.6996, test loss-0.2232, acc-0.7097
Iter-8880, train loss-0.2336, acc-0.6800, valid loss-0.2236, acc-0.7002, test loss-0.2231, acc-0.7093
Iter-8890, train loss-0.2460, acc-0.6700, valid loss-0.2237, acc-0.6994, test loss-0.2233, acc-0.7082
Iter-8900, train loss-0.2130, acc-0.7300, valid loss-0.2238, acc-0.6996, test loss-0.2234, acc-0.7088
Iter-8910, train loss-0.2214, acc-0.7400, valid loss-0.2242, acc-0.6992, test loss-0.2237, acc-0.7080
Iter-8920, train loss-0.2686, acc-0.6300, valid loss-0.2239, acc-0.6994, test loss-0.2234, acc-0.7087
Iter-8930, train loss-0.1909, acc-0.7700, valid loss-0.2241, acc-0.7002, test loss-0.2237, acc-0.7090
Iter-8940, train loss-0.2704, acc-0.6100, valid loss-0.2240, acc-0.6998, test loss-0.2235, acc-0.7096
Iter-8950, train loss-0.2395, acc-0.7000, valid loss-0.2241, acc-0.6996, test loss-0.2235, acc-0.7095
Iter-8960, train loss-0.2511, acc-0.6500, valid loss-0.2246, acc-0.6994, test loss-0.2239, acc-0.7097
Iter-8970, train loss-0.2467, acc-0.6700, valid loss-0.2244, acc-0.7000, test loss-0.2238, acc-0.7102
Iter-8980, train loss-0.2160, acc-0.7100, valid loss-0.2243, acc-0.6996, test loss-0.2237, acc-0.7097
Iter-8990, train loss-0.2262, acc-0.6900, valid loss-0.2242, acc-0.6996, test loss-0.2236, acc-0.7097
Iter-9000, train loss-0.1943, acc-0.7300, valid loss-0.2238, acc-0.6994, test loss-0.2233, acc-0.7100
Iter-9010, train loss-0.2239, acc-0.7100, valid loss-0.2239, acc-0.6988, test loss-0.2233, acc-0.7097
Iter-9020, train loss-0.1899, acc-0.7800, valid loss-0.2244, acc-0.6982, test loss-0.2238, acc-0.7086
Iter-9030, train loss-0.2177, acc-0.7300, valid loss-0.2243, acc-0.6978, test loss-0.2237, acc-0.7079
Iter-9040, train loss-0.2213, acc-0.7100, valid loss-0.2247, acc-0.6982, test loss-0.2240, acc-0.7078
Iter-9050, train loss-0.2339, acc-0.6800, valid loss-0.2251, acc-0.6966, test loss-0.2243, acc-0.7068
Iter-9060, train loss-0.2345, acc-0.6600, valid loss-0.2252, acc-0.6954, test loss-0.2243, acc-0.7065
Iter-9070, train loss-0.2614, acc-0.6300, valid loss-0.2253, acc-0.6956, test loss-0.2245, acc-0.7066
Iter-9080, train loss-0.1871, acc-0.7400, valid loss-0.2254, acc-0.6944, test loss-0.2246, acc-0.7045
Iter-9090, train loss-0.2670, acc-0.6400, valid loss-0.2253, acc-0.6952, test loss-0.2245, acc-0.7057
Iter-9100, train loss-0.2643, acc-0.6500, valid loss-0.2250, acc-0.6950, test loss-0.2244, acc-0.7042
Iter-9110, train loss-0.2029, acc-0.7700, valid loss-0.2248, acc-0.6952, test loss-0.2241, acc-0.7031
Iter-9120, train loss-0.2109, acc-0.7200, valid loss-0.2246, acc-0.6950, test loss-0.2239, acc-0.7040
Iter-9130, train loss-0.2133, acc-0.7500, valid loss-0.2243, acc-0.6946, test loss-0.2235, acc-0.7044
Iter-9140, train loss-0.2568, acc-0.6400, valid loss-0.2241, acc-0.6958, test loss-0.2233, acc-0.7036
Iter-9150, train loss-0.2500, acc-0.6800, valid loss-0.2245, acc-0.6944, test loss-0.2236, acc-0.7034
Iter-9160, train loss-0.2356, acc-0.7200, valid loss-0.2241, acc-0.6940, test loss-0.2233, acc-0.7035
Iter-9170, train loss-0.2283, acc-0.6500, valid loss-0.2242, acc-0.6944, test loss-0.2234, acc-0.7040
Iter-9180, train loss-0.2595, acc-0.6100, valid loss-0.2241, acc-0.6958, test loss-0.2233, acc-0.7048
Iter-9190, train loss-0.2458, acc-0.6300, valid loss-0.2241, acc-0.6952, test loss-0.2232, acc-0.7045
Iter-9200, train loss-0.2161, acc-0.6900, valid loss-0.2242, acc-0.6968, test loss-0.2234, acc-0.7047
Iter-9210, train loss-0.2077, acc-0.7100, valid loss-0.2238, acc-0.6964, test loss-0.2228, acc-0.7056
Iter-9220, train loss-0.2874, acc-0.5900, valid loss-0.2237, acc-0.6946, test loss-0.2228, acc-0.7046
Iter-9230, train loss-0.2027, acc-0.7200, valid loss-0.2237, acc-0.6968, test loss-0.2227, acc-0.7057
Iter-9240, train loss-0.2452, acc-0.6300, valid loss-0.2234, acc-0.6968, test loss-0.2224, acc-0.7068
Iter-9250, train loss-0.2601, acc-0.6600, valid loss-0.2236, acc-0.6968, test loss-0.2224, acc-0.7066
Iter-9260, train loss-0.2050, acc-0.7400, valid loss-0.2238, acc-0.6966, test loss-0.2226, acc-0.7066
Iter-9270, train loss-0.2394, acc-0.6700, valid loss-0.2238, acc-0.6962, test loss-0.2224, acc-0.7075
Iter-9280, train loss-0.1657, acc-0.8100, valid loss-0.2240, acc-0.6958, test loss-0.2227, acc-0.7060
Iter-9290, train loss-0.2651, acc-0.6300, valid loss-0.2241, acc-0.6952, test loss-0.2227, acc-0.7056
Iter-9300, train loss-0.2526, acc-0.6200, valid loss-0.2239, acc-0.6964, test loss-0.2225, acc-0.7078
Iter-9310, train loss-0.2084, acc-0.7000, valid loss-0.2243, acc-0.6952, test loss-0.2228, acc-0.7072
Iter-9320, train loss-0.2377, acc-0.6600, valid loss-0.2243, acc-0.6960, test loss-0.2228, acc-0.7078
Iter-9330, train loss-0.2244, acc-0.7100, valid loss-0.2245, acc-0.6958, test loss-0.2230, acc-0.7069
Iter-9340, train loss-0.2530, acc-0.6600, valid loss-0.2245, acc-0.6958, test loss-0.2230, acc-0.7073
Iter-9350, train loss-0.2230, acc-0.6600, valid loss-0.2244, acc-0.6956, test loss-0.2229, acc-0.7081
Iter-9360, train loss-0.1999, acc-0.7300, valid loss-0.2243, acc-0.6976, test loss-0.2227, acc-0.7076
Iter-9370, train loss-0.2450, acc-0.6900, valid loss-0.2243, acc-0.6984, test loss-0.2227, acc-0.7088
Iter-9380, train loss-0.2150, acc-0.7200, valid loss-0.2246, acc-0.6982, test loss-0.2229, acc-0.7083
Iter-9390, train loss-0.2237, acc-0.7300, valid loss-0.2246, acc-0.6990, test loss-0.2228, acc-0.7086
Iter-9400, train loss-0.2330, acc-0.6600, valid loss-0.2247, acc-0.6994, test loss-0.2229, acc-0.7086
Iter-9410, train loss-0.2423, acc-0.6700, valid loss-0.2251, acc-0.7000, test loss-0.2234, acc-0.7082
Iter-9420, train loss-0.2477, acc-0.6400, valid loss-0.2250, acc-0.7006, test loss-0.2233, acc-0.7081
Iter-9430, train loss-0.2038, acc-0.7300, valid loss-0.2251, acc-0.6994, test loss-0.2235, acc-0.7078
Iter-9440, train loss-0.2288, acc-0.6900, valid loss-0.2254, acc-0.6988, test loss-0.2238, acc-0.7072
Iter-9450, train loss-0.2555, acc-0.6300, valid loss-0.2255, acc-0.6988, test loss-0.2239, acc-0.7072
Iter-9460, train loss-0.2457, acc-0.6500, valid loss-0.2257, acc-0.6972, test loss-0.2242, acc-0.7052
Iter-9470, train loss-0.1979, acc-0.7200, valid loss-0.2259, acc-0.6968, test loss-0.2243, acc-0.7053
Iter-9480, train loss-0.2071, acc-0.7300, valid loss-0.2259, acc-0.6972, test loss-0.2242, acc-0.7053
Iter-9490, train loss-0.2093, acc-0.7300, valid loss-0.2260, acc-0.6984, test loss-0.2242, acc-0.7066
Iter-9500, train loss-0.1982, acc-0.7600, valid loss-0.2267, acc-0.6970, test loss-0.2248, acc-0.7048
Iter-9510, train loss-0.2148, acc-0.7100, valid loss-0.2269, acc-0.6960, test loss-0.2251, acc-0.7044
Iter-9520, train loss-0.2222, acc-0.7100, valid loss-0.2270, acc-0.6946, test loss-0.2252, acc-0.7036
Iter-9530, train loss-0.2433, acc-0.6600, valid loss-0.2270, acc-0.6958, test loss-0.2251, acc-0.7044
Iter-9540, train loss-0.2146, acc-0.7100, valid loss-0.2271, acc-0.6948, test loss-0.2251, acc-0.7047
Iter-9550, train loss-0.2206, acc-0.7200, valid loss-0.2276, acc-0.6928, test loss-0.2256, acc-0.7028
Iter-9560, train loss-0.2597, acc-0.6500, valid loss-0.2275, acc-0.6920, test loss-0.2256, acc-0.7025
Iter-9570, train loss-0.1738, acc-0.8100, valid loss-0.2278, acc-0.6930, test loss-0.2257, acc-0.7019
Iter-9580, train loss-0.2468, acc-0.6600, valid loss-0.2276, acc-0.6922, test loss-0.2257, acc-0.7027
Iter-9590, train loss-0.2487, acc-0.6500, valid loss-0.2275, acc-0.6918, test loss-0.2255, acc-0.7026
Iter-9600, train loss-0.2336, acc-0.6700, valid loss-0.2272, acc-0.6920, test loss-0.2252, acc-0.7029
Iter-9610, train loss-0.2358, acc-0.6700, valid loss-0.2276, acc-0.6918, test loss-0.2257, acc-0.7020
Iter-9620, train loss-0.2671, acc-0.6100, valid loss-0.2276, acc-0.6916, test loss-0.2255, acc-0.7009
Iter-9630, train loss-0.2337, acc-0.6700, valid loss-0.2278, acc-0.6904, test loss-0.2259, acc-0.7000
Iter-9640, train loss-0.2287, acc-0.7200, valid loss-0.2279, acc-0.6908, test loss-0.2260, acc-0.7005
Iter-9650, train loss-0.2162, acc-0.7400, valid loss-0.2278, acc-0.6906, test loss-0.2259, acc-0.7000
Iter-9660, train loss-0.2239, acc-0.6700, valid loss-0.2277, acc-0.6904, test loss-0.2257, acc-0.7009
Iter-9670, train loss-0.2111, acc-0.6800, valid loss-0.2278, acc-0.6924, test loss-0.2256, acc-0.7023
Iter-9680, train loss-0.2167, acc-0.7000, valid loss-0.2280, acc-0.6928, test loss-0.2259, acc-0.7010
Iter-9690, train loss-0.2570, acc-0.6700, valid loss-0.2279, acc-0.6934, test loss-0.2259, acc-0.7001
Iter-9700, train loss-0.2351, acc-0.6900, valid loss-0.2282, acc-0.6926, test loss-0.2261, acc-0.7003
Iter-9710, train loss-0.2460, acc-0.6400, valid loss-0.2285, acc-0.6922, test loss-0.2264, acc-0.6993
Iter-9720, train loss-0.2203, acc-0.6900, valid loss-0.2285, acc-0.6932, test loss-0.2263, acc-0.6998
Iter-9730, train loss-0.2374, acc-0.6800, valid loss-0.2283, acc-0.6924, test loss-0.2262, acc-0.7000
Iter-9740, train loss-0.1896, acc-0.7500, valid loss-0.2288, acc-0.6926, test loss-0.2268, acc-0.6984
Iter-9750, train loss-0.2484, acc-0.6800, valid loss-0.2292, acc-0.6900, test loss-0.2271, acc-0.6975
Iter-9760, train loss-0.1960, acc-0.7600, valid loss-0.2290, acc-0.6910, test loss-0.2269, acc-0.6974
Iter-9770, train loss-0.2202, acc-0.7200, valid loss-0.2289, acc-0.6910, test loss-0.2269, acc-0.6978
Iter-9780, train loss-0.2431, acc-0.6700, valid loss-0.2288, acc-0.6914, test loss-0.2268, acc-0.6976
Iter-9790, train loss-0.1676, acc-0.7700, valid loss-0.2291, acc-0.6916, test loss-0.2272, acc-0.6958
Iter-9800, train loss-0.2641, acc-0.6200, valid loss-0.2290, acc-0.6918, test loss-0.2272, acc-0.6958
Iter-9810, train loss-0.2442, acc-0.6100, valid loss-0.2294, acc-0.6912, test loss-0.2275, acc-0.6954
Iter-9820, train loss-0.2521, acc-0.6400, valid loss-0.2292, acc-0.6916, test loss-0.2272, acc-0.6963
Iter-9830, train loss-0.2576, acc-0.6400, valid loss-0.2294, acc-0.6908, test loss-0.2273, acc-0.6966
Iter-9840, train loss-0.2223, acc-0.7200, valid loss-0.2293, acc-0.6898, test loss-0.2274, acc-0.6949
Iter-9850, train loss-0.2120, acc-0.7200, valid loss-0.2293, acc-0.6910, test loss-0.2273, acc-0.6945
Iter-9860, train loss-0.2226, acc-0.7100, valid loss-0.2297, acc-0.6892, test loss-0.2277, acc-0.6945
Iter-9870, train loss-0.2118, acc-0.6700, valid loss-0.2298, acc-0.6902, test loss-0.2277, acc-0.6940
Iter-9880, train loss-0.2563, acc-0.6200, valid loss-0.2299, acc-0.6892, test loss-0.2279, acc-0.6932
Iter-9890, train loss-0.2122, acc-0.7100, valid loss-0.2302, acc-0.6900, test loss-0.2280, acc-0.6948
Iter-9900, train loss-0.2252, acc-0.6900, valid loss-0.2301, acc-0.6888, test loss-0.2281, acc-0.6923
Iter-9910, train loss-0.2241, acc-0.6900, valid loss-0.2304, acc-0.6892, test loss-0.2284, acc-0.6926
Iter-9920, train loss-0.2077, acc-0.7100, valid loss-0.2307, acc-0.6886, test loss-0.2286, acc-0.6927
Iter-9930, train loss-0.2004, acc-0.7400, valid loss-0.2309, acc-0.6872, test loss-0.2287, acc-0.6910
Iter-9940, train loss-0.2553, acc-0.6700, valid loss-0.2307, acc-0.6864, test loss-0.2285, acc-0.6916
Iter-9950, train loss-0.2548, acc-0.6200, valid loss-0.2310, acc-0.6872, test loss-0.2287, acc-0.6924
Iter-9960, train loss-0.2028, acc-0.7300, valid loss-0.2309, acc-0.6880, test loss-0.2285, acc-0.6931
Iter-9970, train loss-0.2871, acc-0.5600, valid loss-0.2310, acc-0.6876, test loss-0.2286, acc-0.6933
Iter-9980, train loss-0.1841, acc-0.7500, valid loss-0.2313, acc-0.6870, test loss-0.2288, acc-0.6923
Iter-9990, train loss-0.2296, acc-0.7200, valid loss-0.2310, acc-0.6878, test loss-0.2285, acc-0.6924
Iter-10000, train loss-0.2775, acc-0.5500, valid loss-0.2313, acc-0.6872, test loss-0.2289, acc-0.6925
In [33]:
# Display the learning curves: training, validation, and test loss recorded
# during training (one point per logging step — presumably every 10 iterations,
# matching the printed log above; TODO confirm against the training loop).
# %matplotlib inline
# %config InlineBackend.figure_format = 'retina'
import matplotlib.pyplot as plt  # NOTE(review): imports belong in the top imports cell

# Explicit figure/axes interface instead of the pyplot state machine,
# and a title + axis labels so the figure stands alone when skimmed.
fig, ax = plt.subplots()
ax.plot(nn.losses['train'], label='Train loss')
ax.plot(nn.losses['valid'], label='Valid loss')
ax.plot(nn.losses['test'], label='Test loss')
ax.set(title='Learning curves', xlabel='Logging step', ylabel='Loss')
ax.legend()
plt.show()
In [34]:
# Display the accuracy curves for training, validation, and test sets
# (same logging cadence as the loss curves above).
# Explicit figure/axes interface, plus a title and axis labels so the
# figure is interpretable without the surrounding notebook text.
fig, ax = plt.subplots()
ax.plot(nn.losses['train_acc'], label='Train accuracy')
ax.plot(nn.losses['valid_acc'], label='Valid accuracy')
ax.plot(nn.losses['test_acc'], label='Test accuracy')
ax.set(title='Accuracy curves', xlabel='Logging step', ylabel='Accuracy')
ax.legend()
plt.show()
In [ ]:
In [ ]:
Content source: arasdar/DL
Similar notebooks: