In [1]:
EXPERIMENT_NAME = 'experiment_01_stanford40_train'
In [2]:
import tensorflow as tf
import numpy as np
import os, sys
from time import time
from matplotlib import pyplot as plt
from itertools import cycle
import random
import pickle
from utils import optimistic_restore, save
import layers
PWD = os.getcwd()
sys.path.insert(0, os.path.abspath(os.path.join(PWD, '..')))
import pdb
In [3]:
'''
HYPERPARAMS
'''
BATCH_SIZE = 10
PATIENCE = 2
TRIPLETS_TRAIN = '/media/red/capstone/data/stanford40_triplets_train.pkl'
TRIPLETS_VALIDATION = '/media/red/capstone/data/stanford40_triplets_val.pkl'
LEARNING_RATE = 1e-4
LEARNING_RATE_DECAY = 0.7
BETA1 = 0.9
BETA2 = 0.99
NUM_EPOCH = 100
RANDOM_SEED = 1234
SUMMARY_EVERY = 10
VALIDATION_PERCENTAGE = 0.05
SNAPSHOT_MAX = 3 # Keeps the last best 10 snapshots (best determined by validation accuracy)
SNAPSHOT_DIR = os.path.join('/media/red/capstone/snapshots/', EXPERIMENT_NAME)
# Network params
NORMALIZE = True
N_FEAT = 4096
np.random.seed(seed=RANDOM_SEED)
In [4]:
'''
Load Triplets
'''
class SVM_Triplet:
def __init__(self, X1, X2, Y, base_classes, pos_class, new_class):
self.X1 = X1
self.X2 = X2
self.Y = Y
self.base_classes = base_classes
self.pos_class = pos_class
self.new_class = new_class
# Load features
triplets_loadin = {}
triplets_loadin['train'] = pickle.load(open(TRIPLETS_TRAIN, "rb"))
triplets_loadin['validation'] = pickle.load(open(TRIPLETS_VALIDATION, "rb"))
x_data = {
'train':[],
'validation':[]
}
y_data = {
'train':[],
'validation':[]
}
for partition in ['train', 'validation']:
for triplet in triplets_loadin[partition]:
if NORMALIZE:
X1 = triplet.X1 / np.linalg.norm(triplet.X1, axis=0, keepdims=True)
X2 = triplet.X2 / np.linalg.norm(triplet.X2, axis=0, keepdims=True)
Y = triplet.Y / np.linalg.norm(triplet.Y, axis=0, keepdims=True)
x_data[partition].append(np.hstack((X1, X2)))
y_data[partition].append(Y-X1)
else:
x_data[partition].append(np.hstack((triplet.X1, triplet.X2)))
y_data[partition].append(triplet.Y-triplet.X1)
x_data['train'] = np.stack(x_data['train'])
y_data['train'] = np.stack(y_data['train'])
x_data['validation'] = np.stack(x_data['validation'])
y_data['validation'] = np.stack(y_data['validation'])
In [ ]:
'''
Declare model
'''
def lrelu(x, alpha=0.1):
return tf.nn.relu(x) - alpha * tf.nn.relu(-x)
def net(x, is_training):
def dense_block(n_units):
stack.append(layers.fc(
input=stack[-1],
units=n_units,
activation='relu',
name='fc'
)[0])
stack.append(tf.contrib.layers.batch_norm(
stack[-1],
center=True, scale=True,
is_training=is_training,
scope='bn'))
stack.append(lrelu(stack[-1]))
n_units_list = [2*N_FEAT, 2*N_FEAT, N_FEAT]
stack = [x,]
for i, n in enumerate(n_units_list):
with tf.variable_scope("block_"+str(i)):
dense_block(n)
stack.append(layers.fc(
input=stack[-1],
units=4096,
activation='linear',
name='fc_final'
)[0])
return stack
In [ ]:
'''
Model Setup
'''
x = tf.placeholder(dtype=tf.float32, shape=(None, 8192))
y = tf.placeholder(dtype=tf.float32, shape=(None, 4096))
is_training = tf.placeholder(dtype=tf.bool)
global_step = tf.get_variable('global_step', dtype=tf.int32, trainable=False, initializer=0) #incremented everytime optimizer runs
lr = tf.get_variable('learning_rate', dtype=tf.float32, trainable=False, initializer=LEARNING_RATE)
net = net(x, is_training)
'''
Loss, Metrics, and Optimization Setup
'''
pred = net[-1]
pred_normalized = pred / tf.norm(pred, axis=1, keep_dims=True)
y_normalized = y / tf.norm(y,axis=1,keep_dims=True)
reduced_loss = tf.losses.cosine_distance(
labels=y_normalized,
predictions=pred_normalized,
dim=1,
reduction=tf.losses.Reduction.MEAN,
)
train_loss_summary = tf.summary.scalar('training_loss', reduced_loss)
optimizer = tf.train.AdamOptimizer(
learning_rate=lr,
beta1=BETA1,
beta2=BETA2,
name='AdamOptimizer')
tvars = tf.trainable_variables()
grads, _ = tf.clip_by_global_norm(tf.gradients(reduced_loss, tvars), 5.0)
train_op = optimizer.apply_gradients(zip(grads, tvars), global_step=global_step)
'''
TensorBoard Setup
'''
all_train_summary = tf.summary.merge_all()
summary_writer = tf.summary.FileWriter(SNAPSHOT_DIR,
graph=tf.get_default_graph())
'''
Tensorflow Saver Setup
'''
saver = tf.train.Saver(var_list=tf.global_variables(),
max_to_keep=SNAPSHOT_MAX)
'''
Tensorflow Session Setup
'''
tf.set_random_seed(RANDOM_SEED)
config = tf.ConfigProto()
config.gpu_options.per_process_gpu_memory_fraction = 0.8
config.gpu_options.allow_growth = True
config.allow_soft_placement = True
sess = tf.Session(config=config)
init = tf.group(tf.global_variables_initializer(),
tf.local_variables_initializer())
sess.run(init)
'''
Primary Loop
'''
partition_types = ['train', 'validation']
best_loss = float('inf')
patience_counter = 0
step_v = global_step.eval(session=sess)
for epoch in range(NUM_EPOCH):
overall_loss = 0 # Variables used for validation
print('Training Epoch {}/{}'.format(
epoch, NUM_EPOCH))
for partition in partition_types: # Itr through data partitions
n_exemplars = x_data[partition].shape[0]
if partition == 'train':
shuffle_indices = np.arange(n_exemplars)
np.random.shuffle(shuffle_indices)
x_data['train'] = x_data['train'][shuffle_indices, ...]
y_data['train'] = y_data['train'][shuffle_indices, ...]
step_v = global_step.eval(session=sess)
for i in range(0, n_exemplars, BATCH_SIZE):
upper_range = i+BATCH_SIZE
if upper_range > n_exemplars:
upper_range = n_exemplars
x_batch = x_data[partition][i:upper_range, ...]
y_batch = y_data[partition][i:upper_range, ...]
feed_dict = {
x:x_batch,
y:y_batch
}
if partition == 'train':
feed_dict[is_training] = True
else:
feed_dict[is_training] = False
# Run the proper sess run command
if partition == 'train':
start_t = time()
if step_v % SUMMARY_EVERY == 0:
_, loss_v, summary_v = sess.run(
[train_op, reduced_loss, all_train_summary],
feed_dict=feed_dict)
summary_writer.add_summary(summary_v, step_v)
duration = time() - start_t
print('step {:d} \t loss = {:.3f} ({:.3f} sec/step)'.format(
step_v, loss_v, duration))
else: # Vanilla Training
_ = sess.run([train_op], feed_dict=feed_dict)
step_v = global_step.eval(session=sess)
elif partition == 'validation':
feed_dict[is_training] = False
loss_v = sess.run(
[reduced_loss],
feed_dict=feed_dict)[0]
overall_loss += loss_v
# Post-epoch routine for validation set (saving, stat computation, etc)
if partition == 'validation':
duration = time() - start_t
overall_loss /= x_data['validation'].shape[0]
overall_loss_summary = tf.Summary()
overall_loss_summary.value.add(tag='validation_loss', simple_value=overall_loss)
summary_writer.add_summary(overall_loss_summary, step_v)
if overall_loss < best_loss:
print('New Best Loss {:.3f} < Old Best {:.3f}. Saving...'.format(
overall_loss, best_loss))
best_loss = overall_loss
patience_counter = 0
save(saver, sess, SNAPSHOT_DIR, step_v)
else:
patience_counter += 1
if patience_counter >= PATIENCE:
patience_counter = 0
lr_v = lr.eval(session=sess) * LEARNING_RATE_DECAY
lr.assign(lr_v).eval(session=sess)
Training Epoch 0/100
step 0 loss = 1.004 (3.001 sec/step)
step 10 loss = 0.999 (2.756 sec/step)
step 20 loss = 0.991 (2.561 sec/step)
step 30 loss = 0.972 (2.605 sec/step)
step 40 loss = 0.972 (2.527 sec/step)
step 50 loss = 0.968 (2.835 sec/step)
step 60 loss = 0.955 (2.768 sec/step)
step 70 loss = 0.934 (2.706 sec/step)
step 80 loss = 0.934 (2.485 sec/step)
step 90 loss = 0.935 (2.771 sec/step)
step 100 loss = 0.913 (2.771 sec/step)
step 110 loss = 0.905 (2.661 sec/step)
step 120 loss = 0.847 (2.888 sec/step)
step 130 loss = 0.895 (3.087 sec/step)
step 140 loss = 0.847 (2.615 sec/step)
step 150 loss = 0.858 (2.801 sec/step)
step 160 loss = 0.834 (2.875 sec/step)
step 170 loss = 0.846 (2.863 sec/step)
step 180 loss = 0.855 (2.617 sec/step)
step 190 loss = 0.792 (2.575 sec/step)
step 200 loss = 0.842 (2.883 sec/step)
step 210 loss = 0.839 (2.874 sec/step)
step 220 loss = 0.761 (2.938 sec/step)
step 230 loss = 0.803 (2.432 sec/step)
step 240 loss = 0.760 (3.047 sec/step)
step 250 loss = 0.805 (2.672 sec/step)
step 260 loss = 0.817 (2.580 sec/step)
step 270 loss = 0.821 (2.909 sec/step)
step 280 loss = 0.749 (2.830 sec/step)
step 290 loss = 0.762 (2.855 sec/step)
step 300 loss = 0.724 (2.659 sec/step)
step 310 loss = 0.743 (2.918 sec/step)
step 320 loss = 0.742 (3.196 sec/step)
step 330 loss = 0.784 (2.729 sec/step)
step 340 loss = 0.702 (3.020 sec/step)
step 350 loss = 0.749 (2.617 sec/step)
step 360 loss = 0.756 (2.742 sec/step)
step 370 loss = 0.724 (2.752 sec/step)
step 380 loss = 0.783 (2.499 sec/step)
step 390 loss = 0.747 (2.632 sec/step)
step 400 loss = 0.748 (2.680 sec/step)
step 410 loss = 0.770 (2.664 sec/step)
step 420 loss = 0.714 (2.714 sec/step)
step 430 loss = 0.735 (2.943 sec/step)
step 440 loss = 0.729 (2.520 sec/step)
step 450 loss = 0.779 (2.995 sec/step)
step 460 loss = 0.701 (2.605 sec/step)
step 470 loss = 0.704 (2.874 sec/step)
step 480 loss = 0.762 (2.809 sec/step)
step 490 loss = 0.699 (2.824 sec/step)
step 500 loss = 0.697 (2.582 sec/step)
step 510 loss = 0.753 (2.874 sec/step)
step 520 loss = 0.719 (2.718 sec/step)
step 530 loss = 0.658 (2.667 sec/step)
step 540 loss = 0.761 (2.424 sec/step)
step 550 loss = 0.724 (2.486 sec/step)
step 560 loss = 0.733 (2.668 sec/step)
step 570 loss = 0.711 (2.712 sec/step)
step 580 loss = 0.725 (3.067 sec/step)
step 590 loss = 0.694 (2.959 sec/step)
step 600 loss = 0.707 (2.562 sec/step)
step 610 loss = 0.727 (2.715 sec/step)
step 620 loss = 0.666 (2.375 sec/step)
step 630 loss = 0.687 (2.582 sec/step)
step 640 loss = 0.656 (2.692 sec/step)
step 650 loss = 0.663 (3.096 sec/step)
step 660 loss = 0.693 (2.794 sec/step)
step 670 loss = 0.693 (2.679 sec/step)
step 680 loss = 0.686 (2.876 sec/step)
step 690 loss = 0.663 (2.866 sec/step)
step 700 loss = 0.682 (2.521 sec/step)
step 710 loss = 0.716 (2.588 sec/step)
step 720 loss = 0.671 (2.948 sec/step)
step 730 loss = 0.737 (2.771 sec/step)
step 740 loss = 0.667 (2.790 sec/step)
step 750 loss = 0.653 (2.963 sec/step)
step 760 loss = 0.636 (2.722 sec/step)
step 770 loss = 0.701 (2.701 sec/step)
step 780 loss = 0.677 (2.707 sec/step)
step 790 loss = 0.706 (3.085 sec/step)
step 800 loss = 0.666 (2.646 sec/step)
step 810 loss = 0.646 (2.711 sec/step)
step 820 loss = 0.678 (3.137 sec/step)
step 830 loss = 0.684 (2.993 sec/step)
step 840 loss = 0.666 (2.993 sec/step)
step 850 loss = 0.678 (2.620 sec/step)
step 860 loss = 0.655 (2.640 sec/step)
step 870 loss = 0.685 (2.668 sec/step)
step 880 loss = 0.650 (2.792 sec/step)
step 890 loss = 0.650 (2.774 sec/step)
step 900 loss = 0.676 (2.774 sec/step)
step 910 loss = 0.680 (2.831 sec/step)
step 920 loss = 0.693 (2.569 sec/step)
step 930 loss = 0.654 (2.465 sec/step)
step 940 loss = 0.671 (3.147 sec/step)
step 950 loss = 0.688 (2.847 sec/step)
step 960 loss = 0.645 (2.730 sec/step)
step 970 loss = 0.679 (3.074 sec/step)
step 980 loss = 0.678 (2.932 sec/step)
step 990 loss = 0.629 (2.932 sec/step)
step 1000 loss = 0.671 (2.958 sec/step)
step 1010 loss = 0.648 (2.588 sec/step)
step 1020 loss = 0.668 (2.838 sec/step)
step 1030 loss = 0.677 (2.583 sec/step)
step 1040 loss = 0.654 (2.696 sec/step)
step 1050 loss = 0.645 (3.081 sec/step)
step 1060 loss = 0.702 (2.766 sec/step)
step 1070 loss = 0.669 (2.736 sec/step)
step 1080 loss = 0.642 (2.859 sec/step)
step 1090 loss = 0.683 (2.724 sec/step)
step 1100 loss = 0.666 (2.805 sec/step)
step 1110 loss = 0.612 (2.558 sec/step)
step 1120 loss = 0.619 (2.563 sec/step)
step 1130 loss = 0.665 (2.788 sec/step)
step 1140 loss = 0.625 (2.774 sec/step)
step 1150 loss = 0.643 (3.208 sec/step)
step 1160 loss = 0.631 (3.044 sec/step)
step 1170 loss = 0.649 (2.662 sec/step)
step 1180 loss = 0.624 (2.760 sec/step)
step 1190 loss = 0.659 (2.989 sec/step)
step 1200 loss = 0.614 (2.643 sec/step)
step 1210 loss = 0.631 (2.768 sec/step)
step 1220 loss = 0.612 (2.828 sec/step)
step 1230 loss = 0.634 (2.679 sec/step)
step 1240 loss = 0.671 (2.773 sec/step)
step 1250 loss = 0.652 (2.897 sec/step)
step 1260 loss = 0.637 (2.837 sec/step)
step 1270 loss = 0.666 (3.133 sec/step)
step 1280 loss = 0.644 (2.706 sec/step)
step 1290 loss = 0.611 (2.898 sec/step)
step 1300 loss = 0.660 (2.970 sec/step)
step 1310 loss = 0.657 (2.936 sec/step)
step 1320 loss = 0.638 (2.763 sec/step)
step 1330 loss = 0.648 (2.928 sec/step)
step 1340 loss = 0.630 (2.954 sec/step)
step 1350 loss = 0.629 (2.442 sec/step)
step 1360 loss = 0.631 (2.655 sec/step)
step 1370 loss = 0.642 (2.452 sec/step)
step 1380 loss = 0.644 (2.516 sec/step)
step 1390 loss = 0.624 (2.634 sec/step)
step 1400 loss = 0.607 (2.887 sec/step)
step 1410 loss = 0.658 (2.794 sec/step)
step 1420 loss = 0.581 (2.740 sec/step)
step 1430 loss = 0.648 (2.925 sec/step)
step 1440 loss = 0.644 (2.827 sec/step)
step 1450 loss = 0.630 (2.742 sec/step)
step 1460 loss = 0.643 (2.847 sec/step)
step 1470 loss = 0.618 (2.706 sec/step)
step 1480 loss = 0.654 (2.945 sec/step)
step 1490 loss = 0.645 (2.540 sec/step)
step 1500 loss = 0.617 (3.092 sec/step)
step 1510 loss = 0.683 (2.893 sec/step)
step 1520 loss = 0.648 (2.660 sec/step)
step 1530 loss = 0.649 (2.817 sec/step)
step 1540 loss = 0.603 (2.726 sec/step)
step 1550 loss = 0.630 (2.592 sec/step)
step 1560 loss = 0.625 (2.945 sec/step)
step 1570 loss = 0.628 (2.489 sec/step)
step 1580 loss = 0.615 (2.590 sec/step)
step 1590 loss = 0.628 (2.825 sec/step)
step 1600 loss = 0.658 (2.662 sec/step)
step 1610 loss = 0.617 (2.893 sec/step)
step 1620 loss = 0.617 (2.798 sec/step)
step 1630 loss = 0.602 (2.738 sec/step)
step 1640 loss = 0.592 (2.644 sec/step)
step 1650 loss = 0.639 (3.022 sec/step)
step 1660 loss = 0.631 (2.679 sec/step)
step 1670 loss = 0.592 (2.563 sec/step)
step 1680 loss = 0.615 (2.689 sec/step)
step 1690 loss = 0.638 (3.009 sec/step)
step 1700 loss = 0.637 (2.463 sec/step)
step 1710 loss = 0.611 (3.032 sec/step)
step 1720 loss = 0.593 (2.626 sec/step)
step 1730 loss = 0.654 (2.634 sec/step)
step 1740 loss = 0.634 (2.678 sec/step)
step 1750 loss = 0.626 (2.787 sec/step)
step 1760 loss = 0.646 (2.598 sec/step)
step 1770 loss = 0.610 (2.779 sec/step)
step 1780 loss = 0.632 (2.872 sec/step)
step 1790 loss = 0.654 (2.632 sec/step)
step 1800 loss = 0.628 (2.751 sec/step)
step 1810 loss = 0.696 (2.673 sec/step)
step 1820 loss = 0.601 (2.739 sec/step)
step 1830 loss = 0.588 (2.799 sec/step)
step 1840 loss = 0.626 (2.780 sec/step)
step 1850 loss = 0.585 (2.726 sec/step)
step 1860 loss = 0.579 (2.759 sec/step)
step 1870 loss = 0.634 (2.941 sec/step)
step 1880 loss = 0.605 (2.804 sec/step)
step 1890 loss = 0.608 (2.772 sec/step)
step 1900 loss = 0.564 (2.604 sec/step)
step 1910 loss = 0.580 (2.715 sec/step)
step 1920 loss = 0.629 (2.658 sec/step)
step 1930 loss = 0.632 (2.865 sec/step)
step 1940 loss = 0.590 (2.861 sec/step)
step 1950 loss = 0.630 (2.902 sec/step)
step 1960 loss = 0.603 (2.794 sec/step)
step 1970 loss = 0.619 (2.633 sec/step)
step 1980 loss = 0.605 (2.963 sec/step)
step 1990 loss = 0.607 (2.845 sec/step)
step 2000 loss = 0.633 (3.047 sec/step)
step 2010 loss = 0.629 (3.020 sec/step)
step 2020 loss = 0.591 (2.615 sec/step)
step 2030 loss = 0.610 (2.840 sec/step)
step 2040 loss = 0.622 (2.876 sec/step)
step 2050 loss = 0.628 (2.714 sec/step)
step 2060 loss = 0.607 (2.585 sec/step)
step 2070 loss = 0.580 (2.779 sec/step)
step 2080 loss = 0.592 (2.912 sec/step)
step 2090 loss = 0.646 (2.647 sec/step)
step 2100 loss = 0.649 (3.088 sec/step)
step 2110 loss = 0.647 (2.554 sec/step)
step 2120 loss = 0.600 (2.993 sec/step)
step 2130 loss = 0.632 (3.153 sec/step)
step 2140 loss = 0.592 (2.909 sec/step)
step 2150 loss = 0.630 (2.733 sec/step)
step 2160 loss = 0.628 (2.690 sec/step)
step 2170 loss = 0.609 (2.653 sec/step)
step 2180 loss = 0.616 (3.092 sec/step)
step 2190 loss = 0.582 (2.796 sec/step)
step 2200 loss = 0.595 (2.627 sec/step)
step 2210 loss = 0.603 (2.651 sec/step)
step 2220 loss = 0.569 (2.995 sec/step)
step 2230 loss = 0.603 (2.750 sec/step)
step 2240 loss = 0.609 (2.575 sec/step)
step 2250 loss = 0.571 (2.577 sec/step)
step 2260 loss = 0.553 (2.663 sec/step)
step 2270 loss = 0.564 (2.709 sec/step)
step 2280 loss = 0.605 (2.599 sec/step)
step 2290 loss = 0.613 (2.798 sec/step)
step 2300 loss = 0.637 (2.638 sec/step)
step 2310 loss = 0.594 (2.766 sec/step)
step 2320 loss = 0.607 (2.545 sec/step)
step 2330 loss = 0.608 (2.795 sec/step)
step 2340 loss = 0.559 (2.930 sec/step)
step 2350 loss = 0.609 (3.046 sec/step)
step 2360 loss = 0.608 (2.721 sec/step)
step 2370 loss = 0.583 (3.204 sec/step)
step 2380 loss = 0.592 (2.784 sec/step)
step 2390 loss = 0.574 (2.656 sec/step)
step 2400 loss = 0.606 (3.080 sec/step)
step 2410 loss = 0.640 (2.722 sec/step)
step 2420 loss = 0.600 (2.973 sec/step)
step 2430 loss = 0.624 (2.816 sec/step)
step 2440 loss = 0.618 (2.825 sec/step)
step 2450 loss = 0.590 (2.636 sec/step)
step 2460 loss = 0.555 (2.633 sec/step)
step 2470 loss = 0.580 (2.702 sec/step)
step 2480 loss = 0.610 (2.756 sec/step)
step 2490 loss = 0.614 (2.943 sec/step)
step 2500 loss = 0.572 (2.682 sec/step)
step 2510 loss = 0.620 (2.905 sec/step)
step 2520 loss = 0.590 (2.620 sec/step)
step 2530 loss = 0.600 (2.834 sec/step)
step 2540 loss = 0.585 (2.907 sec/step)
step 2550 loss = 0.582 (2.765 sec/step)
step 2560 loss = 0.600 (2.773 sec/step)
step 2570 loss = 0.577 (3.085 sec/step)
step 2580 loss = 0.605 (2.825 sec/step)
step 2590 loss = 0.605 (2.691 sec/step)
step 2600 loss = 0.557 (2.719 sec/step)
step 2610 loss = 0.612 (3.057 sec/step)
step 2620 loss = 0.554 (2.873 sec/step)
step 2630 loss = 0.572 (3.021 sec/step)
step 2640 loss = 0.599 (2.543 sec/step)
step 2650 loss = 0.615 (2.896 sec/step)
step 2660 loss = 0.598 (2.890 sec/step)
step 2670 loss = 0.606 (2.554 sec/step)
step 2680 loss = 0.595 (2.908 sec/step)
step 2690 loss = 0.567 (2.887 sec/step)
step 2700 loss = 0.616 (2.643 sec/step)
step 2710 loss = 0.611 (2.769 sec/step)
step 2720 loss = 0.575 (2.596 sec/step)
step 2730 loss = 0.637 (2.709 sec/step)
step 2740 loss = 0.592 (2.853 sec/step)
step 2750 loss = 0.574 (2.747 sec/step)
step 2760 loss = 0.578 (2.954 sec/step)
step 2770 loss = 0.605 (2.885 sec/step)
step 2780 loss = 0.601 (3.000 sec/step)
step 2790 loss = 0.608 (2.824 sec/step)
step 2800 loss = 0.566 (3.110 sec/step)
step 2810 loss = 0.575 (2.642 sec/step)
step 2820 loss = 0.583 (2.792 sec/step)
step 2830 loss = 0.592 (2.653 sec/step)
step 2840 loss = 0.604 (2.661 sec/step)
step 2850 loss = 0.601 (2.744 sec/step)
step 2860 loss = 0.567 (2.918 sec/step)
step 2870 loss = 0.547 (3.103 sec/step)
step 2880 loss = 0.571 (2.854 sec/step)
step 2890 loss = 0.582 (2.791 sec/step)
step 2900 loss = 0.578 (2.942 sec/step)
step 2910 loss = 0.590 (2.492 sec/step)
step 2920 loss = 0.567 (2.715 sec/step)
step 2930 loss = 0.581 (2.955 sec/step)
step 2940 loss = 0.578 (2.921 sec/step)
step 2950 loss = 0.616 (2.922 sec/step)
step 2960 loss = 0.594 (2.791 sec/step)
step 2970 loss = 0.551 (2.806 sec/step)
step 2980 loss = 0.574 (2.846 sec/step)
step 2990 loss = 0.600 (3.005 sec/step)
step 3000 loss = 0.597 (2.940 sec/step)
step 3010 loss = 0.619 (2.882 sec/step)
step 3020 loss = 0.614 (2.817 sec/step)
step 3030 loss = 0.621 (2.896 sec/step)
step 3040 loss = 0.555 (2.711 sec/step)
step 3050 loss = 0.553 (2.785 sec/step)
step 3060 loss = 0.546 (2.771 sec/step)
step 3070 loss = 0.586 (2.969 sec/step)
step 3080 loss = 0.562 (3.075 sec/step)
step 3090 loss = 0.609 (2.757 sec/step)
step 3100 loss = 0.628 (2.888 sec/step)
step 3110 loss = 0.584 (2.784 sec/step)
step 3120 loss = 0.581 (2.954 sec/step)
step 3130 loss = 0.570 (2.864 sec/step)
step 3140 loss = 0.550 (2.490 sec/step)
step 3150 loss = 0.596 (2.946 sec/step)
step 3160 loss = 0.600 (2.918 sec/step)
step 3170 loss = 0.579 (3.026 sec/step)
step 3180 loss = 0.570 (2.958 sec/step)
step 3190 loss = 0.566 (2.507 sec/step)
step 3200 loss = 0.547 (2.720 sec/step)
step 3210 loss = 0.590 (2.869 sec/step)
step 3220 loss = 0.613 (2.700 sec/step)
step 3230 loss = 0.630 (2.931 sec/step)
step 3240 loss = 0.567 (2.880 sec/step)
step 3250 loss = 0.577 (2.531 sec/step)
step 3260 loss = 0.582 (2.737 sec/step)
step 3270 loss = 0.638 (3.018 sec/step)
step 3280 loss = 0.531 (2.677 sec/step)
step 3290 loss = 0.615 (2.888 sec/step)
step 3300 loss = 0.569 (2.723 sec/step)
step 3310 loss = 0.546 (2.736 sec/step)
step 3320 loss = 0.596 (2.695 sec/step)
step 3330 loss = 0.572 (2.905 sec/step)
step 3340 loss = 0.570 (2.964 sec/step)
step 3350 loss = 0.634 (2.777 sec/step)
step 3360 loss = 0.532 (3.138 sec/step)
step 3370 loss = 0.616 (2.914 sec/step)
step 3380 loss = 0.573 (2.825 sec/step)
step 3390 loss = 0.562 (2.579 sec/step)
step 3400 loss = 0.575 (2.645 sec/step)
step 3410 loss = 0.552 (2.571 sec/step)
step 3420 loss = 0.572 (2.646 sec/step)
step 3430 loss = 0.568 (2.607 sec/step)
step 3440 loss = 0.560 (2.777 sec/step)
step 3450 loss = 0.589 (2.675 sec/step)
step 3460 loss = 0.533 (2.928 sec/step)
step 3470 loss = 0.572 (2.829 sec/step)
step 3480 loss = 0.561 (2.910 sec/step)
step 3490 loss = 0.563 (2.853 sec/step)
step 3500 loss = 0.586 (2.956 sec/step)
step 3510 loss = 0.586 (2.924 sec/step)
step 3520 loss = 0.574 (2.713 sec/step)
step 3530 loss = 0.557 (2.655 sec/step)
step 3540 loss = 0.589 (2.346 sec/step)
step 3550 loss = 0.583 (2.752 sec/step)
step 3560 loss = 0.573 (2.914 sec/step)
step 3570 loss = 0.573 (2.985 sec/step)
step 3580 loss = 0.582 (2.714 sec/step)
step 3590 loss = 0.610 (2.714 sec/step)
step 3600 loss = 0.516 (2.989 sec/step)
step 3610 loss = 0.546 (2.732 sec/step)
step 3620 loss = 0.578 (3.063 sec/step)
step 3630 loss = 0.598 (2.867 sec/step)
step 3640 loss = 0.570 (3.115 sec/step)
step 3650 loss = 0.597 (2.900 sec/step)
step 3660 loss = 0.596 (2.964 sec/step)
step 3670 loss = 0.594 (2.815 sec/step)
step 3680 loss = 0.602 (2.877 sec/step)
step 3690 loss = 0.579 (2.729 sec/step)
step 3700 loss = 0.569 (2.781 sec/step)
New Best Loss 0.079 < Old Best inf. Saving...
The checkpoint has been created.
Training Epoch 1/100
step 3710 loss = 0.547 (2.670 sec/step)
step 3720 loss = 0.580 (2.569 sec/step)
step 3730 loss = 0.513 (2.882 sec/step)
step 3740 loss = 0.534 (2.727 sec/step)
step 3750 loss = 0.519 (2.566 sec/step)
step 3760 loss = 0.530 (2.852 sec/step)
step 3770 loss = 0.571 (2.705 sec/step)
step 3780 loss = 0.557 (3.042 sec/step)
step 3790 loss = 0.573 (2.799 sec/step)
step 3800 loss = 0.530 (2.894 sec/step)
step 3810 loss = 0.521 (2.862 sec/step)
step 3820 loss = 0.526 (2.683 sec/step)
step 3830 loss = 0.513 (2.903 sec/step)
step 3840 loss = 0.522 (2.831 sec/step)
step 3850 loss = 0.535 (2.764 sec/step)
step 3860 loss = 0.536 (2.700 sec/step)
step 3870 loss = 0.549 (2.926 sec/step)
step 3880 loss = 0.560 (2.839 sec/step)
step 3890 loss = 0.570 (3.095 sec/step)
step 3900 loss = 0.534 (2.903 sec/step)
step 3910 loss = 0.549 (2.914 sec/step)
step 3920 loss = 0.525 (3.156 sec/step)
step 3930 loss = 0.558 (2.834 sec/step)
step 3940 loss = 0.528 (2.991 sec/step)
step 3950 loss = 0.555 (2.509 sec/step)
step 3960 loss = 0.546 (2.401 sec/step)
step 3970 loss = 0.535 (2.419 sec/step)
step 3980 loss = 0.550 (2.442 sec/step)
step 3990 loss = 0.534 (2.274 sec/step)
step 4000 loss = 0.570 (2.442 sec/step)
step 4010 loss = 0.534 (2.195 sec/step)
step 4020 loss = 0.491 (2.284 sec/step)
step 4030 loss = 0.534 (2.228 sec/step)
step 4040 loss = 0.491 (2.369 sec/step)
step 4050 loss = 0.535 (3.290 sec/step)
step 4060 loss = 0.565 (2.593 sec/step)
step 4070 loss = 0.553 (2.426 sec/step)
step 4080 loss = 0.536 (3.006 sec/step)
step 4090 loss = 0.568 (2.724 sec/step)
step 4100 loss = 0.563 (2.651 sec/step)
step 4110 loss = 0.534 (2.904 sec/step)
step 4120 loss = 0.523 (2.988 sec/step)
step 4130 loss = 0.535 (3.039 sec/step)
step 4140 loss = 0.541 (2.783 sec/step)
step 4150 loss = 0.521 (2.906 sec/step)
step 4160 loss = 0.552 (3.053 sec/step)
step 4170 loss = 0.534 (2.798 sec/step)
step 4180 loss = 0.527 (2.610 sec/step)
step 4190 loss = 0.566 (2.951 sec/step)
step 4200 loss = 0.524 (2.379 sec/step)
step 4210 loss = 0.528 (2.939 sec/step)
step 4220 loss = 0.498 (2.748 sec/step)
step 4230 loss = 0.516 (2.744 sec/step)
step 4240 loss = 0.516 (2.975 sec/step)
step 4250 loss = 0.517 (2.569 sec/step)
step 4260 loss = 0.563 (2.922 sec/step)
step 4270 loss = 0.562 (2.616 sec/step)
step 4280 loss = 0.532 (2.864 sec/step)
step 4290 loss = 0.530 (2.743 sec/step)
step 4300 loss = 0.560 (2.708 sec/step)
step 4310 loss = 0.550 (2.709 sec/step)
step 4320 loss = 0.506 (2.836 sec/step)
step 4330 loss = 0.558 (2.930 sec/step)
step 4340 loss = 0.548 (2.852 sec/step)
step 4350 loss = 0.557 (2.853 sec/step)
step 4360 loss = 0.553 (2.758 sec/step)
step 4370 loss = 0.574 (2.830 sec/step)
step 4380 loss = 0.578 (2.551 sec/step)
step 4390 loss = 0.553 (2.791 sec/step)
step 4400 loss = 0.505 (3.100 sec/step)
step 4410 loss = 0.531 (2.617 sec/step)
step 4420 loss = 0.540 (2.684 sec/step)
step 4430 loss = 0.566 (2.838 sec/step)
step 4440 loss = 0.549 (2.880 sec/step)
step 4450 loss = 0.503 (2.927 sec/step)
step 4460 loss = 0.503 (2.766 sec/step)
step 4470 loss = 0.560 (2.675 sec/step)
step 4480 loss = 0.555 (2.910 sec/step)
step 4490 loss = 0.530 (2.590 sec/step)
step 4500 loss = 0.491 (2.958 sec/step)
step 4510 loss = 0.534 (2.819 sec/step)
step 4520 loss = 0.522 (2.753 sec/step)
step 4530 loss = 0.521 (2.907 sec/step)
step 4540 loss = 0.544 (2.889 sec/step)
step 4550 loss = 0.520 (2.905 sec/step)
step 4560 loss = 0.553 (3.038 sec/step)
step 4570 loss = 0.530 (2.647 sec/step)
step 4580 loss = 0.539 (2.759 sec/step)
step 4590 loss = 0.553 (2.604 sec/step)
step 4600 loss = 0.538 (2.676 sec/step)
step 4610 loss = 0.547 (2.616 sec/step)
step 4620 loss = 0.516 (2.730 sec/step)
step 4630 loss = 0.525 (2.727 sec/step)
step 4640 loss = 0.498 (3.115 sec/step)
step 4650 loss = 0.529 (2.815 sec/step)
step 4660 loss = 0.503 (2.757 sec/step)
step 4670 loss = 0.567 (2.837 sec/step)
step 4680 loss = 0.536 (2.743 sec/step)
step 4690 loss = 0.546 (2.858 sec/step)
step 4700 loss = 0.517 (2.690 sec/step)
step 4710 loss = 0.516 (2.922 sec/step)
step 4720 loss = 0.546 (2.707 sec/step)
step 4730 loss = 0.541 (2.776 sec/step)
step 4740 loss = 0.521 (3.073 sec/step)
step 4750 loss = 0.527 (2.833 sec/step)
step 4760 loss = 0.554 (2.503 sec/step)
step 4770 loss = 0.531 (3.380 sec/step)
step 4780 loss = 0.526 (2.684 sec/step)
step 4790 loss = 0.541 (2.585 sec/step)
step 4800 loss = 0.509 (2.832 sec/step)
step 4810 loss = 0.518 (2.515 sec/step)
step 4820 loss = 0.499 (2.825 sec/step)
step 4830 loss = 0.540 (2.909 sec/step)
step 4840 loss = 0.523 (2.975 sec/step)
step 4850 loss = 0.533 (2.713 sec/step)
step 4860 loss = 0.563 (2.575 sec/step)
step 4870 loss = 0.539 (2.743 sec/step)
step 4880 loss = 0.555 (2.688 sec/step)
step 4890 loss = 0.488 (2.523 sec/step)
step 4900 loss = 0.547 (2.511 sec/step)
step 4910 loss = 0.557 (2.940 sec/step)
step 4920 loss = 0.564 (2.924 sec/step)
step 4930 loss = 0.554 (2.921 sec/step)
step 4940 loss = 0.545 (2.515 sec/step)
step 4950 loss = 0.524 (2.891 sec/step)
step 4960 loss = 0.554 (2.948 sec/step)
step 4970 loss = 0.536 (2.753 sec/step)
step 4980 loss = 0.535 (2.570 sec/step)
step 4990 loss = 0.520 (2.501 sec/step)
step 5000 loss = 0.515 (2.716 sec/step)
step 5010 loss = 0.508 (2.732 sec/step)
step 5020 loss = 0.552 (2.703 sec/step)
step 5030 loss = 0.556 (2.676 sec/step)
step 5040 loss = 0.549 (3.089 sec/step)
step 5050 loss = 0.530 (2.679 sec/step)
step 5060 loss = 0.537 (2.634 sec/step)
step 5070 loss = 0.558 (2.974 sec/step)
step 5080 loss = 0.515 (2.821 sec/step)
step 5090 loss = 0.556 (2.969 sec/step)
step 5100 loss = 0.501 (3.031 sec/step)
step 5110 loss = 0.518 (3.116 sec/step)
step 5120 loss = 0.553 (2.561 sec/step)
step 5130 loss = 0.549 (2.690 sec/step)
step 5140 loss = 0.529 (2.748 sec/step)
step 5150 loss = 0.546 (2.694 sec/step)
step 5160 loss = 0.553 (2.509 sec/step)
step 5170 loss = 0.556 (3.152 sec/step)
step 5180 loss = 0.511 (2.745 sec/step)
step 5190 loss = 0.543 (2.633 sec/step)
step 5200 loss = 0.546 (2.887 sec/step)
step 5210 loss = 0.519 (2.840 sec/step)
step 5220 loss = 0.541 (2.913 sec/step)
step 5230 loss = 0.559 (2.876 sec/step)
step 5240 loss = 0.523 (2.592 sec/step)
step 5250 loss = 0.535 (2.384 sec/step)
step 5260 loss = 0.516 (2.824 sec/step)
step 5270 loss = 0.520 (2.892 sec/step)
step 5280 loss = 0.511 (2.847 sec/step)
step 5290 loss = 0.528 (2.765 sec/step)
step 5300 loss = 0.503 (2.473 sec/step)
step 5310 loss = 0.520 (2.675 sec/step)
step 5320 loss = 0.511 (2.778 sec/step)
step 5330 loss = 0.547 (2.710 sec/step)
step 5340 loss = 0.573 (2.784 sec/step)
step 5350 loss = 0.566 (2.728 sec/step)
step 5360 loss = 0.504 (2.950 sec/step)
step 5370 loss = 0.517 (2.950 sec/step)
step 5380 loss = 0.506 (2.955 sec/step)
step 5390 loss = 0.536 (2.791 sec/step)
step 5400 loss = 0.533 (2.980 sec/step)
step 5410 loss = 0.540 (2.695 sec/step)
step 5420 loss = 0.541 (3.007 sec/step)
step 5430 loss = 0.541 (2.850 sec/step)
step 5440 loss = 0.518 (2.863 sec/step)
step 5450 loss = 0.528 (2.542 sec/step)
step 5460 loss = 0.522 (2.818 sec/step)
step 5470 loss = 0.524 (2.608 sec/step)
step 5480 loss = 0.519 (2.530 sec/step)
step 5490 loss = 0.530 (2.734 sec/step)
step 5500 loss = 0.541 (2.694 sec/step)
step 5510 loss = 0.544 (2.516 sec/step)
step 5520 loss = 0.495 (2.950 sec/step)
step 5530 loss = 0.532 (2.960 sec/step)
step 5540 loss = 0.494 (2.637 sec/step)
step 5550 loss = 0.517 (2.798 sec/step)
step 5560 loss = 0.528 (2.719 sec/step)
step 5570 loss = 0.524 (2.898 sec/step)
step 5580 loss = 0.488 (2.882 sec/step)
step 5590 loss = 0.527 (2.896 sec/step)
step 5600 loss = 0.537 (2.860 sec/step)
step 5610 loss = 0.517 (2.690 sec/step)
step 5620 loss = 0.493 (2.656 sec/step)
step 5630 loss = 0.518 (2.834 sec/step)
step 5640 loss = 0.518 (2.680 sec/step)
step 5650 loss = 0.546 (2.732 sec/step)
step 5660 loss = 0.485 (2.955 sec/step)
step 5670 loss = 0.531 (3.070 sec/step)
step 5680 loss = 0.541 (2.774 sec/step)
step 5690 loss = 0.557 (2.589 sec/step)
step 5700 loss = 0.540 (3.013 sec/step)
step 5710 loss = 0.579 (2.839 sec/step)
step 5720 loss = 0.537 (2.586 sec/step)
step 5730 loss = 0.533 (3.240 sec/step)
step 5740 loss = 0.587 (2.548 sec/step)
step 5750 loss = 0.536 (2.765 sec/step)
step 5760 loss = 0.536 (2.663 sec/step)
step 5770 loss = 0.511 (2.736 sec/step)
step 5780 loss = 0.504 (3.006 sec/step)
step 5790 loss = 0.527 (3.016 sec/step)
step 5800 loss = 0.516 (2.754 sec/step)
step 5810 loss = 0.544 (3.093 sec/step)
step 5820 loss = 0.537 (2.867 sec/step)
step 5830 loss = 0.547 (2.788 sec/step)
step 5840 loss = 0.521 (2.787 sec/step)
step 5850 loss = 0.519 (2.738 sec/step)
step 5860 loss = 0.527 (2.917 sec/step)
step 5870 loss = 0.550 (3.022 sec/step)
step 5880 loss = 0.546 (2.640 sec/step)
step 5890 loss = 0.539 (2.657 sec/step)
step 5900 loss = 0.514 (2.629 sec/step)
step 5910 loss = 0.515 (2.581 sec/step)
step 5920 loss = 0.526 (2.769 sec/step)
step 5930 loss = 0.554 (2.980 sec/step)
step 5940 loss = 0.557 (2.604 sec/step)
step 5950 loss = 0.543 (2.747 sec/step)
step 5960 loss = 0.491 (2.650 sec/step)
step 5970 loss = 0.498 (2.662 sec/step)
step 5980 loss = 0.579 (3.110 sec/step)
step 5990 loss = 0.535 (2.922 sec/step)
step 6000 loss = 0.530 (2.812 sec/step)
step 6010 loss = 0.571 (2.761 sec/step)
step 6020 loss = 0.544 (2.597 sec/step)
step 6030 loss = 0.499 (2.826 sec/step)
step 6040 loss = 0.502 (2.850 sec/step)
step 6050 loss = 0.542 (2.944 sec/step)
step 6060 loss = 0.535 (2.961 sec/step)
step 6070 loss = 0.518 (3.014 sec/step)
step 6080 loss = 0.530 (2.780 sec/step)
step 6090 loss = 0.537 (2.689 sec/step)
step 6100 loss = 0.474 (3.041 sec/step)
step 6110 loss = 0.525 (2.819 sec/step)
step 6120 loss = 0.513 (2.573 sec/step)
step 6130 loss = 0.530 (2.941 sec/step)
step 6140 loss = 0.537 (2.815 sec/step)
step 6150 loss = 0.508 (3.217 sec/step)
step 6160 loss = 0.534 (2.610 sec/step)
step 6170 loss = 0.503 (3.027 sec/step)
step 6180 loss = 0.511 (2.918 sec/step)
step 6190 loss = 0.562 (2.697 sec/step)
step 6200 loss = 0.523 (2.980 sec/step)
step 6210 loss = 0.498 (2.834 sec/step)
step 6220 loss = 0.533 (2.761 sec/step)
step 6230 loss = 0.545 (2.686 sec/step)
step 6240 loss = 0.529 (2.811 sec/step)
step 6250 loss = 0.552 (2.849 sec/step)
step 6260 loss = 0.566 (2.700 sec/step)
step 6270 loss = 0.539 (2.730 sec/step)
step 6280 loss = 0.536 (3.095 sec/step)
step 6290 loss = 0.529 (2.768 sec/step)
step 6300 loss = 0.552 (2.665 sec/step)
step 6310 loss = 0.518 (2.735 sec/step)
step 6320 loss = 0.567 (2.730 sec/step)
step 6330 loss = 0.526 (2.783 sec/step)
step 6340 loss = 0.535 (2.867 sec/step)
step 6350 loss = 0.557 (2.990 sec/step)
step 6360 loss = 0.521 (2.687 sec/step)
step 6370 loss = 0.522 (2.889 sec/step)
step 6380 loss = 0.541 (2.708 sec/step)
step 6390 loss = 0.529 (3.005 sec/step)
step 6400 loss = 0.522 (2.697 sec/step)
step 6410 loss = 0.507 (2.946 sec/step)
step 6420 loss = 0.536 (2.869 sec/step)
step 6430 loss = 0.478 (2.998 sec/step)
step 6440 loss = 0.554 (2.944 sec/step)
step 6450 loss = 0.498 (3.211 sec/step)
step 6460 loss = 0.511 (2.955 sec/step)
step 6470 loss = 0.504 (2.774 sec/step)
step 6480 loss = 0.509 (2.933 sec/step)
step 6490 loss = 0.501 (2.760 sec/step)
step 6500 loss = 0.514 (2.891 sec/step)
step 6510 loss = 0.527 (2.788 sec/step)
step 6520 loss = 0.541 (2.742 sec/step)
step 6530 loss = 0.545 (2.893 sec/step)
step 6540 loss = 0.569 (2.829 sec/step)
step 6550 loss = 0.497 (2.934 sec/step)
step 6560 loss = 0.546 (2.984 sec/step)
step 6570 loss = 0.489 (2.437 sec/step)
step 6580 loss = 0.549 (2.765 sec/step)
step 6590 loss = 0.525 (2.468 sec/step)
step 6600 loss = 0.512 (3.272 sec/step)
step 6610 loss = 0.507 (2.692 sec/step)
step 6620 loss = 0.533 (2.967 sec/step)
step 6630 loss = 0.539 (2.487 sec/step)
step 6640 loss = 0.526 (2.637 sec/step)
step 6650 loss = 0.521 (2.633 sec/step)
step 6660 loss = 0.548 (3.084 sec/step)
step 6670 loss = 0.496 (2.773 sec/step)
step 6680 loss = 0.525 (2.770 sec/step)
step 6690 loss = 0.513 (2.959 sec/step)
step 6700 loss = 0.537 (2.819 sec/step)
step 6710 loss = 0.531 (2.634 sec/step)
step 6720 loss = 0.508 (2.920 sec/step)
step 6730 loss = 0.537 (2.573 sec/step)
step 6740 loss = 0.541 (2.844 sec/step)
step 6750 loss = 0.514 (2.672 sec/step)
step 6760 loss = 0.540 (2.675 sec/step)
step 6770 loss = 0.506 (2.798 sec/step)
step 6780 loss = 0.548 (2.931 sec/step)
step 6790 loss = 0.553 (3.065 sec/step)
step 6800 loss = 0.496 (2.674 sec/step)
step 6810 loss = 0.528 (3.072 sec/step)
step 6820 loss = 0.502 (2.706 sec/step)
step 6830 loss = 0.570 (3.076 sec/step)
step 6840 loss = 0.522 (2.540 sec/step)
step 6850 loss = 0.524 (2.616 sec/step)
step 6860 loss = 0.534 (2.653 sec/step)
step 6870 loss = 0.578 (3.053 sec/step)
step 6880 loss = 0.524 (2.644 sec/step)
step 6890 loss = 0.528 (2.762 sec/step)
step 6900 loss = 0.555 (2.613 sec/step)
step 6910 loss = 0.573 (2.535 sec/step)
step 6920 loss = 0.537 (3.033 sec/step)
step 6930 loss = 0.530 (2.875 sec/step)
step 6940 loss = 0.510 (2.744 sec/step)
step 6950 loss = 0.497 (2.439 sec/step)
step 6960 loss = 0.490 (2.743 sec/step)
step 6970 loss = 0.522 (2.930 sec/step)
step 6980 loss = 0.511 (2.660 sec/step)
step 6990 loss = 0.526 (3.123 sec/step)
step 7000 loss = 0.515 (2.785 sec/step)
step 7010 loss = 0.529 (2.684 sec/step)
step 7020 loss = 0.504 (2.865 sec/step)
step 7030 loss = 0.524 (2.784 sec/step)
step 7040 loss = 0.510 (2.881 sec/step)
step 7050 loss = 0.556 (2.746 sec/step)
step 7060 loss = 0.523 (2.625 sec/step)
step 7070 loss = 0.489 (2.945 sec/step)
step 7080 loss = 0.476 (2.625 sec/step)
step 7090 loss = 0.497 (2.584 sec/step)
step 7100 loss = 0.495 (3.111 sec/step)
step 7110 loss = 0.563 (2.991 sec/step)
step 7120 loss = 0.506 (3.044 sec/step)
step 7130 loss = 0.537 (3.130 sec/step)
step 7140 loss = 0.553 (2.438 sec/step)
step 7150 loss = 0.507 (3.030 sec/step)
step 7160 loss = 0.504 (2.723 sec/step)
step 7170 loss = 0.526 (2.734 sec/step)
step 7180 loss = 0.518 (2.905 sec/step)
step 7190 loss = 0.489 (2.830 sec/step)
step 7200 loss = 0.537 (2.804 sec/step)
step 7210 loss = 0.533 (2.863 sec/step)
step 7220 loss = 0.518 (3.000 sec/step)
step 7230 loss = 0.536 (2.564 sec/step)
step 7240 loss = 0.535 (2.898 sec/step)
step 7250 loss = 0.526 (2.766 sec/step)
step 7260 loss = 0.518 (3.056 sec/step)
step 7270 loss = 0.526 (2.715 sec/step)
step 7280 loss = 0.523 (2.934 sec/step)
step 7290 loss = 0.564 (2.895 sec/step)
step 7300 loss = 0.529 (2.972 sec/step)
step 7310 loss = 0.543 (2.631 sec/step)
step 7320 loss = 0.543 (2.803 sec/step)
step 7330 loss = 0.533 (2.911 sec/step)
step 7340 loss = 0.541 (2.686 sec/step)
step 7350 loss = 0.566 (2.645 sec/step)
step 7360 loss = 0.541 (2.703 sec/step)
step 7370 loss = 0.517 (2.694 sec/step)
step 7380 loss = 0.514 (2.789 sec/step)
step 7390 loss = 0.509 (2.855 sec/step)
step 7400 loss = 0.530 (2.951 sec/step)
step 7410 loss = 0.520 (2.771 sec/step)
New Best Loss 0.078 < Old Best 0.079. Saving...
The checkpoint has been created.
Training Epoch 2/100
step 7420 loss = 0.458 (3.111 sec/step)
step 7430 loss = 0.486 (2.845 sec/step)
step 7440 loss = 0.474 (2.845 sec/step)
step 7450 loss = 0.487 (2.649 sec/step)
step 7460 loss = 0.478 (2.672 sec/step)
step 7470 loss = 0.503 (3.219 sec/step)
step 7480 loss = 0.476 (3.019 sec/step)
step 7490 loss = 0.477 (2.855 sec/step)
step 7500 loss = 0.451 (3.000 sec/step)
step 7510 loss = 0.490 (2.849 sec/step)
step 7520 loss = 0.481 (2.577 sec/step)
step 7530 loss = 0.486 (3.008 sec/step)
step 7540 loss = 0.455 (2.789 sec/step)
step 7550 loss = 0.485 (2.807 sec/step)
step 7560 loss = 0.491 (2.688 sec/step)
step 7570 loss = 0.444 (2.709 sec/step)
step 7580 loss = 0.438 (2.800 sec/step)
step 7590 loss = 0.473 (2.653 sec/step)
step 7600 loss = 0.443 (2.619 sec/step)
step 7610 loss = 0.473 (2.582 sec/step)
step 7620 loss = 0.508 (2.932 sec/step)
step 7630 loss = 0.465 (3.010 sec/step)
step 7640 loss = 0.477 (2.690 sec/step)
step 7650 loss = 0.464 (2.588 sec/step)
step 7660 loss = 0.500 (2.553 sec/step)
step 7670 loss = 0.495 (2.723 sec/step)
step 7680 loss = 0.470 (3.080 sec/step)
step 7690 loss = 0.464 (2.631 sec/step)
step 7700 loss = 0.449 (2.934 sec/step)
step 7710 loss = 0.489 (3.181 sec/step)
step 7720 loss = 0.468 (3.047 sec/step)
step 7730 loss = 0.480 (2.908 sec/step)
step 7740 loss = 0.490 (2.874 sec/step)
step 7750 loss = 0.491 (2.696 sec/step)
step 7760 loss = 0.495 (2.730 sec/step)
step 7770 loss = 0.460 (2.826 sec/step)
step 7780 loss = 0.457 (2.806 sec/step)
step 7790 loss = 0.468 (2.867 sec/step)
step 7800 loss = 0.483 (3.378 sec/step)
step 7810 loss = 0.505 (3.132 sec/step)
step 7820 loss = 0.477 (2.602 sec/step)
step 7830 loss = 0.516 (3.031 sec/step)
step 7840 loss = 0.444 (2.549 sec/step)
step 7850 loss = 0.463 (2.533 sec/step)
step 7860 loss = 0.463 (3.166 sec/step)
step 7870 loss = 0.445 (2.739 sec/step)
step 7880 loss = 0.475 (3.064 sec/step)
step 7890 loss = 0.490 (2.673 sec/step)
step 7900 loss = 0.451 (2.809 sec/step)
step 7910 loss = 0.455 (2.558 sec/step)
step 7920 loss = 0.443 (2.840 sec/step)
step 7930 loss = 0.436 (2.913 sec/step)
step 7940 loss = 0.472 (2.809 sec/step)
step 7950 loss = 0.496 (3.052 sec/step)
step 7960 loss = 0.493 (2.882 sec/step)
step 7970 loss = 0.477 (2.861 sec/step)
step 7980 loss = 0.496 (2.621 sec/step)
step 7990 loss = 0.443 (2.790 sec/step)
step 8000 loss = 0.452 (2.791 sec/step)
step 8010 loss = 0.478 (2.902 sec/step)
step 8020 loss = 0.431 (2.822 sec/step)
step 8030 loss = 0.480 (2.889 sec/step)
step 8040 loss = 0.475 (2.682 sec/step)
step 8050 loss = 0.491 (3.262 sec/step)
step 8060 loss = 0.464 (2.935 sec/step)
step 8070 loss = 0.491 (2.587 sec/step)
step 8080 loss = 0.475 (2.849 sec/step)
step 8090 loss = 0.464 (2.722 sec/step)
step 8100 loss = 0.482 (2.875 sec/step)
step 8110 loss = 0.490 (3.089 sec/step)
step 8120 loss = 0.480 (2.794 sec/step)
step 8130 loss = 0.459 (2.911 sec/step)
step 8140 loss = 0.478 (3.021 sec/step)
step 8150 loss = 0.509 (2.953 sec/step)
step 8160 loss = 0.493 (3.058 sec/step)
step 8170 loss = 0.462 (2.798 sec/step)
step 8180 loss = 0.461 (2.546 sec/step)
step 8190 loss = 0.459 (2.888 sec/step)
step 8200 loss = 0.473 (2.836 sec/step)
step 8210 loss = 0.481 (2.936 sec/step)
step 8220 loss = 0.433 (2.737 sec/step)
step 8230 loss = 0.460 (2.903 sec/step)
step 8240 loss = 0.469 (2.645 sec/step)
step 8250 loss = 0.442 (3.043 sec/step)
step 8260 loss = 0.479 (2.842 sec/step)
step 8270 loss = 0.489 (3.158 sec/step)
step 8280 loss = 0.486 (2.367 sec/step)
step 8290 loss = 0.452 (2.848 sec/step)
step 8300 loss = 0.470 (3.238 sec/step)
step 8310 loss = 0.479 (2.906 sec/step)
step 8320 loss = 0.475 (2.901 sec/step)
step 8330 loss = 0.470 (2.507 sec/step)
step 8340 loss = 0.476 (2.902 sec/step)
step 8350 loss = 0.477 (2.802 sec/step)
step 8360 loss = 0.494 (2.938 sec/step)
step 8370 loss = 0.504 (2.798 sec/step)
step 8380 loss = 0.449 (2.904 sec/step)
step 8390 loss = 0.439 (2.778 sec/step)
step 8400 loss = 0.516 (2.933 sec/step)
step 8410 loss = 0.475 (2.805 sec/step)
step 8420 loss = 0.487 (2.722 sec/step)
step 8430 loss = 0.482 (2.859 sec/step)
step 8440 loss = 0.468 (2.670 sec/step)
step 8450 loss = 0.502 (2.885 sec/step)
step 8460 loss = 0.491 (2.648 sec/step)
step 8470 loss = 0.474 (3.109 sec/step)
step 8480 loss = 0.469 (2.793 sec/step)
step 8490 loss = 0.499 (2.738 sec/step)
step 8500 loss = 0.439 (2.702 sec/step)
step 8510 loss = 0.455 (2.731 sec/step)
step 8520 loss = 0.481 (2.868 sec/step)
step 8530 loss = 0.460 (2.624 sec/step)
step 8540 loss = 0.458 (2.901 sec/step)
step 8550 loss = 0.481 (2.808 sec/step)
step 8560 loss = 0.495 (2.717 sec/step)
step 8570 loss = 0.477 (2.846 sec/step)
step 8580 loss = 0.476 (2.778 sec/step)
step 8590 loss = 0.476 (2.712 sec/step)
step 8600 loss = 0.471 (2.972 sec/step)
step 8610 loss = 0.452 (2.681 sec/step)
step 8620 loss = 0.487 (2.781 sec/step)
step 8630 loss = 0.474 (2.851 sec/step)
step 8640 loss = 0.498 (2.915 sec/step)
step 8650 loss = 0.459 (2.944 sec/step)
step 8660 loss = 0.490 (2.815 sec/step)
step 8670 loss = 0.474 (3.013 sec/step)
step 8680 loss = 0.483 (2.825 sec/step)
step 8690 loss = 0.441 (2.809 sec/step)
step 8700 loss = 0.494 (2.686 sec/step)
step 8710 loss = 0.505 (2.769 sec/step)
step 8720 loss = 0.454 (2.971 sec/step)
step 8730 loss = 0.468 (2.984 sec/step)
step 8740 loss = 0.482 (2.793 sec/step)
step 8750 loss = 0.483 (2.920 sec/step)
step 8760 loss = 0.478 (2.953 sec/step)
step 8770 loss = 0.479 (3.091 sec/step)
step 8780 loss = 0.487 (2.727 sec/step)
step 8790 loss = 0.482 (3.043 sec/step)
step 8800 loss = 0.464 (3.128 sec/step)
step 8810 loss = 0.463 (2.620 sec/step)
step 8820 loss = 0.467 (2.933 sec/step)
step 8830 loss = 0.481 (2.822 sec/step)
step 8840 loss = 0.497 (3.021 sec/step)
step 8850 loss = 0.467 (2.867 sec/step)
step 8860 loss = 0.496 (2.621 sec/step)
step 8870 loss = 0.507 (3.080 sec/step)
step 8880 loss = 0.489 (2.801 sec/step)
step 8890 loss = 0.508 (2.731 sec/step)
step 8900 loss = 0.468 (2.962 sec/step)
step 8910 loss = 0.462 (2.820 sec/step)
step 8920 loss = 0.470 (2.679 sec/step)
step 8930 loss = 0.508 (2.637 sec/step)
step 8940 loss = 0.433 (2.948 sec/step)
step 8950 loss = 0.454 (2.699 sec/step)
step 8960 loss = 0.471 (2.923 sec/step)
step 8970 loss = 0.500 (2.814 sec/step)
step 8980 loss = 0.459 (2.875 sec/step)
step 8990 loss = 0.462 (2.461 sec/step)
step 9000 loss = 0.482 (2.832 sec/step)
step 9010 loss = 0.497 (2.649 sec/step)
step 9020 loss = 0.466 (2.812 sec/step)
step 9030 loss = 0.480 (2.893 sec/step)
step 9040 loss = 0.510 (2.703 sec/step)
step 9050 loss = 0.495 (3.247 sec/step)
step 9060 loss = 0.482 (2.794 sec/step)
step 9070 loss = 0.489 (2.778 sec/step)
step 9080 loss = 0.480 (2.739 sec/step)
step 9090 loss = 0.484 (2.913 sec/step)
step 9100 loss = 0.474 (2.653 sec/step)
step 9110 loss = 0.478 (2.384 sec/step)
step 9120 loss = 0.489 (2.784 sec/step)
step 9130 loss = 0.485 (3.130 sec/step)
step 9140 loss = 0.488 (2.696 sec/step)
step 9150 loss = 0.481 (2.974 sec/step)
step 9160 loss = 0.469 (2.631 sec/step)
step 9170 loss = 0.501 (2.824 sec/step)
step 9180 loss = 0.487 (2.593 sec/step)
step 9190 loss = 0.458 (2.642 sec/step)
step 9200 loss = 0.472 (2.566 sec/step)
step 9210 loss = 0.480 (2.740 sec/step)
step 9220 loss = 0.495 (2.713 sec/step)
step 9230 loss = 0.490 (2.784 sec/step)
step 9240 loss = 0.480 (2.721 sec/step)
step 9250 loss = 0.492 (2.792 sec/step)
step 9260 loss = 0.480 (2.786 sec/step)
step 9270 loss = 0.485 (3.261 sec/step)
step 9280 loss = 0.452 (3.000 sec/step)
step 9290 loss = 0.496 (2.572 sec/step)
step 9300 loss = 0.453 (3.277 sec/step)
step 9310 loss = 0.472 (3.067 sec/step)
step 9320 loss = 0.508 (2.973 sec/step)
step 9330 loss = 0.489 (3.282 sec/step)
step 9340 loss = 0.479 (2.794 sec/step)
step 9350 loss = 0.495 (2.862 sec/step)
step 9360 loss = 0.480 (2.798 sec/step)
step 9370 loss = 0.477 (2.849 sec/step)
step 9380 loss = 0.495 (2.458 sec/step)
step 9390 loss = 0.498 (2.622 sec/step)
step 9400 loss = 0.472 (2.888 sec/step)
step 9410 loss = 0.469 (2.847 sec/step)
step 9420 loss = 0.471 (2.677 sec/step)
step 9430 loss = 0.524 (2.812 sec/step)
step 9440 loss = 0.478 (2.861 sec/step)
step 9450 loss = 0.467 (2.610 sec/step)
step 9460 loss = 0.495 (2.768 sec/step)
step 9470 loss = 0.466 (2.633 sec/step)
step 9480 loss = 0.491 (2.646 sec/step)
step 9490 loss = 0.470 (2.892 sec/step)
step 9500 loss = 0.491 (2.863 sec/step)
step 9510 loss = 0.501 (2.968 sec/step)
step 9520 loss = 0.493 (2.766 sec/step)
step 9530 loss = 0.479 (2.652 sec/step)
step 9540 loss = 0.511 (2.829 sec/step)
step 9550 loss = 0.478 (2.752 sec/step)
step 9560 loss = 0.478 (2.718 sec/step)
step 9570 loss = 0.480 (2.868 sec/step)
step 9580 loss = 0.477 (2.813 sec/step)
step 9590 loss = 0.494 (2.885 sec/step)
step 9600 loss = 0.485 (2.863 sec/step)
step 9610 loss = 0.486 (2.871 sec/step)
step 9620 loss = 0.475 (2.753 sec/step)
step 9630 loss = 0.506 (3.155 sec/step)
step 9640 loss = 0.487 (2.614 sec/step)
step 9650 loss = 0.508 (2.272 sec/step)
step 9660 loss = 0.458 (2.308 sec/step)
step 9670 loss = 0.454 (2.364 sec/step)
step 9680 loss = 0.464 (2.294 sec/step)
step 9690 loss = 0.478 (2.395 sec/step)
step 9700 loss = 0.474 (2.406 sec/step)
step 9710 loss = 0.463 (2.254 sec/step)
step 9720 loss = 0.495 (2.337 sec/step)
step 9730 loss = 0.449 (2.360 sec/step)
step 9740 loss = 0.458 (2.266 sec/step)
step 9750 loss = 0.507 (2.414 sec/step)
step 9760 loss = 0.473 (2.161 sec/step)
step 9770 loss = 0.487 (2.266 sec/step)
step 9780 loss = 0.491 (2.279 sec/step)
step 9790 loss = 0.489 (2.291 sec/step)
step 9800 loss = 0.476 (3.174 sec/step)
step 9810 loss = 0.475 (2.655 sec/step)
step 9820 loss = 0.477 (2.467 sec/step)
step 9830 loss = 0.483 (2.697 sec/step)
step 9840 loss = 0.482 (2.845 sec/step)
step 9850 loss = 0.468 (2.524 sec/step)
step 9860 loss = 0.477 (3.226 sec/step)
step 9870 loss = 0.478 (2.980 sec/step)
step 9880 loss = 0.463 (2.865 sec/step)
step 9890 loss = 0.490 (2.874 sec/step)
step 9900 loss = 0.475 (2.866 sec/step)
step 9910 loss = 0.474 (2.603 sec/step)
step 9920 loss = 0.471 (2.535 sec/step)
step 9930 loss = 0.511 (2.784 sec/step)
step 9940 loss = 0.485 (2.750 sec/step)
step 9950 loss = 0.496 (2.687 sec/step)
step 9960 loss = 0.482 (2.884 sec/step)
step 9970 loss = 0.463 (3.032 sec/step)
step 9980 loss = 0.500 (2.961 sec/step)
step 9990 loss = 0.487 (2.829 sec/step)
step 10000 loss = 0.469 (3.043 sec/step)
step 10010 loss = 0.432 (2.571 sec/step)
step 10020 loss = 0.487 (2.660 sec/step)
step 10030 loss = 0.460 (2.657 sec/step)
step 10040 loss = 0.473 (2.485 sec/step)
step 10050 loss = 0.505 (2.723 sec/step)
step 10060 loss = 0.478 (2.758 sec/step)
step 10070 loss = 0.492 (2.854 sec/step)
step 10080 loss = 0.482 (2.638 sec/step)
step 10090 loss = 0.478 (2.505 sec/step)
step 10100 loss = 0.510 (2.770 sec/step)
step 10110 loss = 0.480 (2.851 sec/step)
step 10120 loss = 0.492 (3.126 sec/step)
step 10130 loss = 0.469 (3.149 sec/step)
step 10140 loss = 0.484 (2.754 sec/step)
step 10150 loss = 0.482 (2.879 sec/step)
step 10160 loss = 0.474 (2.995 sec/step)
step 10170 loss = 0.495 (3.261 sec/step)
step 10180 loss = 0.498 (2.577 sec/step)
step 10190 loss = 0.455 (2.736 sec/step)
step 10200 loss = 0.472 (3.024 sec/step)
step 10210 loss = 0.453 (2.618 sec/step)
step 10220 loss = 0.513 (3.007 sec/step)
step 10230 loss = 0.471 (2.848 sec/step)
step 10240 loss = 0.488 (2.744 sec/step)
step 10250 loss = 0.483 (2.591 sec/step)
step 10260 loss = 0.479 (3.181 sec/step)
step 10270 loss = 0.471 (2.748 sec/step)
step 10280 loss = 0.467 (3.225 sec/step)
step 10290 loss = 0.479 (2.943 sec/step)
step 10300 loss = 0.492 (2.876 sec/step)
step 10310 loss = 0.490 (2.699 sec/step)
step 10320 loss = 0.459 (3.119 sec/step)
step 10330 loss = 0.443 (3.022 sec/step)
step 10340 loss = 0.490 (2.778 sec/step)
step 10350 loss = 0.448 (2.690 sec/step)
step 10360 loss = 0.465 (2.781 sec/step)
step 10370 loss = 0.440 (3.042 sec/step)
step 10380 loss = 0.469 (2.765 sec/step)
step 10390 loss = 0.471 (3.160 sec/step)
step 10400 loss = 0.479 (2.848 sec/step)
step 10410 loss = 0.518 (3.052 sec/step)
step 10420 loss = 0.462 (2.627 sec/step)
step 10430 loss = 0.487 (2.558 sec/step)
step 10440 loss = 0.480 (2.775 sec/step)
step 10450 loss = 0.494 (2.632 sec/step)
step 10460 loss = 0.477 (2.722 sec/step)
step 10470 loss = 0.462 (2.870 sec/step)
step 10480 loss = 0.462 (2.733 sec/step)
step 10490 loss = 0.476 (2.953 sec/step)
step 10500 loss = 0.498 (2.738 sec/step)
step 10510 loss = 0.487 (2.785 sec/step)
step 10520 loss = 0.474 (2.959 sec/step)
step 10530 loss = 0.470 (2.781 sec/step)
step 10540 loss = 0.458 (2.930 sec/step)
step 10550 loss = 0.480 (3.263 sec/step)
step 10560 loss = 0.482 (2.584 sec/step)
step 10570 loss = 0.479 (2.728 sec/step)
step 10580 loss = 0.479 (2.812 sec/step)
step 10590 loss = 0.473 (2.859 sec/step)
step 10600 loss = 0.496 (2.509 sec/step)
step 10610 loss = 0.456 (2.920 sec/step)
step 10620 loss = 0.481 (2.707 sec/step)
step 10630 loss = 0.461 (2.759 sec/step)
step 10640 loss = 0.511 (3.438 sec/step)
step 10650 loss = 0.478 (2.605 sec/step)
step 10660 loss = 0.479 (3.172 sec/step)
step 10670 loss = 0.496 (2.731 sec/step)
step 10680 loss = 0.485 (2.929 sec/step)
step 10690 loss = 0.478 (2.851 sec/step)
step 10700 loss = 0.484 (2.846 sec/step)
step 10710 loss = 0.476 (2.903 sec/step)
step 10720 loss = 0.520 (3.207 sec/step)
step 10730 loss = 0.502 (2.950 sec/step)
step 10740 loss = 0.493 (2.977 sec/step)
step 10750 loss = 0.443 (3.161 sec/step)
step 10760 loss = 0.444 (2.731 sec/step)
step 10770 loss = 0.482 (2.630 sec/step)
step 10780 loss = 0.445 (2.657 sec/step)
step 10790 loss = 0.481 (2.870 sec/step)
step 10800 loss = 0.471 (3.087 sec/step)
step 10810 loss = 0.473 (2.788 sec/step)
step 10820 loss = 0.451 (3.024 sec/step)
step 10830 loss = 0.461 (3.010 sec/step)
step 10840 loss = 0.436 (2.766 sec/step)
step 10850 loss = 0.509 (2.864 sec/step)
step 10860 loss = 0.475 (3.195 sec/step)
step 10870 loss = 0.504 (3.010 sec/step)
step 10880 loss = 0.466 (2.815 sec/step)
step 10890 loss = 0.474 (2.667 sec/step)
step 10900 loss = 0.468 (2.941 sec/step)
step 10910 loss = 0.498 (3.116 sec/step)
step 10920 loss = 0.438 (2.983 sec/step)
step 10930 loss = 0.477 (2.563 sec/step)
step 10940 loss = 0.466 (3.040 sec/step)
step 10950 loss = 0.462 (2.734 sec/step)
step 10960 loss = 0.528 (2.781 sec/step)
step 10970 loss = 0.489 (2.416 sec/step)
step 10980 loss = 0.495 (2.663 sec/step)
step 10990 loss = 0.476 (2.813 sec/step)
step 11000 loss = 0.503 (2.734 sec/step)
step 11010 loss = 0.507 (2.879 sec/step)
step 11020 loss = 0.483 (2.521 sec/step)
step 11030 loss = 0.462 (2.822 sec/step)
step 11040 loss = 0.498 (3.057 sec/step)
step 11050 loss = 0.471 (3.088 sec/step)
step 11060 loss = 0.489 (2.713 sec/step)
step 11070 loss = 0.498 (3.029 sec/step)
step 11080 loss = 0.506 (3.072 sec/step)
step 11090 loss = 0.488 (2.714 sec/step)
step 11100 loss = 0.488 (2.923 sec/step)
step 11110 loss = 0.503 (2.743 sec/step)
step 11120 loss = 0.489 (2.738 sec/step)
New Best Loss 0.076 < Old Best 0.078. Saving...
The checkpoint has been created.
Training Epoch 3/100
step 11130 loss = 0.412 (2.729 sec/step)
step 11140 loss = 0.406 (2.611 sec/step)
step 11150 loss = 0.441 (2.823 sec/step)
step 11160 loss = 0.405 (2.764 sec/step)
step 11170 loss = 0.445 (2.971 sec/step)
step 11180 loss = 0.437 (2.905 sec/step)
step 11190 loss = 0.440 (2.479 sec/step)
step 11200 loss = 0.414 (3.013 sec/step)
step 11210 loss = 0.408 (2.567 sec/step)
step 11220 loss = 0.438 (2.862 sec/step)
step 11230 loss = 0.450 (2.965 sec/step)
step 11240 loss = 0.462 (2.727 sec/step)
step 11250 loss = 0.419 (3.032 sec/step)
step 11260 loss = 0.454 (2.595 sec/step)
step 11270 loss = 0.417 (2.940 sec/step)
step 11280 loss = 0.428 (2.837 sec/step)
step 11290 loss = 0.401 (3.012 sec/step)
step 11300 loss = 0.403 (2.735 sec/step)
step 11310 loss = 0.407 (2.747 sec/step)
step 11320 loss = 0.434 (2.945 sec/step)
step 11330 loss = 0.453 (2.798 sec/step)
step 11340 loss = 0.443 (2.814 sec/step)
step 11350 loss = 0.433 (2.789 sec/step)
step 11360 loss = 0.419 (2.678 sec/step)
step 11370 loss = 0.410 (2.802 sec/step)
step 11380 loss = 0.415 (2.755 sec/step)
step 11390 loss = 0.435 (2.777 sec/step)
step 11400 loss = 0.440 (2.636 sec/step)
step 11410 loss = 0.437 (2.884 sec/step)
step 11420 loss = 0.404 (2.879 sec/step)
step 11430 loss = 0.427 (2.620 sec/step)
step 11440 loss = 0.442 (2.830 sec/step)
step 11450 loss = 0.426 (2.931 sec/step)
step 11460 loss = 0.438 (2.873 sec/step)
step 11470 loss = 0.459 (2.593 sec/step)
step 11480 loss = 0.421 (2.886 sec/step)
step 11490 loss = 0.424 (2.824 sec/step)
step 11500 loss = 0.428 (2.663 sec/step)
step 11510 loss = 0.418 (2.946 sec/step)
step 11520 loss = 0.411 (2.754 sec/step)
step 11530 loss = 0.406 (2.639 sec/step)
step 11540 loss = 0.425 (2.862 sec/step)
step 11550 loss = 0.426 (2.804 sec/step)
step 11560 loss = 0.427 (2.969 sec/step)
step 11570 loss = 0.403 (2.838 sec/step)
step 11580 loss = 0.429 (2.729 sec/step)
step 11590 loss = 0.406 (2.942 sec/step)
step 11600 loss = 0.441 (3.017 sec/step)
step 11610 loss = 0.401 (2.781 sec/step)
step 11620 loss = 0.442 (2.905 sec/step)
step 11630 loss = 0.414 (3.036 sec/step)
step 11640 loss = 0.459 (2.795 sec/step)
step 11650 loss = 0.429 (2.769 sec/step)
step 11660 loss = 0.419 (2.855 sec/step)
step 11670 loss = 0.417 (3.041 sec/step)
step 11680 loss = 0.431 (2.659 sec/step)
step 11690 loss = 0.448 (2.886 sec/step)
step 11700 loss = 0.469 (2.876 sec/step)
step 11710 loss = 0.411 (2.882 sec/step)
step 11720 loss = 0.403 (2.705 sec/step)
step 11730 loss = 0.454 (2.861 sec/step)
step 11740 loss = 0.427 (3.070 sec/step)
step 11750 loss = 0.450 (2.768 sec/step)
step 11760 loss = 0.411 (2.692 sec/step)
step 11770 loss = 0.442 (2.895 sec/step)
step 11780 loss = 0.421 (2.650 sec/step)
step 11790 loss = 0.419 (2.759 sec/step)
step 11800 loss = 0.442 (2.532 sec/step)
step 11810 loss = 0.433 (3.072 sec/step)
step 11820 loss = 0.463 (2.990 sec/step)
step 11830 loss = 0.419 (2.633 sec/step)
step 11840 loss = 0.440 (2.759 sec/step)
step 11850 loss = 0.437 (2.700 sec/step)
step 11860 loss = 0.427 (2.920 sec/step)
step 11870 loss = 0.423 (2.502 sec/step)
step 11880 loss = 0.454 (2.832 sec/step)
step 11890 loss = 0.459 (2.795 sec/step)
step 11900 loss = 0.439 (2.734 sec/step)
step 11910 loss = 0.420 (3.039 sec/step)
step 11920 loss = 0.420 (2.661 sec/step)
step 11930 loss = 0.459 (3.142 sec/step)
step 11940 loss = 0.443 (2.929 sec/step)
step 11950 loss = 0.434 (2.951 sec/step)
step 11960 loss = 0.451 (3.037 sec/step)
step 11970 loss = 0.419 (2.684 sec/step)
step 11980 loss = 0.453 (2.902 sec/step)
step 11990 loss = 0.419 (3.374 sec/step)
step 12000 loss = 0.442 (2.591 sec/step)
step 12010 loss = 0.420 (2.820 sec/step)
step 12020 loss = 0.453 (2.755 sec/step)
step 12030 loss = 0.421 (2.440 sec/step)
step 12040 loss = 0.444 (3.215 sec/step)
step 12050 loss = 0.424 (2.714 sec/step)
step 12060 loss = 0.432 (3.497 sec/step)
step 12070 loss = 0.431 (2.631 sec/step)
step 12080 loss = 0.419 (2.729 sec/step)
step 12090 loss = 0.434 (2.670 sec/step)
step 12100 loss = 0.441 (3.093 sec/step)
step 12110 loss = 0.452 (2.862 sec/step)
step 12120 loss = 0.449 (2.848 sec/step)
step 12130 loss = 0.436 (2.916 sec/step)
step 12140 loss = 0.427 (2.997 sec/step)
step 12150 loss = 0.428 (2.709 sec/step)
step 12160 loss = 0.474 (2.834 sec/step)
step 12170 loss = 0.412 (2.685 sec/step)
step 12180 loss = 0.441 (2.848 sec/step)
step 12190 loss = 0.442 (2.860 sec/step)
step 12200 loss = 0.433 (2.416 sec/step)
step 12210 loss = 0.450 (2.772 sec/step)
step 12220 loss = 0.420 (2.903 sec/step)
step 12230 loss = 0.410 (2.897 sec/step)
step 12240 loss = 0.423 (3.027 sec/step)
step 12250 loss = 0.445 (2.862 sec/step)
step 12260 loss = 0.426 (2.756 sec/step)
step 12270 loss = 0.408 (2.961 sec/step)
step 12280 loss = 0.412 (3.137 sec/step)
step 12290 loss = 0.427 (2.909 sec/step)
step 12300 loss = 0.483 (3.047 sec/step)
step 12310 loss = 0.439 (2.714 sec/step)
step 12320 loss = 0.416 (2.969 sec/step)
step 12330 loss = 0.424 (3.098 sec/step)
step 12340 loss = 0.435 (3.113 sec/step)
step 12350 loss = 0.443 (2.866 sec/step)
step 12360 loss = 0.418 (2.986 sec/step)
step 12370 loss = 0.434 (3.356 sec/step)
step 12380 loss = 0.419 (3.130 sec/step)
step 12390 loss = 0.420 (2.843 sec/step)
step 12400 loss = 0.421 (2.863 sec/step)
step 12410 loss = 0.430 (2.567 sec/step)
step 12420 loss = 0.459 (2.608 sec/step)
step 12430 loss = 0.447 (3.085 sec/step)
step 12440 loss = 0.483 (2.652 sec/step)
step 12450 loss = 0.439 (2.967 sec/step)
step 12460 loss = 0.442 (2.923 sec/step)
step 12470 loss = 0.425 (3.114 sec/step)
step 12480 loss = 0.431 (2.725 sec/step)
step 12490 loss = 0.451 (2.679 sec/step)
step 12500 loss = 0.432 (2.511 sec/step)
step 12510 loss = 0.436 (2.763 sec/step)
step 12520 loss = 0.440 (2.860 sec/step)
step 12530 loss = 0.429 (2.969 sec/step)
step 12540 loss = 0.456 (2.721 sec/step)
step 12550 loss = 0.463 (2.766 sec/step)
step 12560 loss = 0.434 (2.474 sec/step)
step 12570 loss = 0.424 (2.907 sec/step)
step 12580 loss = 0.448 (2.748 sec/step)
step 12590 loss = 0.441 (2.847 sec/step)
step 12600 loss = 0.421 (2.965 sec/step)
step 12610 loss = 0.428 (2.639 sec/step)
step 12620 loss = 0.434 (2.722 sec/step)
step 12630 loss = 0.444 (3.072 sec/step)
step 12640 loss = 0.427 (2.709 sec/step)
step 12650 loss = 0.446 (3.059 sec/step)
step 12660 loss = 0.464 (2.835 sec/step)
step 12670 loss = 0.440 (2.974 sec/step)
step 12680 loss = 0.408 (3.097 sec/step)
step 12690 loss = 0.424 (2.808 sec/step)
step 12700 loss = 0.439 (2.617 sec/step)
step 12710 loss = 0.449 (2.995 sec/step)
step 12720 loss = 0.431 (3.001 sec/step)
step 12730 loss = 0.444 (2.612 sec/step)
step 12740 loss = 0.455 (2.719 sec/step)
step 12750 loss = 0.435 (2.796 sec/step)
step 12760 loss = 0.441 (3.128 sec/step)
step 12770 loss = 0.427 (3.123 sec/step)
step 12780 loss = 0.452 (2.637 sec/step)
step 12790 loss = 0.448 (2.945 sec/step)
step 12800 loss = 0.443 (2.805 sec/step)
step 12810 loss = 0.459 (2.884 sec/step)
step 12820 loss = 0.442 (2.812 sec/step)
step 12830 loss = 0.419 (2.913 sec/step)
step 12840 loss = 0.412 (2.914 sec/step)
step 12850 loss = 0.417 (2.558 sec/step)
step 12860 loss = 0.452 (2.699 sec/step)
step 12870 loss = 0.449 (2.808 sec/step)
step 12880 loss = 0.441 (2.780 sec/step)
step 12890 loss = 0.424 (2.758 sec/step)
step 12900 loss = 0.428 (2.859 sec/step)
step 12910 loss = 0.419 (2.837 sec/step)
step 12920 loss = 0.482 (2.868 sec/step)
step 12930 loss = 0.422 (2.812 sec/step)
step 12940 loss = 0.454 (3.299 sec/step)
step 12950 loss = 0.438 (2.761 sec/step)
step 12960 loss = 0.437 (2.950 sec/step)
step 12970 loss = 0.416 (3.160 sec/step)
step 12980 loss = 0.450 (2.876 sec/step)
step 12990 loss = 0.419 (2.941 sec/step)
step 13000 loss = 0.425 (2.783 sec/step)
step 13010 loss = 0.420 (2.773 sec/step)
step 13020 loss = 0.455 (3.230 sec/step)
step 13030 loss = 0.395 (2.818 sec/step)
step 13040 loss = 0.455 (3.035 sec/step)
step 13050 loss = 0.430 (3.192 sec/step)
step 13060 loss = 0.443 (2.773 sec/step)
step 13070 loss = 0.435 (2.792 sec/step)
step 13080 loss = 0.455 (2.684 sec/step)
step 13090 loss = 0.398 (2.742 sec/step)
step 13100 loss = 0.424 (2.692 sec/step)
step 13110 loss = 0.426 (2.693 sec/step)
step 13120 loss = 0.449 (3.206 sec/step)
step 13130 loss = 0.456 (2.537 sec/step)
step 13140 loss = 0.445 (3.098 sec/step)
step 13150 loss = 0.435 (2.726 sec/step)
step 13160 loss = 0.421 (2.994 sec/step)
step 13170 loss = 0.431 (3.029 sec/step)
step 13180 loss = 0.443 (2.764 sec/step)
step 13190 loss = 0.456 (2.829 sec/step)
step 13200 loss = 0.440 (2.600 sec/step)
step 13210 loss = 0.438 (3.260 sec/step)
step 13220 loss = 0.460 (2.869 sec/step)
step 13230 loss = 0.458 (2.835 sec/step)
step 13240 loss = 0.456 (2.843 sec/step)
step 13250 loss = 0.433 (2.774 sec/step)
step 13260 loss = 0.427 (2.713 sec/step)
step 13270 loss = 0.421 (3.036 sec/step)
step 13280 loss = 0.447 (3.224 sec/step)
step 13290 loss = 0.435 (2.710 sec/step)
step 13300 loss = 0.428 (2.707 sec/step)
step 13310 loss = 0.445 (2.469 sec/step)
step 13320 loss = 0.461 (3.129 sec/step)
step 13330 loss = 0.430 (2.699 sec/step)
step 13340 loss = 0.466 (2.708 sec/step)
step 13350 loss = 0.459 (3.034 sec/step)
step 13360 loss = 0.423 (2.802 sec/step)
step 13370 loss = 0.428 (2.683 sec/step)
step 13380 loss = 0.425 (3.095 sec/step)
step 13390 loss = 0.421 (2.829 sec/step)
step 13400 loss = 0.448 (2.879 sec/step)
step 13410 loss = 0.463 (2.713 sec/step)
step 13420 loss = 0.441 (2.841 sec/step)
step 13430 loss = 0.443 (2.853 sec/step)
step 13440 loss = 0.423 (3.218 sec/step)
step 13450 loss = 0.458 (2.782 sec/step)
step 13460 loss = 0.433 (2.952 sec/step)
step 13470 loss = 0.467 (2.854 sec/step)
step 13480 loss = 0.420 (2.869 sec/step)
step 13490 loss = 0.406 (2.763 sec/step)
step 13500 loss = 0.430 (2.877 sec/step)
step 13510 loss = 0.446 (2.880 sec/step)
step 13520 loss = 0.454 (2.776 sec/step)
step 13530 loss = 0.451 (2.606 sec/step)
step 13540 loss = 0.428 (2.925 sec/step)
step 13550 loss = 0.433 (2.576 sec/step)
step 13560 loss = 0.451 (2.625 sec/step)
step 13570 loss = 0.458 (2.714 sec/step)
step 13580 loss = 0.420 (3.233 sec/step)
step 13590 loss = 0.428 (2.760 sec/step)
step 13600 loss = 0.424 (2.949 sec/step)
step 13610 loss = 0.447 (2.712 sec/step)
step 13620 loss = 0.410 (2.801 sec/step)
step 13630 loss = 0.429 (2.759 sec/step)
step 13640 loss = 0.440 (2.607 sec/step)
step 13650 loss = 0.419 (2.830 sec/step)
step 13660 loss = 0.433 (2.740 sec/step)
step 13670 loss = 0.428 (2.994 sec/step)
step 13680 loss = 0.432 (2.887 sec/step)
step 13690 loss = 0.423 (2.835 sec/step)
step 13700 loss = 0.427 (2.995 sec/step)
step 13710 loss = 0.463 (3.132 sec/step)
step 13720 loss = 0.440 (2.679 sec/step)
step 13730 loss = 0.394 (3.014 sec/step)
step 13740 loss = 0.425 (2.775 sec/step)
step 13750 loss = 0.437 (2.852 sec/step)
step 13760 loss = 0.466 (2.712 sec/step)
step 13770 loss = 0.414 (2.626 sec/step)
step 13780 loss = 0.411 (2.570 sec/step)
step 13790 loss = 0.424 (2.849 sec/step)
step 13800 loss = 0.449 (2.820 sec/step)
step 13810 loss = 0.437 (2.729 sec/step)
step 13820 loss = 0.435 (2.640 sec/step)
step 13830 loss = 0.452 (2.854 sec/step)
step 13840 loss = 0.453 (2.502 sec/step)
step 13850 loss = 0.427 (2.698 sec/step)
step 13860 loss = 0.456 (2.864 sec/step)
step 13870 loss = 0.435 (2.866 sec/step)
step 13880 loss = 0.440 (2.756 sec/step)
step 13890 loss = 0.455 (2.824 sec/step)
step 13900 loss = 0.416 (2.703 sec/step)
step 13910 loss = 0.433 (2.734 sec/step)
step 13920 loss = 0.426 (2.998 sec/step)
step 13930 loss = 0.408 (2.934 sec/step)
step 13940 loss = 0.461 (3.155 sec/step)
step 13950 loss = 0.453 (2.853 sec/step)
step 13960 loss = 0.415 (2.696 sec/step)
step 13970 loss = 0.408 (2.694 sec/step)
step 13980 loss = 0.418 (3.158 sec/step)
step 13990 loss = 0.433 (2.896 sec/step)
step 14000 loss = 0.448 (2.985 sec/step)
step 14010 loss = 0.447 (2.798 sec/step)
step 14020 loss = 0.455 (3.174 sec/step)
step 14030 loss = 0.430 (2.976 sec/step)
step 14040 loss = 0.460 (3.229 sec/step)
step 14050 loss = 0.440 (2.950 sec/step)
step 14060 loss = 0.451 (2.655 sec/step)
step 14070 loss = 0.467 (2.841 sec/step)
step 14080 loss = 0.463 (2.770 sec/step)
step 14090 loss = 0.433 (3.063 sec/step)
step 14100 loss = 0.411 (2.706 sec/step)
step 14110 loss = 0.398 (2.883 sec/step)
step 14120 loss = 0.440 (2.890 sec/step)
step 14130 loss = 0.458 (2.849 sec/step)
step 14140 loss = 0.427 (2.790 sec/step)
step 14150 loss = 0.435 (2.944 sec/step)
step 14160 loss = 0.474 (2.821 sec/step)
step 14170 loss = 0.434 (2.625 sec/step)
step 14180 loss = 0.448 (2.982 sec/step)
step 14190 loss = 0.446 (2.826 sec/step)
step 14200 loss = 0.437 (2.723 sec/step)
step 14210 loss = 0.444 (2.941 sec/step)
step 14220 loss = 0.419 (2.781 sec/step)
step 14230 loss = 0.444 (2.571 sec/step)
step 14240 loss = 0.444 (2.781 sec/step)
step 14250 loss = 0.447 (2.725 sec/step)
step 14260 loss = 0.442 (2.936 sec/step)
step 14270 loss = 0.435 (2.829 sec/step)
step 14280 loss = 0.427 (2.907 sec/step)
step 14290 loss = 0.450 (3.165 sec/step)
step 14300 loss = 0.455 (2.912 sec/step)
step 14310 loss = 0.418 (2.777 sec/step)
step 14320 loss = 0.460 (2.772 sec/step)
step 14330 loss = 0.452 (2.521 sec/step)
step 14340 loss = 0.426 (2.603 sec/step)
step 14350 loss = 0.426 (2.731 sec/step)
step 14360 loss = 0.457 (2.621 sec/step)
step 14370 loss = 0.432 (2.878 sec/step)
step 14380 loss = 0.447 (2.948 sec/step)
step 14390 loss = 0.431 (2.783 sec/step)
step 14400 loss = 0.446 (2.901 sec/step)
step 14410 loss = 0.431 (2.720 sec/step)
step 14420 loss = 0.442 (2.734 sec/step)
step 14430 loss = 0.439 (2.730 sec/step)
step 14440 loss = 0.435 (2.769 sec/step)
step 14450 loss = 0.425 (2.792 sec/step)
step 14460 loss = 0.414 (2.879 sec/step)
step 14470 loss = 0.464 (2.808 sec/step)
step 14480 loss = 0.418 (2.690 sec/step)
step 14490 loss = 0.454 (2.910 sec/step)
step 14500 loss = 0.465 (3.086 sec/step)
step 14510 loss = 0.426 (2.534 sec/step)
step 14520 loss = 0.465 (2.458 sec/step)
step 14530 loss = 0.432 (2.920 sec/step)
step 14540 loss = 0.439 (2.812 sec/step)
step 14550 loss = 0.448 (2.825 sec/step)
step 14560 loss = 0.461 (2.898 sec/step)
step 14570 loss = 0.445 (2.608 sec/step)
step 14580 loss = 0.432 (2.499 sec/step)
step 14590 loss = 0.417 (3.016 sec/step)
step 14600 loss = 0.445 (2.691 sec/step)
step 14610 loss = 0.439 (2.762 sec/step)
step 14620 loss = 0.434 (2.710 sec/step)
step 14630 loss = 0.431 (2.652 sec/step)
step 14640 loss = 0.449 (2.744 sec/step)
step 14650 loss = 0.447 (2.818 sec/step)
step 14660 loss = 0.446 (2.840 sec/step)
step 14670 loss = 0.432 (2.993 sec/step)
step 14680 loss = 0.423 (2.804 sec/step)
step 14690 loss = 0.434 (3.084 sec/step)
step 14700 loss = 0.432 (2.719 sec/step)
step 14710 loss = 0.414 (2.824 sec/step)
step 14720 loss = 0.444 (2.724 sec/step)
step 14730 loss = 0.460 (2.853 sec/step)
step 14740 loss = 0.440 (2.748 sec/step)
step 14750 loss = 0.401 (2.843 sec/step)
step 14760 loss = 0.439 (2.519 sec/step)
step 14770 loss = 0.460 (3.180 sec/step)
step 14780 loss = 0.452 (2.995 sec/step)
step 14790 loss = 0.447 (2.563 sec/step)
step 14800 loss = 0.478 (2.844 sec/step)
step 14810 loss = 0.438 (3.141 sec/step)
step 14820 loss = 0.459 (2.767 sec/step)
step 14830 loss = 0.441 (2.629 sec/step)
New Best Loss 0.075 < Old Best 0.076. Saving...
The checkpoint has been created.
Training Epoch 4/100
step 14840 loss = 0.403 (2.933 sec/step)
step 14850 loss = 0.417 (3.003 sec/step)
step 14860 loss = 0.384 (3.060 sec/step)
step 14870 loss = 0.374 (3.335 sec/step)
step 14880 loss = 0.389 (2.464 sec/step)
step 14890 loss = 0.400 (3.097 sec/step)
step 14900 loss = 0.396 (2.797 sec/step)
step 14910 loss = 0.384 (2.830 sec/step)
step 14920 loss = 0.354 (2.823 sec/step)
step 14930 loss = 0.391 (2.725 sec/step)
step 14940 loss = 0.363 (2.791 sec/step)
step 14950 loss = 0.364 (2.704 sec/step)
step 14960 loss = 0.364 (2.766 sec/step)
step 14970 loss = 0.393 (2.851 sec/step)
step 14980 loss = 0.417 (3.010 sec/step)
step 14990 loss = 0.407 (2.767 sec/step)
step 15000 loss = 0.388 (3.036 sec/step)
step 15010 loss = 0.403 (2.963 sec/step)
step 15020 loss = 0.412 (2.753 sec/step)
step 15030 loss = 0.394 (2.800 sec/step)
step 15040 loss = 0.371 (2.746 sec/step)
step 15050 loss = 0.375 (2.692 sec/step)
step 15060 loss = 0.366 (2.766 sec/step)
step 15070 loss = 0.384 (2.660 sec/step)
step 15080 loss = 0.372 (2.768 sec/step)
step 15090 loss = 0.383 (3.016 sec/step)
step 15100 loss = 0.409 (3.114 sec/step)
step 15110 loss = 0.406 (2.504 sec/step)
step 15120 loss = 0.419 (2.463 sec/step)
step 15130 loss = 0.357 (2.902 sec/step)
step 15140 loss = 0.382 (3.100 sec/step)
step 15150 loss = 0.400 (2.972 sec/step)
step 15160 loss = 0.372 (2.896 sec/step)
step 15170 loss = 0.439 (2.817 sec/step)
step 15180 loss = 0.374 (2.900 sec/step)
step 15190 loss = 0.383 (2.683 sec/step)
step 15200 loss = 0.361 (2.707 sec/step)
step 15210 loss = 0.404 (2.793 sec/step)
step 15220 loss = 0.406 (3.062 sec/step)
step 15230 loss = 0.367 (2.890 sec/step)
step 15240 loss = 0.405 (2.964 sec/step)
step 15250 loss = 0.376 (2.911 sec/step)
step 15260 loss = 0.373 (2.805 sec/step)
step 15270 loss = 0.384 (2.779 sec/step)
step 15280 loss = 0.396 (2.791 sec/step)
step 15290 loss = 0.396 (2.804 sec/step)
step 15300 loss = 0.380 (2.890 sec/step)
step 15310 loss = 0.395 (2.677 sec/step)
step 15320 loss = 0.379 (3.037 sec/step)
step 15330 loss = 0.357 (2.723 sec/step)
step 15340 loss = 0.359 (2.589 sec/step)
step 15350 loss = 0.385 (2.520 sec/step)
step 15360 loss = 0.379 (2.923 sec/step)
step 15370 loss = 0.414 (2.909 sec/step)
step 15380 loss = 0.393 (3.172 sec/step)
step 15390 loss = 0.403 (3.067 sec/step)
step 15400 loss = 0.406 (2.888 sec/step)
step 15410 loss = 0.381 (2.448 sec/step)
step 15420 loss = 0.393 (2.380 sec/step)
step 15430 loss = 0.386 (2.258 sec/step)
step 15440 loss = 0.387 (2.196 sec/step)
step 15450 loss = 0.386 (2.350 sec/step)
step 15460 loss = 0.397 (2.331 sec/step)
step 15470 loss = 0.380 (2.294 sec/step)
step 15480 loss = 0.371 (2.317 sec/step)
step 15490 loss = 0.403 (2.515 sec/step)
step 15500 loss = 0.381 (2.280 sec/step)
step 15510 loss = 0.396 (2.384 sec/step)
step 15520 loss = 0.382 (3.465 sec/step)
step 15530 loss = 0.386 (2.645 sec/step)
step 15540 loss = 0.393 (2.435 sec/step)
step 15550 loss = 0.400 (3.332 sec/step)
step 15560 loss = 0.407 (2.936 sec/step)
step 15570 loss = 0.402 (2.871 sec/step)
step 15580 loss = 0.399 (3.149 sec/step)
step 15590 loss = 0.390 (3.211 sec/step)
step 15600 loss = 0.391 (2.923 sec/step)
step 15610 loss = 0.394 (2.562 sec/step)
step 15620 loss = 0.377 (3.032 sec/step)
step 15630 loss = 0.384 (2.665 sec/step)
step 15640 loss = 0.392 (2.960 sec/step)
step 15650 loss = 0.377 (2.846 sec/step)
step 15660 loss = 0.394 (2.751 sec/step)
step 15670 loss = 0.387 (3.093 sec/step)
step 15680 loss = 0.399 (3.265 sec/step)
step 15690 loss = 0.389 (2.815 sec/step)
step 15700 loss = 0.378 (2.749 sec/step)
step 15710 loss = 0.404 (3.056 sec/step)
step 15720 loss = 0.394 (2.649 sec/step)
step 15730 loss = 0.394 (2.740 sec/step)
step 15740 loss = 0.373 (2.627 sec/step)
step 15750 loss = 0.373 (2.609 sec/step)
step 15760 loss = 0.402 (2.668 sec/step)
step 15770 loss = 0.380 (2.940 sec/step)
step 15780 loss = 0.374 (2.686 sec/step)
step 15790 loss = 0.416 (2.462 sec/step)
step 15800 loss = 0.393 (2.770 sec/step)
step 15810 loss = 0.417 (2.898 sec/step)
step 15820 loss = 0.385 (2.955 sec/step)
step 15830 loss = 0.379 (2.999 sec/step)
step 15840 loss = 0.376 (2.807 sec/step)
step 15850 loss = 0.400 (2.924 sec/step)
step 15860 loss = 0.407 (2.993 sec/step)
step 15870 loss = 0.399 (2.626 sec/step)
step 15880 loss = 0.374 (2.649 sec/step)
step 15890 loss = 0.399 (2.617 sec/step)
step 15900 loss = 0.398 (2.982 sec/step)
step 15910 loss = 0.427 (2.794 sec/step)
step 15920 loss = 0.437 (2.818 sec/step)
step 15930 loss = 0.428 (2.958 sec/step)
step 15940 loss = 0.392 (3.189 sec/step)
step 15950 loss = 0.384 (2.919 sec/step)
step 15960 loss = 0.401 (2.822 sec/step)
step 15970 loss = 0.425 (2.725 sec/step)
step 15980 loss = 0.409 (2.498 sec/step)
step 15990 loss = 0.401 (2.882 sec/step)
step 16000 loss = 0.400 (2.756 sec/step)
step 16010 loss = 0.374 (3.021 sec/step)
step 16020 loss = 0.405 (2.606 sec/step)
step 16030 loss = 0.395 (2.713 sec/step)
step 16040 loss = 0.406 (2.803 sec/step)
step 16050 loss = 0.383 (2.745 sec/step)
step 16060 loss = 0.402 (2.861 sec/step)
step 16070 loss = 0.386 (2.751 sec/step)
step 16080 loss = 0.392 (3.010 sec/step)
step 16090 loss = 0.382 (2.748 sec/step)
step 16100 loss = 0.436 (2.879 sec/step)
step 16110 loss = 0.403 (2.901 sec/step)
step 16120 loss = 0.409 (2.806 sec/step)
step 16130 loss = 0.374 (2.976 sec/step)
step 16140 loss = 0.395 (2.599 sec/step)
step 16150 loss = 0.404 (3.074 sec/step)
step 16160 loss = 0.402 (2.496 sec/step)
step 16170 loss = 0.414 (2.718 sec/step)
step 16180 loss = 0.403 (2.922 sec/step)
step 16190 loss = 0.393 (2.520 sec/step)
step 16200 loss = 0.390 (3.063 sec/step)
step 16210 loss = 0.382 (2.732 sec/step)
step 16220 loss = 0.374 (2.881 sec/step)
step 16230 loss = 0.402 (2.717 sec/step)
step 16240 loss = 0.395 (2.905 sec/step)
step 16250 loss = 0.379 (2.744 sec/step)
step 16260 loss = 0.381 (2.909 sec/step)
step 16270 loss = 0.403 (2.797 sec/step)
step 16280 loss = 0.418 (2.885 sec/step)
step 16290 loss = 0.402 (2.973 sec/step)
step 16300 loss = 0.403 (2.779 sec/step)
step 16310 loss = 0.411 (2.771 sec/step)
step 16320 loss = 0.412 (2.759 sec/step)
step 16330 loss = 0.401 (3.007 sec/step)
step 16340 loss = 0.406 (2.575 sec/step)
step 16350 loss = 0.382 (3.169 sec/step)
step 16360 loss = 0.402 (2.741 sec/step)
step 16370 loss = 0.390 (2.822 sec/step)
step 16380 loss = 0.380 (2.829 sec/step)
step 16390 loss = 0.398 (2.835 sec/step)
step 16400 loss = 0.420 (2.811 sec/step)
step 16410 loss = 0.369 (2.782 sec/step)
step 16420 loss = 0.395 (2.672 sec/step)
step 16430 loss = 0.398 (2.649 sec/step)
step 16440 loss = 0.395 (3.131 sec/step)
step 16450 loss = 0.391 (2.778 sec/step)
step 16460 loss = 0.388 (2.701 sec/step)
step 16470 loss = 0.409 (2.717 sec/step)
step 16480 loss = 0.426 (2.862 sec/step)
step 16490 loss = 0.389 (2.970 sec/step)
step 16500 loss = 0.418 (2.689 sec/step)
step 16510 loss = 0.390 (3.208 sec/step)
step 16520 loss = 0.410 (3.031 sec/step)
step 16530 loss = 0.393 (2.665 sec/step)
step 16540 loss = 0.384 (2.786 sec/step)
step 16550 loss = 0.398 (2.845 sec/step)
step 16560 loss = 0.363 (2.712 sec/step)
step 16570 loss = 0.412 (2.924 sec/step)
step 16580 loss = 0.406 (2.969 sec/step)
step 16590 loss = 0.380 (2.723 sec/step)
step 16600 loss = 0.423 (2.803 sec/step)
step 16610 loss = 0.402 (2.797 sec/step)
step 16620 loss = 0.396 (2.899 sec/step)
step 16630 loss = 0.412 (2.798 sec/step)
step 16640 loss = 0.397 (2.810 sec/step)
step 16650 loss = 0.390 (2.919 sec/step)
step 16660 loss = 0.396 (2.992 sec/step)
step 16670 loss = 0.411 (3.022 sec/step)
step 16680 loss = 0.402 (2.742 sec/step)
step 16690 loss = 0.407 (2.858 sec/step)
step 16700 loss = 0.414 (2.687 sec/step)
step 16710 loss = 0.381 (2.872 sec/step)
step 16720 loss = 0.421 (2.844 sec/step)
step 16730 loss = 0.400 (3.080 sec/step)
step 16740 loss = 0.412 (2.735 sec/step)
step 16750 loss = 0.405 (3.101 sec/step)
step 16760 loss = 0.424 (3.055 sec/step)
step 16770 loss = 0.395 (2.770 sec/step)
step 16780 loss = 0.409 (2.787 sec/step)
step 16790 loss = 0.383 (2.610 sec/step)
step 16800 loss = 0.423 (2.842 sec/step)
step 16810 loss = 0.405 (2.533 sec/step)
step 16820 loss = 0.385 (2.712 sec/step)
step 16830 loss = 0.418 (2.576 sec/step)
step 16840 loss = 0.417 (2.872 sec/step)
step 16850 loss = 0.390 (2.398 sec/step)
step 16860 loss = 0.375 (2.835 sec/step)
step 16870 loss = 0.412 (2.749 sec/step)
step 16880 loss = 0.389 (2.590 sec/step)
step 16890 loss = 0.429 (3.004 sec/step)
step 16900 loss = 0.380 (2.807 sec/step)
step 16910 loss = 0.398 (2.641 sec/step)
step 16920 loss = 0.399 (2.744 sec/step)
step 16930 loss = 0.407 (3.048 sec/step)
step 16940 loss = 0.383 (2.909 sec/step)
step 16950 loss = 0.398 (2.934 sec/step)
step 16960 loss = 0.406 (2.671 sec/step)
step 16970 loss = 0.385 (2.946 sec/step)
step 16980 loss = 0.380 (2.934 sec/step)
step 16990 loss = 0.379 (3.140 sec/step)
step 17000 loss = 0.390 (2.818 sec/step)
step 17010 loss = 0.422 (2.894 sec/step)
step 17020 loss = 0.375 (2.941 sec/step)
step 17030 loss = 0.417 (2.937 sec/step)
step 17040 loss = 0.433 (2.962 sec/step)
step 17050 loss = 0.386 (2.857 sec/step)
step 17060 loss = 0.392 (2.602 sec/step)
step 17070 loss = 0.412 (2.526 sec/step)
step 17080 loss = 0.429 (2.715 sec/step)
step 17090 loss = 0.388 (3.028 sec/step)
step 17100 loss = 0.419 (2.499 sec/step)
step 17110 loss = 0.434 (2.845 sec/step)
step 17120 loss = 0.394 (2.805 sec/step)
step 17130 loss = 0.418 (2.460 sec/step)
step 17140 loss = 0.421 (2.890 sec/step)
step 17150 loss = 0.402 (2.936 sec/step)
step 17160 loss = 0.405 (3.057 sec/step)
step 17170 loss = 0.406 (2.762 sec/step)
step 17180 loss = 0.393 (3.001 sec/step)
step 17190 loss = 0.407 (2.881 sec/step)
step 17200 loss = 0.417 (3.052 sec/step)
step 17210 loss = 0.399 (3.085 sec/step)
step 17220 loss = 0.384 (2.620 sec/step)
step 17230 loss = 0.399 (2.599 sec/step)
step 17240 loss = 0.402 (3.030 sec/step)
step 17250 loss = 0.414 (2.914 sec/step)
step 17260 loss = 0.398 (2.605 sec/step)
step 17270 loss = 0.401 (3.162 sec/step)
step 17280 loss = 0.400 (2.816 sec/step)
step 17290 loss = 0.397 (2.858 sec/step)
step 17300 loss = 0.429 (2.591 sec/step)
step 17310 loss = 0.428 (2.749 sec/step)
step 17320 loss = 0.390 (2.741 sec/step)
step 17330 loss = 0.400 (3.163 sec/step)
step 17340 loss = 0.376 (3.007 sec/step)
step 17350 loss = 0.400 (2.885 sec/step)
step 17360 loss = 0.414 (3.004 sec/step)
step 17370 loss = 0.426 (3.229 sec/step)
step 17380 loss = 0.406 (2.861 sec/step)
step 17390 loss = 0.399 (2.677 sec/step)
step 17400 loss = 0.431 (2.901 sec/step)
step 17410 loss = 0.396 (2.779 sec/step)
step 17420 loss = 0.399 (3.005 sec/step)
step 17430 loss = 0.384 (2.868 sec/step)
step 17440 loss = 0.399 (2.605 sec/step)
step 17450 loss = 0.399 (2.648 sec/step)
step 17460 loss = 0.391 (2.972 sec/step)
step 17470 loss = 0.383 (3.241 sec/step)
step 17480 loss = 0.389 (2.992 sec/step)
step 17490 loss = 0.402 (3.021 sec/step)
step 17500 loss = 0.400 (2.752 sec/step)
step 17510 loss = 0.419 (2.846 sec/step)
step 17520 loss = 0.434 (2.682 sec/step)
step 17530 loss = 0.408 (2.835 sec/step)
step 17540 loss = 0.406 (2.892 sec/step)
step 17550 loss = 0.415 (2.760 sec/step)
step 17560 loss = 0.408 (2.763 sec/step)
step 17570 loss = 0.407 (2.620 sec/step)
step 17580 loss = 0.418 (2.841 sec/step)
step 17590 loss = 0.413 (2.739 sec/step)
step 17600 loss = 0.406 (3.031 sec/step)
step 17610 loss = 0.410 (2.873 sec/step)
step 17620 loss = 0.423 (2.698 sec/step)
step 17630 loss = 0.402 (2.704 sec/step)
step 17640 loss = 0.395 (2.816 sec/step)
step 17650 loss = 0.402 (2.837 sec/step)
step 17660 loss = 0.400 (3.018 sec/step)
step 17670 loss = 0.401 (2.555 sec/step)
step 17680 loss = 0.391 (2.795 sec/step)
step 17690 loss = 0.391 (2.777 sec/step)
step 17700 loss = 0.410 (2.673 sec/step)
step 17710 loss = 0.395 (2.772 sec/step)
step 17720 loss = 0.393 (2.775 sec/step)
step 17730 loss = 0.400 (2.748 sec/step)
step 17740 loss = 0.435 (2.874 sec/step)
step 17750 loss = 0.396 (2.866 sec/step)
step 17760 loss = 0.415 (2.906 sec/step)
step 17770 loss = 0.402 (2.871 sec/step)
step 17780 loss = 0.385 (2.974 sec/step)
step 17790 loss = 0.400 (3.036 sec/step)
step 17800 loss = 0.401 (2.804 sec/step)
step 17810 loss = 0.415 (2.605 sec/step)
step 17820 loss = 0.427 (2.789 sec/step)
step 17830 loss = 0.374 (2.803 sec/step)
step 17840 loss = 0.420 (2.907 sec/step)
step 17850 loss = 0.411 (2.579 sec/step)
step 17860 loss = 0.442 (2.883 sec/step)
step 17870 loss = 0.399 (2.864 sec/step)
step 17880 loss = 0.377 (2.963 sec/step)
step 17890 loss = 0.432 (2.815 sec/step)
step 17900 loss = 0.399 (2.768 sec/step)
step 17910 loss = 0.389 (3.079 sec/step)
step 17920 loss = 0.414 (2.595 sec/step)
step 17930 loss = 0.405 (2.609 sec/step)
step 17940 loss = 0.393 (3.057 sec/step)
step 17950 loss = 0.420 (3.097 sec/step)
step 17960 loss = 0.409 (2.742 sec/step)
step 17970 loss = 0.399 (2.957 sec/step)
step 17980 loss = 0.413 (3.003 sec/step)
step 17990 loss = 0.419 (3.015 sec/step)
step 18000 loss = 0.409 (2.726 sec/step)
step 18010 loss = 0.427 (3.140 sec/step)
step 18020 loss = 0.426 (2.655 sec/step)
step 18030 loss = 0.445 (2.830 sec/step)
step 18040 loss = 0.434 (2.927 sec/step)
step 18050 loss = 0.379 (3.210 sec/step)
step 18060 loss = 0.405 (2.568 sec/step)
step 18070 loss = 0.404 (3.017 sec/step)
step 18080 loss = 0.434 (2.812 sec/step)
step 18090 loss = 0.398 (2.899 sec/step)
step 18100 loss = 0.410 (3.035 sec/step)
step 18110 loss = 0.404 (3.328 sec/step)
step 18120 loss = 0.412 (2.827 sec/step)
step 18130 loss = 0.414 (2.981 sec/step)
step 18140 loss = 0.399 (2.859 sec/step)
step 18150 loss = 0.424 (2.847 sec/step)
step 18160 loss = 0.402 (2.776 sec/step)
step 18170 loss = 0.398 (2.672 sec/step)
step 18180 loss = 0.389 (2.966 sec/step)
step 18190 loss = 0.395 (2.828 sec/step)
step 18200 loss = 0.405 (2.955 sec/step)
step 18210 loss = 0.401 (2.777 sec/step)
step 18220 loss = 0.435 (2.837 sec/step)
step 18230 loss = 0.410 (2.872 sec/step)
step 18240 loss = 0.427 (2.929 sec/step)
step 18250 loss = 0.395 (2.846 sec/step)
step 18260 loss = 0.407 (2.805 sec/step)
step 18270 loss = 0.420 (2.701 sec/step)
step 18280 loss = 0.414 (2.694 sec/step)
step 18290 loss = 0.402 (3.313 sec/step)
step 18300 loss = 0.421 (2.877 sec/step)
step 18310 loss = 0.429 (2.814 sec/step)
step 18320 loss = 0.414 (2.872 sec/step)
step 18330 loss = 0.396 (2.681 sec/step)
step 18340 loss = 0.394 (2.966 sec/step)
step 18350 loss = 0.405 (2.829 sec/step)
step 18360 loss = 0.410 (2.616 sec/step)
step 18370 loss = 0.408 (2.848 sec/step)
step 18380 loss = 0.393 (2.771 sec/step)
step 18390 loss = 0.393 (2.612 sec/step)
step 18400 loss = 0.390 (3.211 sec/step)
step 18410 loss = 0.407 (2.976 sec/step)
step 18420 loss = 0.408 (2.785 sec/step)
step 18430 loss = 0.421 (2.565 sec/step)
step 18440 loss = 0.406 (2.764 sec/step)
step 18450 loss = 0.447 (2.558 sec/step)
step 18460 loss = 0.416 (2.713 sec/step)
step 18470 loss = 0.415 (2.712 sec/step)
step 18480 loss = 0.424 (2.989 sec/step)
step 18490 loss = 0.380 (2.817 sec/step)
step 18500 loss = 0.414 (2.576 sec/step)
step 18510 loss = 0.395 (2.865 sec/step)
step 18520 loss = 0.400 (3.148 sec/step)
step 18530 loss = 0.408 (2.778 sec/step)
Training Epoch 5/100
step 18540 loss = 0.355 (2.774 sec/step)
step 18550 loss = 0.356 (3.037 sec/step)
step 18560 loss = 0.383 (2.587 sec/step)
step 18570 loss = 0.340 (2.827 sec/step)
step 18580 loss = 0.357 (2.983 sec/step)
step 18590 loss = 0.346 (2.912 sec/step)
step 18600 loss = 0.367 (2.706 sec/step)
step 18610 loss = 0.344 (2.801 sec/step)
step 18620 loss = 0.380 (2.685 sec/step)
step 18630 loss = 0.376 (2.707 sec/step)
step 18640 loss = 0.373 (2.664 sec/step)
step 18650 loss = 0.384 (2.857 sec/step)
step 18660 loss = 0.342 (2.906 sec/step)
step 18670 loss = 0.360 (2.837 sec/step)
step 18680 loss = 0.358 (3.044 sec/step)
step 18690 loss = 0.329 (2.952 sec/step)
step 18700 loss = 0.346 (2.624 sec/step)
step 18710 loss = 0.344 (2.632 sec/step)
step 18720 loss = 0.364 (3.045 sec/step)
step 18730 loss = 0.351 (2.890 sec/step)
step 18740 loss = 0.354 (3.018 sec/step)
step 18750 loss = 0.362 (2.673 sec/step)
step 18760 loss = 0.352 (2.659 sec/step)
step 18770 loss = 0.350 (2.816 sec/step)
step 18780 loss = 0.358 (2.786 sec/step)
step 18790 loss = 0.330 (2.621 sec/step)
step 18800 loss = 0.363 (2.607 sec/step)
step 18810 loss = 0.359 (2.777 sec/step)
step 18820 loss = 0.360 (2.892 sec/step)
step 18830 loss = 0.346 (2.598 sec/step)
step 18840 loss = 0.347 (2.871 sec/step)
step 18850 loss = 0.379 (2.810 sec/step)
step 18860 loss = 0.390 (2.753 sec/step)
step 18870 loss = 0.382 (2.555 sec/step)
step 18880 loss = 0.341 (2.946 sec/step)
step 18890 loss = 0.358 (3.087 sec/step)
step 18900 loss = 0.357 (2.821 sec/step)
step 18910 loss = 0.360 (2.873 sec/step)
step 18920 loss = 0.357 (2.907 sec/step)
step 18930 loss = 0.370 (3.057 sec/step)
step 18940 loss = 0.355 (2.938 sec/step)
step 18950 loss = 0.369 (2.654 sec/step)
step 18960 loss = 0.365 (2.776 sec/step)
step 18970 loss = 0.376 (3.192 sec/step)
step 18980 loss = 0.351 (3.081 sec/step)
step 18990 loss = 0.373 (2.704 sec/step)
step 19000 loss = 0.376 (2.962 sec/step)
step 19010 loss = 0.362 (2.951 sec/step)
step 19020 loss = 0.352 (2.832 sec/step)
step 19030 loss = 0.341 (2.828 sec/step)
step 19040 loss = 0.362 (2.667 sec/step)
step 19050 loss = 0.375 (3.162 sec/step)
step 19060 loss = 0.335 (2.997 sec/step)
step 19070 loss = 0.356 (2.950 sec/step)
step 19080 loss = 0.349 (2.912 sec/step)
step 19090 loss = 0.403 (3.164 sec/step)
step 19100 loss = 0.375 (2.705 sec/step)
step 19110 loss = 0.359 (2.787 sec/step)
step 19120 loss = 0.376 (2.699 sec/step)
step 19130 loss = 0.347 (2.810 sec/step)
step 19140 loss = 0.356 (2.911 sec/step)
step 19150 loss = 0.380 (2.768 sec/step)
step 19160 loss = 0.348 (2.833 sec/step)
step 19170 loss = 0.365 (3.038 sec/step)
step 19180 loss = 0.372 (2.692 sec/step)
step 19190 loss = 0.352 (2.766 sec/step)
step 19200 loss = 0.362 (2.744 sec/step)
step 19210 loss = 0.369 (3.279 sec/step)
step 19220 loss = 0.374 (2.559 sec/step)
step 19230 loss = 0.365 (2.912 sec/step)
step 19240 loss = 0.352 (2.786 sec/step)
step 19250 loss = 0.385 (2.914 sec/step)
step 19260 loss = 0.365 (2.896 sec/step)
step 19270 loss = 0.370 (2.904 sec/step)
step 19280 loss = 0.371 (2.475 sec/step)
step 19290 loss = 0.364 (3.138 sec/step)
step 19300 loss = 0.374 (2.933 sec/step)
step 19310 loss = 0.360 (2.792 sec/step)
step 19320 loss = 0.364 (2.466 sec/step)
step 19330 loss = 0.370 (2.721 sec/step)
step 19340 loss = 0.358 (2.937 sec/step)
step 19350 loss = 0.383 (2.828 sec/step)
step 19360 loss = 0.361 (2.803 sec/step)
step 19370 loss = 0.382 (2.634 sec/step)
step 19380 loss = 0.384 (2.814 sec/step)
step 19390 loss = 0.372 (3.049 sec/step)
step 19400 loss = 0.365 (2.907 sec/step)
step 19410 loss = 0.364 (2.755 sec/step)
step 19420 loss = 0.354 (3.126 sec/step)
step 19430 loss = 0.356 (2.995 sec/step)
step 19440 loss = 0.353 (2.641 sec/step)
step 19450 loss = 0.351 (3.155 sec/step)
step 19460 loss = 0.376 (2.774 sec/step)
step 19470 loss = 0.380 (2.626 sec/step)
step 19480 loss = 0.355 (2.746 sec/step)
step 19490 loss = 0.363 (2.654 sec/step)
step 19500 loss = 0.369 (2.961 sec/step)
step 19510 loss = 0.372 (2.977 sec/step)
step 19520 loss = 0.350 (2.882 sec/step)
step 19530 loss = 0.368 (2.819 sec/step)
step 19540 loss = 0.367 (2.424 sec/step)
step 19550 loss = 0.372 (2.882 sec/step)
step 19560 loss = 0.350 (2.685 sec/step)
step 19570 loss = 0.371 (2.807 sec/step)
step 19580 loss = 0.333 (2.874 sec/step)
step 19590 loss = 0.380 (2.681 sec/step)
step 19600 loss = 0.391 (2.519 sec/step)
step 19610 loss = 0.371 (2.859 sec/step)
step 19620 loss = 0.355 (2.604 sec/step)
step 19630 loss = 0.364 (2.748 sec/step)
step 19640 loss = 0.344 (2.722 sec/step)
step 19650 loss = 0.379 (2.871 sec/step)
step 19660 loss = 0.367 (2.573 sec/step)
step 19670 loss = 0.387 (2.888 sec/step)
step 19680 loss = 0.356 (2.754 sec/step)
step 19690 loss = 0.369 (2.827 sec/step)
step 19700 loss = 0.351 (2.657 sec/step)
step 19710 loss = 0.365 (2.781 sec/step)
step 19720 loss = 0.346 (2.927 sec/step)
step 19730 loss = 0.371 (2.813 sec/step)
step 19740 loss = 0.358 (3.128 sec/step)
step 19750 loss = 0.369 (2.886 sec/step)
step 19760 loss = 0.352 (3.070 sec/step)
step 19770 loss = 0.363 (2.788 sec/step)
step 19780 loss = 0.356 (2.591 sec/step)
step 19790 loss = 0.368 (3.157 sec/step)
step 19800 loss = 0.341 (2.614 sec/step)
step 19810 loss = 0.363 (2.877 sec/step)
step 19820 loss = 0.369 (2.755 sec/step)
step 19830 loss = 0.360 (2.831 sec/step)
step 19840 loss = 0.354 (2.811 sec/step)
step 19850 loss = 0.347 (2.824 sec/step)
step 19860 loss = 0.360 (2.954 sec/step)
step 19870 loss = 0.369 (2.874 sec/step)
step 19880 loss = 0.361 (2.652 sec/step)
step 19890 loss = 0.356 (2.635 sec/step)
step 19900 loss = 0.383 (2.826 sec/step)
step 19910 loss = 0.364 (2.639 sec/step)
step 19920 loss = 0.365 (3.150 sec/step)
step 19930 loss = 0.365 (2.803 sec/step)
step 19940 loss = 0.396 (2.687 sec/step)
step 19950 loss = 0.364 (2.691 sec/step)
step 19960 loss = 0.375 (3.043 sec/step)
step 19970 loss = 0.358 (2.724 sec/step)
step 19980 loss = 0.377 (2.880 sec/step)
step 19990 loss = 0.360 (2.927 sec/step)
step 20000 loss = 0.350 (2.943 sec/step)
step 20010 loss = 0.369 (2.842 sec/step)
step 20020 loss = 0.377 (3.057 sec/step)
step 20030 loss = 0.369 (3.134 sec/step)
step 20040 loss = 0.368 (2.824 sec/step)
step 20050 loss = 0.370 (2.658 sec/step)
step 20060 loss = 0.358 (2.790 sec/step)
step 20070 loss = 0.364 (2.956 sec/step)
step 20080 loss = 0.366 (3.107 sec/step)
step 20090 loss = 0.360 (2.715 sec/step)
step 20100 loss = 0.367 (2.889 sec/step)
step 20110 loss = 0.379 (2.729 sec/step)
step 20120 loss = 0.403 (2.906 sec/step)
step 20130 loss = 0.377 (2.909 sec/step)
step 20140 loss = 0.360 (3.061 sec/step)
step 20150 loss = 0.367 (2.845 sec/step)
step 20160 loss = 0.383 (2.929 sec/step)
step 20170 loss = 0.375 (3.125 sec/step)
step 20180 loss = 0.350 (2.998 sec/step)
step 20190 loss = 0.380 (2.719 sec/step)
step 20200 loss = 0.378 (2.890 sec/step)
step 20210 loss = 0.360 (2.641 sec/step)
step 20220 loss = 0.358 (2.780 sec/step)
step 20230 loss = 0.365 (2.655 sec/step)
step 20240 loss = 0.382 (3.074 sec/step)
step 20250 loss = 0.361 (2.668 sec/step)
step 20260 loss = 0.354 (2.810 sec/step)
step 20270 loss = 0.356 (2.725 sec/step)
step 20280 loss = 0.392 (2.724 sec/step)
step 20290 loss = 0.379 (2.823 sec/step)
step 20300 loss = 0.374 (2.828 sec/step)
step 20310 loss = 0.358 (2.996 sec/step)
step 20320 loss = 0.359 (2.788 sec/step)
step 20330 loss = 0.327 (2.614 sec/step)
step 20340 loss = 0.367 (2.881 sec/step)
step 20350 loss = 0.379 (3.128 sec/step)
step 20360 loss = 0.358 (2.735 sec/step)
step 20370 loss = 0.373 (2.911 sec/step)
step 20380 loss = 0.421 (2.967 sec/step)
step 20390 loss = 0.371 (2.901 sec/step)
step 20400 loss = 0.373 (2.803 sec/step)
step 20410 loss = 0.404 (2.971 sec/step)
step 20420 loss = 0.369 (2.996 sec/step)
step 20430 loss = 0.361 (3.032 sec/step)
step 20440 loss = 0.356 (2.839 sec/step)
step 20450 loss = 0.389 (2.677 sec/step)
step 20460 loss = 0.371 (2.588 sec/step)
step 20470 loss = 0.370 (3.066 sec/step)
step 20480 loss = 0.359 (2.736 sec/step)
step 20490 loss = 0.377 (2.936 sec/step)
step 20500 loss = 0.356 (2.707 sec/step)
step 20510 loss = 0.367 (2.749 sec/step)
step 20520 loss = 0.348 (2.778 sec/step)
step 20530 loss = 0.362 (2.633 sec/step)
step 20540 loss = 0.345 (2.957 sec/step)
step 20550 loss = 0.388 (2.716 sec/step)
step 20560 loss = 0.337 (2.814 sec/step)
step 20570 loss = 0.362 (2.975 sec/step)
step 20580 loss = 0.371 (2.813 sec/step)
step 20590 loss = 0.366 (2.981 sec/step)
step 20600 loss = 0.375 (3.109 sec/step)
step 20610 loss = 0.364 (2.650 sec/step)
step 20620 loss = 0.372 (2.759 sec/step)
step 20630 loss = 0.362 (2.933 sec/step)
step 20640 loss = 0.351 (2.899 sec/step)
step 20650 loss = 0.369 (3.237 sec/step)
step 20660 loss = 0.369 (2.979 sec/step)
step 20670 loss = 0.378 (2.507 sec/step)
step 20680 loss = 0.374 (3.105 sec/step)
step 20690 loss = 0.371 (2.803 sec/step)
step 20700 loss = 0.368 (2.816 sec/step)
step 20710 loss = 0.361 (2.531 sec/step)
step 20720 loss = 0.383 (2.918 sec/step)
step 20730 loss = 0.365 (2.872 sec/step)
step 20740 loss = 0.367 (2.783 sec/step)
step 20750 loss = 0.369 (2.906 sec/step)
step 20760 loss = 0.371 (2.950 sec/step)
step 20770 loss = 0.358 (2.873 sec/step)
step 20780 loss = 0.375 (2.828 sec/step)
step 20790 loss = 0.367 (2.667 sec/step)
step 20800 loss = 0.374 (3.034 sec/step)
step 20810 loss = 0.397 (2.868 sec/step)
step 20820 loss = 0.358 (2.955 sec/step)
step 20830 loss = 0.379 (2.855 sec/step)
step 20840 loss = 0.384 (2.721 sec/step)
step 20850 loss = 0.358 (3.401 sec/step)
step 20860 loss = 0.397 (2.762 sec/step)
step 20870 loss = 0.377 (2.699 sec/step)
step 20880 loss = 0.378 (2.794 sec/step)
step 20890 loss = 0.382 (2.742 sec/step)
step 20900 loss = 0.379 (2.926 sec/step)
step 20910 loss = 0.384 (2.808 sec/step)
step 20920 loss = 0.383 (2.937 sec/step)
step 20930 loss = 0.365 (2.664 sec/step)
step 20940 loss = 0.368 (2.823 sec/step)
step 20950 loss = 0.356 (2.734 sec/step)
step 20960 loss = 0.364 (2.636 sec/step)
step 20970 loss = 0.397 (3.332 sec/step)
step 20980 loss = 0.388 (2.852 sec/step)
step 20990 loss = 0.393 (2.538 sec/step)
step 21000 loss = 0.399 (2.548 sec/step)
step 21010 loss = 0.388 (2.817 sec/step)
step 21020 loss = 0.383 (2.841 sec/step)
step 21030 loss = 0.394 (2.659 sec/step)
step 21040 loss = 0.354 (2.839 sec/step)
step 21050 loss = 0.384 (3.299 sec/step)
step 21060 loss = 0.396 (2.738 sec/step)
step 21070 loss = 0.370 (2.815 sec/step)
step 21080 loss = 0.367 (2.710 sec/step)
step 21090 loss = 0.371 (2.850 sec/step)
step 21100 loss = 0.372 (2.899 sec/step)
step 21110 loss = 0.387 (3.200 sec/step)
step 21120 loss = 0.362 (2.645 sec/step)
step 21130 loss = 0.358 (2.670 sec/step)
step 21140 loss = 0.378 (2.267 sec/step)
step 21150 loss = 0.378 (2.206 sec/step)
step 21160 loss = 0.372 (2.361 sec/step)
step 21170 loss = 0.375 (2.366 sec/step)
step 21180 loss = 0.380 (2.288 sec/step)
step 21190 loss = 0.376 (2.352 sec/step)
step 21200 loss = 0.377 (2.254 sec/step)
step 21210 loss = 0.380 (2.469 sec/step)
step 21220 loss = 0.371 (2.339 sec/step)
step 21230 loss = 0.365 (2.301 sec/step)
step 21240 loss = 0.359 (2.402 sec/step)
step 21250 loss = 0.370 (2.406 sec/step)
step 21260 loss = 0.378 (2.460 sec/step)
step 21270 loss = 0.340 (3.990 sec/step)
step 21280 loss = 0.385 (2.546 sec/step)
step 21290 loss = 0.386 (2.454 sec/step)
step 21300 loss = 0.367 (2.901 sec/step)
step 21310 loss = 0.376 (2.724 sec/step)
step 21320 loss = 0.360 (2.699 sec/step)
step 21330 loss = 0.360 (3.021 sec/step)
step 21340 loss = 0.370 (2.714 sec/step)
step 21350 loss = 0.370 (2.783 sec/step)
step 21360 loss = 0.399 (3.120 sec/step)
step 21370 loss = 0.364 (2.804 sec/step)
step 21380 loss = 0.378 (3.239 sec/step)
step 21390 loss = 0.367 (2.625 sec/step)
step 21400 loss = 0.369 (2.924 sec/step)
step 21410 loss = 0.374 (2.876 sec/step)
step 21420 loss = 0.376 (2.655 sec/step)
step 21430 loss = 0.368 (2.997 sec/step)
step 21440 loss = 0.379 (2.799 sec/step)
step 21450 loss = 0.372 (2.638 sec/step)
step 21460 loss = 0.392 (3.130 sec/step)
step 21470 loss = 0.375 (2.660 sec/step)
step 21480 loss = 0.394 (2.860 sec/step)
step 21490 loss = 0.394 (2.609 sec/step)
step 21500 loss = 0.392 (3.077 sec/step)
step 21510 loss = 0.381 (3.345 sec/step)
step 21520 loss = 0.375 (2.554 sec/step)
step 21530 loss = 0.393 (2.679 sec/step)
step 21540 loss = 0.397 (2.625 sec/step)
step 21550 loss = 0.403 (2.659 sec/step)
step 21560 loss = 0.379 (3.135 sec/step)
step 21570 loss = 0.395 (2.765 sec/step)
step 21580 loss = 0.366 (3.082 sec/step)
step 21590 loss = 0.379 (2.876 sec/step)
step 21600 loss = 0.388 (2.656 sec/step)
step 21610 loss = 0.369 (2.948 sec/step)
step 21620 loss = 0.373 (2.871 sec/step)
step 21630 loss = 0.359 (2.625 sec/step)
step 21640 loss = 0.389 (2.906 sec/step)
step 21650 loss = 0.359 (2.710 sec/step)
step 21660 loss = 0.398 (3.208 sec/step)
step 21670 loss = 0.357 (3.030 sec/step)
step 21680 loss = 0.372 (2.895 sec/step)
step 21690 loss = 0.373 (3.069 sec/step)
step 21700 loss = 0.403 (2.725 sec/step)
step 21710 loss = 0.336 (3.251 sec/step)
step 21720 loss = 0.361 (3.169 sec/step)
step 21730 loss = 0.377 (2.809 sec/step)
step 21740 loss = 0.385 (2.716 sec/step)
step 21750 loss = 0.383 (2.750 sec/step)
step 21760 loss = 0.380 (2.816 sec/step)
step 21770 loss = 0.384 (2.852 sec/step)
step 21780 loss = 0.397 (2.667 sec/step)
step 21790 loss = 0.376 (2.953 sec/step)
step 21800 loss = 0.378 (2.580 sec/step)
step 21810 loss = 0.387 (2.770 sec/step)
step 21820 loss = 0.390 (2.888 sec/step)
step 21830 loss = 0.393 (2.756 sec/step)
step 21840 loss = 0.390 (2.739 sec/step)
step 21850 loss = 0.376 (3.032 sec/step)
step 21860 loss = 0.354 (3.154 sec/step)
step 21870 loss = 0.391 (2.743 sec/step)
step 21880 loss = 0.369 (2.914 sec/step)
step 21890 loss = 0.356 (2.703 sec/step)
step 21900 loss = 0.392 (2.644 sec/step)
step 21910 loss = 0.395 (2.761 sec/step)
step 21920 loss = 0.388 (2.934 sec/step)
step 21930 loss = 0.360 (2.861 sec/step)
step 21940 loss = 0.366 (2.743 sec/step)
step 21950 loss = 0.370 (2.930 sec/step)
step 21960 loss = 0.372 (2.761 sec/step)
step 21970 loss = 0.359 (2.824 sec/step)
step 21980 loss = 0.385 (2.790 sec/step)
step 21990 loss = 0.363 (3.133 sec/step)
step 22000 loss = 0.387 (2.921 sec/step)
step 22010 loss = 0.403 (3.005 sec/step)
step 22020 loss = 0.376 (2.634 sec/step)
step 22030 loss = 0.397 (2.769 sec/step)
step 22040 loss = 0.380 (2.708 sec/step)
step 22050 loss = 0.387 (2.794 sec/step)
step 22060 loss = 0.399 (2.948 sec/step)
step 22070 loss = 0.359 (2.695 sec/step)
step 22080 loss = 0.387 (2.878 sec/step)
step 22090 loss = 0.358 (3.095 sec/step)
step 22100 loss = 0.376 (2.818 sec/step)
step 22110 loss = 0.386 (2.785 sec/step)
step 22120 loss = 0.366 (2.875 sec/step)
step 22130 loss = 0.387 (2.499 sec/step)
step 22140 loss = 0.371 (2.748 sec/step)
step 22150 loss = 0.389 (2.846 sec/step)
step 22160 loss = 0.348 (2.471 sec/step)
step 22170 loss = 0.342 (2.988 sec/step)
step 22180 loss = 0.382 (2.745 sec/step)
step 22190 loss = 0.378 (2.678 sec/step)
step 22200 loss = 0.400 (2.754 sec/step)
step 22210 loss = 0.383 (2.922 sec/step)
step 22220 loss = 0.384 (2.712 sec/step)
step 22230 loss = 0.398 (2.789 sec/step)
step 22240 loss = 0.388 (2.568 sec/step)
Training Epoch 6/100
step 22250 loss = 0.354 (2.833 sec/step)
step 22260 loss = 0.343 (3.162 sec/step)
step 22270 loss = 0.345 (3.185 sec/step)
step 22280 loss = 0.330 (3.421 sec/step)
step 22290 loss = 0.320 (2.846 sec/step)
step 22300 loss = 0.329 (2.967 sec/step)
step 22310 loss = 0.357 (3.029 sec/step)
step 22320 loss = 0.315 (2.761 sec/step)
step 22330 loss = 0.338 (2.582 sec/step)
step 22340 loss = 0.324 (3.010 sec/step)
step 22350 loss = 0.334 (2.865 sec/step)
step 22360 loss = 0.333 (2.696 sec/step)
step 22370 loss = 0.322 (2.576 sec/step)
step 22380 loss = 0.303 (2.986 sec/step)
step 22390 loss = 0.328 (3.023 sec/step)
step 22400 loss = 0.359 (2.686 sec/step)
step 22410 loss = 0.307 (2.769 sec/step)
step 22420 loss = 0.328 (3.332 sec/step)
step 22430 loss = 0.313 (2.927 sec/step)
step 22440 loss = 0.346 (2.723 sec/step)
step 22450 loss = 0.333 (2.900 sec/step)
step 22460 loss = 0.337 (2.867 sec/step)
step 22470 loss = 0.345 (2.816 sec/step)
step 22480 loss = 0.336 (2.554 sec/step)
step 22490 loss = 0.314 (2.834 sec/step)
step 22500 loss = 0.307 (2.699 sec/step)
step 22510 loss = 0.320 (2.957 sec/step)
step 22520 loss = 0.313 (2.521 sec/step)
step 22530 loss = 0.330 (2.617 sec/step)
step 22540 loss = 0.327 (2.621 sec/step)
step 22550 loss = 0.343 (2.720 sec/step)
step 22560 loss = 0.352 (2.832 sec/step)
step 22570 loss = 0.313 (3.032 sec/step)
step 22580 loss = 0.300 (2.792 sec/step)
step 22590 loss = 0.343 (2.957 sec/step)
step 22600 loss = 0.337 (2.622 sec/step)
step 22610 loss = 0.314 (2.678 sec/step)
step 22620 loss = 0.319 (2.768 sec/step)
step 22630 loss = 0.323 (2.634 sec/step)
step 22640 loss = 0.349 (3.103 sec/step)
step 22650 loss = 0.343 (2.889 sec/step)
step 22660 loss = 0.336 (2.769 sec/step)
step 22670 loss = 0.340 (2.983 sec/step)
step 22680 loss = 0.337 (2.730 sec/step)
step 22690 loss = 0.328 (3.042 sec/step)
step 22700 loss = 0.321 (3.037 sec/step)
step 22710 loss = 0.327 (2.814 sec/step)
step 22720 loss = 0.330 (2.820 sec/step)
step 22730 loss = 0.317 (2.728 sec/step)
step 22740 loss = 0.328 (2.930 sec/step)
step 22750 loss = 0.325 (2.631 sec/step)
step 22760 loss = 0.362 (2.960 sec/step)
step 22770 loss = 0.308 (3.138 sec/step)
step 22780 loss = 0.337 (2.970 sec/step)
step 22790 loss = 0.329 (3.100 sec/step)
step 22800 loss = 0.340 (2.708 sec/step)
step 22810 loss = 0.312 (2.672 sec/step)
step 22820 loss = 0.312 (3.084 sec/step)
step 22830 loss = 0.327 (2.637 sec/step)
step 22840 loss = 0.299 (2.891 sec/step)
step 22850 loss = 0.313 (2.795 sec/step)
step 22860 loss = 0.319 (2.786 sec/step)
step 22870 loss = 0.329 (2.944 sec/step)
step 22880 loss = 0.336 (2.810 sec/step)
step 22890 loss = 0.313 (2.964 sec/step)
step 22900 loss = 0.328 (2.921 sec/step)
step 22910 loss = 0.312 (2.742 sec/step)
step 22920 loss = 0.329 (3.046 sec/step)
step 22930 loss = 0.339 (2.650 sec/step)
step 22940 loss = 0.342 (2.894 sec/step)
step 22950 loss = 0.283 (2.643 sec/step)
step 22960 loss = 0.337 (2.879 sec/step)
step 22970 loss = 0.345 (2.891 sec/step)
step 22980 loss = 0.311 (2.792 sec/step)
step 22990 loss = 0.313 (2.875 sec/step)
step 23000 loss = 0.323 (2.657 sec/step)
step 23010 loss = 0.320 (2.831 sec/step)
step 23020 loss = 0.328 (2.750 sec/step)
step 23030 loss = 0.336 (2.687 sec/step)
step 23040 loss = 0.335 (3.072 sec/step)
step 23050 loss = 0.325 (3.068 sec/step)
step 23060 loss = 0.323 (3.226 sec/step)
step 23070 loss = 0.335 (2.808 sec/step)
step 23080 loss = 0.328 (2.827 sec/step)
step 23090 loss = 0.331 (3.045 sec/step)
step 23100 loss = 0.339 (2.541 sec/step)
step 23110 loss = 0.313 (2.764 sec/step)
step 23120 loss = 0.326 (2.770 sec/step)
step 23130 loss = 0.346 (2.933 sec/step)
step 23140 loss = 0.327 (2.871 sec/step)
step 23150 loss = 0.336 (2.625 sec/step)
step 23160 loss = 0.330 (2.804 sec/step)
step 23170 loss = 0.310 (2.696 sec/step)
step 23180 loss = 0.321 (2.979 sec/step)
step 23190 loss = 0.329 (3.042 sec/step)
step 23200 loss = 0.324 (3.010 sec/step)
step 23210 loss = 0.344 (2.820 sec/step)
step 23220 loss = 0.342 (3.045 sec/step)
step 23230 loss = 0.339 (2.907 sec/step)
step 23240 loss = 0.309 (3.032 sec/step)
step 23250 loss = 0.319 (2.873 sec/step)
step 23260 loss = 0.326 (2.875 sec/step)
step 23270 loss = 0.340 (2.627 sec/step)
step 23280 loss = 0.341 (2.634 sec/step)
step 23290 loss = 0.356 (2.691 sec/step)
step 23300 loss = 0.329 (2.855 sec/step)
step 23310 loss = 0.331 (2.924 sec/step)
step 23320 loss = 0.330 (3.151 sec/step)
step 23330 loss = 0.350 (2.626 sec/step)
step 23340 loss = 0.338 (2.843 sec/step)
step 23350 loss = 0.328 (2.637 sec/step)
step 23360 loss = 0.317 (2.612 sec/step)
step 23370 loss = 0.344 (3.017 sec/step)
step 23380 loss = 0.299 (2.763 sec/step)
step 23390 loss = 0.315 (2.791 sec/step)
step 23400 loss = 0.320 (2.663 sec/step)
step 23410 loss = 0.315 (2.645 sec/step)
step 23420 loss = 0.339 (2.669 sec/step)
step 23430 loss = 0.307 (2.645 sec/step)
step 23440 loss = 0.336 (2.764 sec/step)
step 23450 loss = 0.346 (2.886 sec/step)
step 23460 loss = 0.314 (3.334 sec/step)
step 23470 loss = 0.334 (2.999 sec/step)
step 23480 loss = 0.331 (2.930 sec/step)
step 23490 loss = 0.323 (2.801 sec/step)
step 23500 loss = 0.308 (2.778 sec/step)
step 23510 loss = 0.323 (2.932 sec/step)
step 23520 loss = 0.330 (2.859 sec/step)
step 23530 loss = 0.315 (2.771 sec/step)
step 23540 loss = 0.334 (3.090 sec/step)
step 23550 loss = 0.323 (2.774 sec/step)
step 23560 loss = 0.296 (2.906 sec/step)
step 23570 loss = 0.321 (2.533 sec/step)
step 23580 loss = 0.329 (2.627 sec/step)
step 23590 loss = 0.347 (2.936 sec/step)
step 23600 loss = 0.350 (2.973 sec/step)
step 23610 loss = 0.327 (2.628 sec/step)
step 23620 loss = 0.304 (3.341 sec/step)
step 23630 loss = 0.331 (2.897 sec/step)
step 23640 loss = 0.340 (2.624 sec/step)
step 23650 loss = 0.324 (2.962 sec/step)
step 23660 loss = 0.312 (2.615 sec/step)
step 23670 loss = 0.326 (2.895 sec/step)
step 23680 loss = 0.313 (2.871 sec/step)
step 23690 loss = 0.308 (2.767 sec/step)
step 23700 loss = 0.312 (2.901 sec/step)
step 23710 loss = 0.332 (2.652 sec/step)
step 23720 loss = 0.329 (2.881 sec/step)
step 23730 loss = 0.323 (2.717 sec/step)
step 23740 loss = 0.321 (2.850 sec/step)
step 23750 loss = 0.326 (2.675 sec/step)
step 23760 loss = 0.340 (2.511 sec/step)
step 23770 loss = 0.330 (2.636 sec/step)
step 23780 loss = 0.314 (2.599 sec/step)
step 23790 loss = 0.334 (2.914 sec/step)
step 23800 loss = 0.340 (3.026 sec/step)
step 23810 loss = 0.349 (2.907 sec/step)
step 23820 loss = 0.308 (2.679 sec/step)
step 23830 loss = 0.327 (2.803 sec/step)
step 23840 loss = 0.328 (2.871 sec/step)
step 23850 loss = 0.339 (2.696 sec/step)
step 23860 loss = 0.321 (2.915 sec/step)
step 23870 loss = 0.336 (2.710 sec/step)
step 23880 loss = 0.338 (2.659 sec/step)
step 23890 loss = 0.334 (2.646 sec/step)
step 23900 loss = 0.313 (2.592 sec/step)
step 23910 loss = 0.329 (2.927 sec/step)
step 23920 loss = 0.316 (2.824 sec/step)
step 23930 loss = 0.347 (2.590 sec/step)
step 23940 loss = 0.320 (2.694 sec/step)
step 23950 loss = 0.325 (2.900 sec/step)
step 23960 loss = 0.337 (2.573 sec/step)
step 23970 loss = 0.328 (2.732 sec/step)
step 23980 loss = 0.324 (2.892 sec/step)
step 23990 loss = 0.332 (2.778 sec/step)
step 24000 loss = 0.307 (2.547 sec/step)
step 24010 loss = 0.335 (2.584 sec/step)
step 24020 loss = 0.340 (2.485 sec/step)
step 24030 loss = 0.338 (2.691 sec/step)
step 24040 loss = 0.335 (2.841 sec/step)
step 24050 loss = 0.332 (3.218 sec/step)
step 24060 loss = 0.310 (2.942 sec/step)
step 24070 loss = 0.313 (2.779 sec/step)
step 24080 loss = 0.322 (2.649 sec/step)
step 24090 loss = 0.352 (2.754 sec/step)
step 24100 loss = 0.319 (2.689 sec/step)
step 24110 loss = 0.330 (2.857 sec/step)
step 24120 loss = 0.356 (2.797 sec/step)
step 24130 loss = 0.355 (2.935 sec/step)
step 24140 loss = 0.348 (2.833 sec/step)
step 24150 loss = 0.323 (2.419 sec/step)
step 24160 loss = 0.322 (2.598 sec/step)
step 24170 loss = 0.329 (2.569 sec/step)
step 24180 loss = 0.330 (2.900 sec/step)
step 24190 loss = 0.326 (2.802 sec/step)
step 24200 loss = 0.328 (3.038 sec/step)
step 24210 loss = 0.322 (2.907 sec/step)
step 24220 loss = 0.304 (2.745 sec/step)
step 24230 loss = 0.361 (2.878 sec/step)
step 24240 loss = 0.332 (2.984 sec/step)
step 24250 loss = 0.365 (2.988 sec/step)
step 24260 loss = 0.307 (2.715 sec/step)
step 24270 loss = 0.322 (2.818 sec/step)
step 24280 loss = 0.330 (2.945 sec/step)
step 24290 loss = 0.316 (2.685 sec/step)
step 24300 loss = 0.327 (2.660 sec/step)
step 24310 loss = 0.331 (2.680 sec/step)
step 24320 loss = 0.328 (3.409 sec/step)
step 24330 loss = 0.335 (2.665 sec/step)
step 24340 loss = 0.329 (2.909 sec/step)
step 24350 loss = 0.311 (3.216 sec/step)
step 24360 loss = 0.345 (2.719 sec/step)
step 24370 loss = 0.336 (2.921 sec/step)
step 24380 loss = 0.341 (2.960 sec/step)
step 24390 loss = 0.323 (2.856 sec/step)
step 24400 loss = 0.316 (2.623 sec/step)
step 24410 loss = 0.335 (2.892 sec/step)
step 24420 loss = 0.341 (2.855 sec/step)
step 24430 loss = 0.327 (2.754 sec/step)
step 24440 loss = 0.327 (2.771 sec/step)
step 24450 loss = 0.347 (3.315 sec/step)
step 24460 loss = 0.323 (2.638 sec/step)
step 24470 loss = 0.330 (2.675 sec/step)
step 24480 loss = 0.319 (2.827 sec/step)
step 24490 loss = 0.323 (2.798 sec/step)
step 24500 loss = 0.324 (2.771 sec/step)
step 24510 loss = 0.348 (2.763 sec/step)
step 24520 loss = 0.335 (2.779 sec/step)
step 24530 loss = 0.332 (2.810 sec/step)
step 24540 loss = 0.361 (2.831 sec/step)
step 24550 loss = 0.334 (2.956 sec/step)
step 24560 loss = 0.327 (2.822 sec/step)
step 24570 loss = 0.326 (2.953 sec/step)
step 24580 loss = 0.339 (2.652 sec/step)
step 24590 loss = 0.307 (2.915 sec/step)
step 24600 loss = 0.344 (2.945 sec/step)
step 24610 loss = 0.320 (3.028 sec/step)
step 24620 loss = 0.344 (2.926 sec/step)
step 24630 loss = 0.361 (2.678 sec/step)
step 24640 loss = 0.335 (2.910 sec/step)
step 24650 loss = 0.319 (2.693 sec/step)
step 24660 loss = 0.319 (3.249 sec/step)
step 24670 loss = 0.332 (2.706 sec/step)
step 24680 loss = 0.347 (2.702 sec/step)
step 24690 loss = 0.346 (2.939 sec/step)
step 24700 loss = 0.340 (2.844 sec/step)
step 24710 loss = 0.322 (2.922 sec/step)
step 24720 loss = 0.337 (2.850 sec/step)
step 24730 loss = 0.324 (2.683 sec/step)
step 24740 loss = 0.320 (2.833 sec/step)
step 24750 loss = 0.338 (2.933 sec/step)
step 24760 loss = 0.316 (2.422 sec/step)
step 24770 loss = 0.311 (2.815 sec/step)
step 24780 loss = 0.330 (3.000 sec/step)
step 24790 loss = 0.322 (2.343 sec/step)
step 24800 loss = 0.325 (2.797 sec/step)
step 24810 loss = 0.313 (2.723 sec/step)
step 24820 loss = 0.306 (2.873 sec/step)
step 24830 loss = 0.322 (2.805 sec/step)
step 24840 loss = 0.329 (2.807 sec/step)
step 24850 loss = 0.322 (2.637 sec/step)
step 24860 loss = 0.340 (3.118 sec/step)
step 24870 loss = 0.339 (2.865 sec/step)
step 24880 loss = 0.340 (2.822 sec/step)
step 24890 loss = 0.359 (2.789 sec/step)
step 24900 loss = 0.332 (2.873 sec/step)
step 24910 loss = 0.346 (2.916 sec/step)
step 24920 loss = 0.336 (2.829 sec/step)
step 24930 loss = 0.308 (2.704 sec/step)
step 24940 loss = 0.315 (2.811 sec/step)
step 24950 loss = 0.327 (2.742 sec/step)
step 24960 loss = 0.334 (2.588 sec/step)
step 24970 loss = 0.325 (2.606 sec/step)
step 24980 loss = 0.324 (2.741 sec/step)
step 24990 loss = 0.326 (3.039 sec/step)
step 25000 loss = 0.319 (3.183 sec/step)
step 25010 loss = 0.324 (2.881 sec/step)
step 25020 loss = 0.335 (2.779 sec/step)
step 25030 loss = 0.340 (3.257 sec/step)
step 25040 loss = 0.336 (2.911 sec/step)
step 25050 loss = 0.318 (3.095 sec/step)
step 25060 loss = 0.343 (2.675 sec/step)
step 25070 loss = 0.345 (2.722 sec/step)
step 25080 loss = 0.327 (2.819 sec/step)
step 25090 loss = 0.328 (2.979 sec/step)
step 25100 loss = 0.326 (2.838 sec/step)
step 25110 loss = 0.328 (2.884 sec/step)
step 25120 loss = 0.326 (2.924 sec/step)
step 25130 loss = 0.339 (3.014 sec/step)
step 25140 loss = 0.336 (3.174 sec/step)
step 25150 loss = 0.325 (2.568 sec/step)
step 25160 loss = 0.328 (2.942 sec/step)
step 25170 loss = 0.332 (2.987 sec/step)
step 25180 loss = 0.317 (2.787 sec/step)
step 25190 loss = 0.342 (2.629 sec/step)
step 25200 loss = 0.329 (3.108 sec/step)
step 25210 loss = 0.352 (3.002 sec/step)
step 25220 loss = 0.338 (2.967 sec/step)
step 25230 loss = 0.331 (2.610 sec/step)
step 25240 loss = 0.340 (3.120 sec/step)
step 25250 loss = 0.337 (2.938 sec/step)
step 25260 loss = 0.342 (2.743 sec/step)
step 25270 loss = 0.330 (2.628 sec/step)
step 25280 loss = 0.315 (2.910 sec/step)
step 25290 loss = 0.327 (2.992 sec/step)
step 25300 loss = 0.322 (2.889 sec/step)
step 25310 loss = 0.311 (2.699 sec/step)
step 25320 loss = 0.359 (2.886 sec/step)
step 25330 loss = 0.344 (2.895 sec/step)
step 25340 loss = 0.355 (2.772 sec/step)
step 25350 loss = 0.344 (2.865 sec/step)
step 25360 loss = 0.338 (3.121 sec/step)
step 25370 loss = 0.324 (2.776 sec/step)
step 25380 loss = 0.347 (2.834 sec/step)
step 25390 loss = 0.341 (2.842 sec/step)
step 25400 loss = 0.360 (2.781 sec/step)
step 25410 loss = 0.337 (2.712 sec/step)
step 25420 loss = 0.327 (2.533 sec/step)
step 25430 loss = 0.332 (2.635 sec/step)
step 25440 loss = 0.339 (2.750 sec/step)
step 25450 loss = 0.352 (2.670 sec/step)
step 25460 loss = 0.340 (2.944 sec/step)
step 25470 loss = 0.333 (2.604 sec/step)
step 25480 loss = 0.323 (3.109 sec/step)
step 25490 loss = 0.353 (2.662 sec/step)
step 25500 loss = 0.334 (2.832 sec/step)
step 25510 loss = 0.333 (2.974 sec/step)
step 25520 loss = 0.333 (2.929 sec/step)
step 25530 loss = 0.336 (2.921 sec/step)
step 25540 loss = 0.322 (2.903 sec/step)
step 25550 loss = 0.310 (2.925 sec/step)
step 25560 loss = 0.329 (2.861 sec/step)
step 25570 loss = 0.325 (2.903 sec/step)
step 25580 loss = 0.305 (2.597 sec/step)
step 25590 loss = 0.313 (2.552 sec/step)
step 25600 loss = 0.343 (2.906 sec/step)
step 25610 loss = 0.340 (2.960 sec/step)
step 25620 loss = 0.328 (3.274 sec/step)
step 25630 loss = 0.326 (2.839 sec/step)
step 25640 loss = 0.350 (2.904 sec/step)
step 25650 loss = 0.331 (2.843 sec/step)
step 25660 loss = 0.351 (2.909 sec/step)
step 25670 loss = 0.347 (2.682 sec/step)
step 25680 loss = 0.337 (2.935 sec/step)
step 25690 loss = 0.314 (2.902 sec/step)
step 25700 loss = 0.318 (3.023 sec/step)
step 25710 loss = 0.355 (2.736 sec/step)
step 25720 loss = 0.362 (2.907 sec/step)
step 25730 loss = 0.331 (2.736 sec/step)
step 25740 loss = 0.347 (2.897 sec/step)
step 25750 loss = 0.345 (2.929 sec/step)
step 25760 loss = 0.331 (2.802 sec/step)
step 25770 loss = 0.329 (2.956 sec/step)
step 25780 loss = 0.354 (2.715 sec/step)
step 25790 loss = 0.345 (2.891 sec/step)
step 25800 loss = 0.334 (2.803 sec/step)
step 25810 loss = 0.320 (3.084 sec/step)
step 25820 loss = 0.333 (2.859 sec/step)
step 25830 loss = 0.337 (2.906 sec/step)
step 25840 loss = 0.299 (2.735 sec/step)
step 25850 loss = 0.344 (3.102 sec/step)
step 25860 loss = 0.354 (2.706 sec/step)
step 25870 loss = 0.343 (2.866 sec/step)
step 25880 loss = 0.326 (2.627 sec/step)
step 25890 loss = 0.368 (2.780 sec/step)
step 25900 loss = 0.347 (3.057 sec/step)
step 25910 loss = 0.332 (3.151 sec/step)
step 25920 loss = 0.329 (2.914 sec/step)
step 25930 loss = 0.336 (2.827 sec/step)
step 25940 loss = 0.329 (2.667 sec/step)
step 25950 loss = 0.314 (2.872 sec/step)
Training Epoch 7/100
step 25960 loss = 0.298 (2.836 sec/step)
step 25970 loss = 0.299 (3.027 sec/step)
step 25980 loss = 0.292 (2.735 sec/step)
step 25990 loss = 0.298 (2.761 sec/step)
step 26000 loss = 0.279 (2.993 sec/step)
step 26010 loss = 0.320 (2.766 sec/step)
step 26020 loss = 0.280 (2.811 sec/step)
step 26030 loss = 0.299 (2.767 sec/step)
step 26040 loss = 0.307 (2.933 sec/step)
step 26050 loss = 0.309 (3.008 sec/step)
step 26060 loss = 0.305 (2.967 sec/step)
step 26070 loss = 0.284 (3.016 sec/step)
step 26080 loss = 0.294 (2.527 sec/step)
step 26090 loss = 0.299 (2.782 sec/step)
step 26100 loss = 0.281 (2.796 sec/step)
step 26110 loss = 0.306 (3.152 sec/step)
step 26120 loss = 0.290 (2.846 sec/step)
step 26130 loss = 0.312 (2.822 sec/step)
step 26140 loss = 0.311 (2.979 sec/step)
step 26150 loss = 0.306 (2.987 sec/step)
step 26160 loss = 0.284 (2.853 sec/step)
step 26170 loss = 0.276 (2.852 sec/step)
step 26180 loss = 0.304 (2.879 sec/step)
step 26190 loss = 0.312 (3.442 sec/step)
step 26200 loss = 0.298 (2.725 sec/step)
step 26210 loss = 0.309 (2.944 sec/step)
step 26220 loss = 0.285 (3.206 sec/step)
step 26230 loss = 0.303 (2.880 sec/step)
step 26240 loss = 0.285 (3.078 sec/step)
step 26250 loss = 0.291 (2.696 sec/step)
step 26260 loss = 0.288 (2.857 sec/step)
step 26270 loss = 0.310 (2.570 sec/step)
step 26280 loss = 0.294 (3.078 sec/step)
step 26290 loss = 0.282 (2.895 sec/step)
step 26300 loss = 0.313 (2.730 sec/step)
step 26310 loss = 0.314 (2.887 sec/step)
step 26320 loss = 0.308 (2.699 sec/step)
step 26330 loss = 0.296 (2.978 sec/step)
step 26340 loss = 0.304 (2.838 sec/step)
step 26350 loss = 0.289 (3.076 sec/step)
step 26360 loss = 0.297 (3.060 sec/step)
step 26370 loss = 0.325 (2.720 sec/step)
step 26380 loss = 0.319 (2.891 sec/step)
step 26390 loss = 0.300 (2.964 sec/step)
step 26400 loss = 0.313 (3.260 sec/step)
step 26410 loss = 0.302 (2.695 sec/step)
step 26420 loss = 0.300 (2.772 sec/step)
step 26430 loss = 0.279 (3.111 sec/step)
step 26440 loss = 0.320 (3.155 sec/step)
step 26450 loss = 0.292 (2.857 sec/step)
step 26460 loss = 0.274 (2.709 sec/step)
step 26470 loss = 0.297 (2.757 sec/step)
step 26480 loss = 0.304 (2.898 sec/step)
step 26490 loss = 0.306 (2.553 sec/step)
step 26500 loss = 0.280 (3.022 sec/step)
step 26510 loss = 0.295 (2.844 sec/step)
step 26520 loss = 0.282 (2.726 sec/step)
step 26530 loss = 0.293 (2.896 sec/step)
step 26540 loss = 0.316 (2.643 sec/step)
step 26550 loss = 0.302 (2.453 sec/step)
step 26560 loss = 0.301 (2.946 sec/step)
step 26570 loss = 0.320 (2.863 sec/step)
step 26580 loss = 0.298 (2.860 sec/step)
step 26590 loss = 0.296 (3.424 sec/step)
step 26600 loss = 0.294 (2.834 sec/step)
step 26610 loss = 0.286 (3.035 sec/step)
step 26620 loss = 0.300 (2.926 sec/step)
step 26630 loss = 0.299 (2.971 sec/step)
step 26640 loss = 0.286 (2.681 sec/step)
step 26650 loss = 0.315 (2.478 sec/step)
step 26660 loss = 0.303 (2.908 sec/step)
step 26670 loss = 0.291 (2.709 sec/step)
step 26680 loss = 0.311 (2.619 sec/step)
step 26690 loss = 0.306 (2.742 sec/step)
step 26700 loss = 0.306 (3.038 sec/step)
step 26710 loss = 0.288 (2.619 sec/step)
step 26720 loss = 0.292 (2.821 sec/step)
step 26730 loss = 0.300 (2.601 sec/step)
step 26740 loss = 0.291 (2.764 sec/step)
step 26750 loss = 0.308 (2.592 sec/step)
step 26760 loss = 0.287 (2.817 sec/step)
step 26770 loss = 0.313 (2.831 sec/step)
step 26780 loss = 0.294 (2.791 sec/step)
step 26790 loss = 0.292 (3.051 sec/step)
step 26800 loss = 0.286 (2.662 sec/step)
step 26810 loss = 0.304 (2.713 sec/step)
step 26820 loss = 0.321 (2.563 sec/step)
step 26830 loss = 0.274 (2.914 sec/step)
step 26840 loss = 0.314 (3.100 sec/step)
step 26850 loss = 0.320 (2.523 sec/step)
step 26860 loss = 0.303 (2.872 sec/step)
step 26870 loss = 0.291 (2.998 sec/step)
step 26880 loss = 0.317 (3.019 sec/step)
step 26890 loss = 0.324 (2.776 sec/step)
step 26900 loss = 0.307 (2.658 sec/step)
step 26910 loss = 0.295 (2.904 sec/step)
step 26920 loss = 0.289 (2.796 sec/step)
step 26930 loss = 0.302 (2.478 sec/step)
step 26940 loss = 0.304 (2.530 sec/step)
step 26950 loss = 0.298 (2.277 sec/step)
step 26960 loss = 0.308 (2.456 sec/step)
step 26970 loss = 0.298 (2.416 sec/step)
step 26980 loss = 0.324 (2.457 sec/step)
step 26990 loss = 0.292 (2.285 sec/step)
step 27000 loss = 0.302 (2.286 sec/step)
step 27010 loss = 0.287 (2.276 sec/step)
step 27020 loss = 0.294 (2.335 sec/step)
step 27030 loss = 0.304 (2.230 sec/step)
step 27040 loss = 0.302 (2.267 sec/step)
step 27050 loss = 0.291 (4.097 sec/step)
step 27060 loss = 0.317 (2.388 sec/step)
step 27070 loss = 0.314 (2.649 sec/step)
step 27080 loss = 0.304 (3.269 sec/step)
step 27090 loss = 0.312 (2.723 sec/step)
step 27100 loss = 0.294 (2.599 sec/step)
step 27110 loss = 0.327 (3.247 sec/step)
step 27120 loss = 0.288 (2.744 sec/step)
step 27130 loss = 0.299 (2.920 sec/step)
step 27140 loss = 0.315 (2.877 sec/step)
step 27150 loss = 0.299 (3.030 sec/step)
step 27160 loss = 0.323 (2.739 sec/step)
step 27170 loss = 0.288 (2.860 sec/step)
step 27180 loss = 0.309 (2.620 sec/step)
step 27190 loss = 0.295 (3.198 sec/step)
step 27200 loss = 0.304 (2.773 sec/step)
step 27210 loss = 0.298 (2.700 sec/step)
step 27220 loss = 0.292 (2.571 sec/step)
step 27230 loss = 0.287 (2.829 sec/step)
step 27240 loss = 0.312 (3.036 sec/step)
step 27250 loss = 0.324 (2.803 sec/step)
step 27260 loss = 0.313 (2.773 sec/step)
step 27270 loss = 0.306 (2.858 sec/step)
step 27280 loss = 0.308 (2.709 sec/step)
step 27290 loss = 0.326 (3.022 sec/step)
step 27300 loss = 0.334 (2.860 sec/step)
step 27310 loss = 0.296 (2.918 sec/step)
step 27320 loss = 0.288 (2.768 sec/step)
step 27330 loss = 0.305 (2.557 sec/step)
step 27340 loss = 0.304 (2.757 sec/step)
step 27350 loss = 0.307 (2.622 sec/step)
step 27360 loss = 0.288 (3.028 sec/step)
step 27370 loss = 0.294 (3.291 sec/step)
step 27380 loss = 0.320 (2.778 sec/step)
step 27390 loss = 0.305 (2.858 sec/step)
step 27400 loss = 0.309 (3.038 sec/step)
step 27410 loss = 0.298 (2.677 sec/step)
step 27420 loss = 0.306 (2.853 sec/step)
step 27430 loss = 0.305 (2.802 sec/step)
step 27440 loss = 0.305 (2.925 sec/step)
step 27450 loss = 0.319 (2.881 sec/step)
step 27460 loss = 0.305 (2.907 sec/step)
step 27470 loss = 0.315 (2.579 sec/step)
step 27480 loss = 0.288 (3.063 sec/step)
step 27490 loss = 0.324 (2.836 sec/step)
step 27500 loss = 0.314 (2.615 sec/step)
step 27510 loss = 0.324 (2.558 sec/step)
step 27520 loss = 0.302 (2.827 sec/step)
step 27530 loss = 0.296 (2.552 sec/step)
step 27540 loss = 0.296 (2.992 sec/step)
step 27550 loss = 0.302 (2.891 sec/step)
step 27560 loss = 0.285 (3.261 sec/step)
step 27570 loss = 0.317 (2.815 sec/step)
step 27580 loss = 0.306 (2.572 sec/step)
step 27590 loss = 0.306 (2.847 sec/step)
step 27600 loss = 0.291 (2.855 sec/step)
step 27610 loss = 0.325 (2.898 sec/step)
step 27620 loss = 0.297 (2.888 sec/step)
step 27630 loss = 0.302 (2.857 sec/step)
step 27640 loss = 0.301 (3.098 sec/step)
step 27650 loss = 0.325 (2.677 sec/step)
step 27660 loss = 0.319 (3.029 sec/step)
step 27670 loss = 0.301 (3.070 sec/step)
step 27680 loss = 0.277 (2.963 sec/step)
step 27690 loss = 0.320 (2.883 sec/step)
step 27700 loss = 0.292 (2.732 sec/step)
step 27710 loss = 0.319 (2.952 sec/step)
step 27720 loss = 0.312 (2.584 sec/step)
step 27730 loss = 0.295 (2.820 sec/step)
step 27740 loss = 0.283 (2.908 sec/step)
step 27750 loss = 0.305 (2.730 sec/step)
step 27760 loss = 0.316 (3.019 sec/step)
step 27770 loss = 0.302 (3.157 sec/step)
step 27780 loss = 0.298 (2.827 sec/step)
step 27790 loss = 0.320 (2.664 sec/step)
step 27800 loss = 0.310 (2.693 sec/step)
step 27810 loss = 0.299 (2.853 sec/step)
step 27820 loss = 0.289 (2.701 sec/step)
step 27830 loss = 0.293 (2.894 sec/step)
step 27840 loss = 0.318 (2.935 sec/step)
step 27850 loss = 0.285 (2.605 sec/step)
step 27860 loss = 0.283 (2.999 sec/step)
step 27870 loss = 0.306 (3.070 sec/step)
step 27880 loss = 0.303 (2.887 sec/step)
step 27890 loss = 0.289 (2.503 sec/step)
step 27900 loss = 0.334 (2.642 sec/step)
step 27910 loss = 0.308 (2.599 sec/step)
step 27920 loss = 0.293 (2.770 sec/step)
step 27930 loss = 0.308 (2.554 sec/step)
step 27940 loss = 0.304 (2.659 sec/step)
step 27950 loss = 0.300 (2.992 sec/step)
step 27960 loss = 0.326 (2.521 sec/step)
step 27970 loss = 0.308 (2.923 sec/step)
step 27980 loss = 0.298 (2.905 sec/step)
step 27990 loss = 0.299 (2.668 sec/step)
step 28000 loss = 0.307 (2.536 sec/step)
step 28010 loss = 0.287 (2.703 sec/step)
step 28020 loss = 0.302 (2.735 sec/step)
step 28030 loss = 0.301 (2.660 sec/step)
step 28040 loss = 0.306 (2.882 sec/step)
step 28050 loss = 0.315 (2.697 sec/step)
step 28060 loss = 0.284 (3.171 sec/step)
step 28070 loss = 0.306 (2.703 sec/step)
step 28080 loss = 0.298 (2.803 sec/step)
step 28090 loss = 0.330 (3.030 sec/step)
step 28100 loss = 0.319 (2.853 sec/step)
step 28110 loss = 0.310 (3.014 sec/step)
step 28120 loss = 0.316 (3.034 sec/step)
step 28130 loss = 0.313 (2.839 sec/step)
step 28140 loss = 0.311 (2.777 sec/step)
step 28150 loss = 0.327 (2.904 sec/step)
step 28160 loss = 0.302 (2.633 sec/step)
step 28170 loss = 0.301 (2.685 sec/step)
step 28180 loss = 0.308 (2.871 sec/step)
step 28190 loss = 0.310 (2.681 sec/step)
step 28200 loss = 0.312 (2.607 sec/step)
step 28210 loss = 0.328 (2.965 sec/step)
step 28220 loss = 0.315 (3.032 sec/step)
step 28230 loss = 0.315 (2.861 sec/step)
step 28240 loss = 0.320 (2.801 sec/step)
step 28250 loss = 0.314 (2.609 sec/step)
step 28260 loss = 0.323 (2.609 sec/step)
step 28270 loss = 0.283 (2.797 sec/step)
step 28280 loss = 0.287 (2.663 sec/step)
step 28290 loss = 0.278 (2.969 sec/step)
step 28300 loss = 0.334 (2.861 sec/step)
step 28310 loss = 0.320 (2.885 sec/step)
step 28320 loss = 0.327 (2.622 sec/step)
step 28330 loss = 0.326 (2.727 sec/step)
step 28340 loss = 0.301 (2.749 sec/step)
step 28350 loss = 0.291 (2.812 sec/step)
step 28360 loss = 0.313 (2.683 sec/step)
step 28370 loss = 0.311 (2.701 sec/step)
step 28380 loss = 0.311 (3.132 sec/step)
step 28390 loss = 0.304 (2.736 sec/step)
step 28400 loss = 0.305 (2.793 sec/step)
step 28410 loss = 0.308 (3.042 sec/step)
step 28420 loss = 0.316 (2.708 sec/step)
step 28430 loss = 0.288 (2.665 sec/step)
step 28440 loss = 0.295 (2.811 sec/step)
step 28450 loss = 0.290 (2.846 sec/step)
step 28460 loss = 0.303 (2.772 sec/step)
step 28470 loss = 0.302 (2.587 sec/step)
step 28480 loss = 0.320 (2.856 sec/step)
step 28490 loss = 0.326 (2.810 sec/step)
step 28500 loss = 0.314 (3.107 sec/step)
step 28510 loss = 0.298 (2.840 sec/step)
step 28520 loss = 0.309 (2.714 sec/step)
step 28530 loss = 0.310 (3.012 sec/step)
step 28540 loss = 0.312 (2.867 sec/step)
step 28550 loss = 0.302 (2.421 sec/step)
step 28560 loss = 0.281 (2.574 sec/step)
step 28570 loss = 0.325 (2.926 sec/step)
step 28580 loss = 0.315 (2.685 sec/step)
step 28590 loss = 0.329 (2.894 sec/step)
step 28600 loss = 0.310 (3.070 sec/step)
step 28610 loss = 0.321 (2.860 sec/step)
step 28620 loss = 0.325 (2.510 sec/step)
step 28630 loss = 0.319 (2.884 sec/step)
step 28640 loss = 0.312 (2.856 sec/step)
step 28650 loss = 0.295 (2.679 sec/step)
step 28660 loss = 0.321 (2.674 sec/step)
step 28670 loss = 0.293 (2.762 sec/step)
step 28680 loss = 0.322 (2.638 sec/step)
step 28690 loss = 0.315 (2.595 sec/step)
step 28700 loss = 0.309 (2.883 sec/step)
step 28710 loss = 0.317 (2.928 sec/step)
step 28720 loss = 0.298 (2.464 sec/step)
step 28730 loss = 0.324 (2.647 sec/step)
step 28740 loss = 0.298 (2.599 sec/step)
step 28750 loss = 0.314 (2.732 sec/step)
step 28760 loss = 0.306 (2.723 sec/step)
step 28770 loss = 0.294 (2.969 sec/step)
step 28780 loss = 0.320 (2.939 sec/step)
step 28790 loss = 0.332 (3.004 sec/step)
step 28800 loss = 0.306 (3.034 sec/step)
step 28810 loss = 0.343 (2.883 sec/step)
step 28820 loss = 0.307 (2.765 sec/step)
step 28830 loss = 0.313 (3.075 sec/step)
step 28840 loss = 0.312 (3.161 sec/step)
step 28850 loss = 0.301 (2.830 sec/step)
step 28860 loss = 0.316 (2.786 sec/step)
step 28870 loss = 0.287 (2.852 sec/step)
step 28880 loss = 0.306 (3.120 sec/step)
step 28890 loss = 0.320 (2.797 sec/step)
step 28900 loss = 0.311 (2.799 sec/step)
step 28910 loss = 0.319 (2.740 sec/step)
step 28920 loss = 0.306 (3.066 sec/step)
step 28930 loss = 0.305 (2.856 sec/step)
step 28940 loss = 0.312 (2.572 sec/step)
step 28950 loss = 0.322 (2.926 sec/step)
step 28960 loss = 0.316 (2.761 sec/step)
step 28970 loss = 0.309 (2.631 sec/step)
step 28980 loss = 0.320 (2.787 sec/step)
step 28990 loss = 0.302 (2.998 sec/step)
step 29000 loss = 0.327 (2.782 sec/step)
step 29010 loss = 0.327 (3.205 sec/step)
step 29020 loss = 0.298 (2.922 sec/step)
step 29030 loss = 0.297 (3.109 sec/step)
step 29040 loss = 0.284 (3.020 sec/step)
step 29050 loss = 0.331 (2.801 sec/step)
step 29060 loss = 0.300 (2.555 sec/step)
step 29070 loss = 0.305 (2.904 sec/step)
step 29080 loss = 0.310 (2.576 sec/step)
step 29090 loss = 0.310 (2.769 sec/step)
step 29100 loss = 0.289 (2.875 sec/step)
step 29110 loss = 0.301 (3.191 sec/step)
step 29120 loss = 0.303 (3.012 sec/step)
step 29130 loss = 0.327 (2.684 sec/step)
step 29140 loss = 0.316 (2.801 sec/step)
step 29150 loss = 0.281 (2.687 sec/step)
step 29160 loss = 0.314 (2.892 sec/step)
step 29170 loss = 0.321 (3.096 sec/step)
step 29180 loss = 0.288 (2.539 sec/step)
step 29190 loss = 0.299 (2.894 sec/step)
step 29200 loss = 0.319 (2.751 sec/step)
step 29210 loss = 0.322 (2.521 sec/step)
step 29220 loss = 0.307 (2.962 sec/step)
step 29230 loss = 0.312 (3.115 sec/step)
step 29240 loss = 0.314 (2.919 sec/step)
step 29250 loss = 0.301 (2.648 sec/step)
step 29260 loss = 0.303 (3.114 sec/step)
step 29270 loss = 0.289 (3.089 sec/step)
step 29280 loss = 0.290 (2.904 sec/step)
step 29290 loss = 0.311 (2.908 sec/step)
step 29300 loss = 0.303 (2.760 sec/step)
step 29310 loss = 0.322 (2.816 sec/step)
step 29320 loss = 0.311 (2.722 sec/step)
step 29330 loss = 0.325 (2.539 sec/step)
step 29340 loss = 0.294 (2.771 sec/step)
step 29350 loss = 0.314 (2.771 sec/step)
step 29360 loss = 0.328 (3.188 sec/step)
step 29370 loss = 0.315 (2.795 sec/step)
step 29380 loss = 0.314 (2.691 sec/step)
step 29390 loss = 0.327 (2.755 sec/step)
step 29400 loss = 0.309 (2.790 sec/step)
step 29410 loss = 0.323 (2.441 sec/step)
step 29420 loss = 0.320 (3.399 sec/step)
step 29430 loss = 0.290 (2.651 sec/step)
step 29440 loss = 0.334 (2.803 sec/step)
step 29450 loss = 0.327 (2.918 sec/step)
step 29460 loss = 0.305 (2.827 sec/step)
step 29470 loss = 0.305 (2.758 sec/step)
step 29480 loss = 0.291 (2.740 sec/step)
step 29490 loss = 0.308 (2.907 sec/step)
step 29500 loss = 0.319 (3.071 sec/step)
step 29510 loss = 0.309 (2.912 sec/step)
step 29520 loss = 0.317 (2.989 sec/step)
step 29530 loss = 0.303 (2.555 sec/step)
step 29540 loss = 0.326 (2.761 sec/step)
step 29550 loss = 0.296 (2.821 sec/step)
step 29560 loss = 0.318 (2.982 sec/step)
step 29570 loss = 0.315 (3.039 sec/step)
step 29580 loss = 0.307 (2.853 sec/step)
step 29590 loss = 0.322 (2.747 sec/step)
step 29600 loss = 0.302 (2.826 sec/step)
step 29610 loss = 0.305 (2.545 sec/step)
step 29620 loss = 0.291 (2.735 sec/step)
step 29630 loss = 0.332 (3.023 sec/step)
step 29640 loss = 0.312 (2.830 sec/step)
step 29650 loss = 0.320 (2.899 sec/step)
step 29660 loss = 0.316 (3.089 sec/step)
Training Epoch 8/100
step 29670 loss = 0.288 (2.945 sec/step)
step 29680 loss = 0.273 (2.587 sec/step)
step 29690 loss = 0.284 (2.881 sec/step)
step 29700 loss = 0.277 (2.737 sec/step)
step 29710 loss = 0.289 (3.000 sec/step)
step 29720 loss = 0.284 (3.013 sec/step)
step 29730 loss = 0.289 (3.063 sec/step)
step 29740 loss = 0.299 (2.666 sec/step)
step 29750 loss = 0.275 (2.548 sec/step)
step 29760 loss = 0.286 (2.934 sec/step)
step 29770 loss = 0.273 (2.640 sec/step)
step 29780 loss = 0.271 (2.948 sec/step)
step 29790 loss = 0.279 (2.900 sec/step)
step 29800 loss = 0.279 (2.568 sec/step)
step 29810 loss = 0.293 (2.689 sec/step)
step 29820 loss = 0.272 (2.870 sec/step)
step 29830 loss = 0.288 (2.928 sec/step)
step 29840 loss = 0.271 (2.742 sec/step)
step 29850 loss = 0.275 (2.928 sec/step)
step 29860 loss = 0.311 (2.865 sec/step)
step 29870 loss = 0.257 (2.822 sec/step)
step 29880 loss = 0.285 (2.732 sec/step)
step 29890 loss = 0.280 (2.777 sec/step)
step 29900 loss = 0.280 (2.915 sec/step)
step 29910 loss = 0.269 (2.640 sec/step)
step 29920 loss = 0.260 (2.806 sec/step)
step 29930 loss = 0.287 (2.635 sec/step)
step 29940 loss = 0.276 (2.766 sec/step)
step 29950 loss = 0.292 (2.547 sec/step)
step 29960 loss = 0.285 (3.029 sec/step)
step 29970 loss = 0.282 (2.694 sec/step)
step 29980 loss = 0.287 (2.665 sec/step)
step 29990 loss = 0.291 (2.827 sec/step)
step 30000 loss = 0.288 (2.915 sec/step)
step 30010 loss = 0.286 (3.060 sec/step)
step 30020 loss = 0.266 (2.910 sec/step)
step 30030 loss = 0.289 (2.734 sec/step)
step 30040 loss = 0.291 (2.927 sec/step)
step 30050 loss = 0.293 (2.892 sec/step)
step 30060 loss = 0.284 (2.790 sec/step)
step 30070 loss = 0.266 (2.583 sec/step)
step 30080 loss = 0.278 (2.768 sec/step)
step 30090 loss = 0.267 (2.820 sec/step)
step 30100 loss = 0.278 (2.699 sec/step)
step 30110 loss = 0.271 (2.537 sec/step)
step 30120 loss = 0.272 (2.878 sec/step)
step 30130 loss = 0.278 (2.999 sec/step)
step 30140 loss = 0.293 (2.666 sec/step)
step 30150 loss = 0.269 (2.624 sec/step)
step 30160 loss = 0.284 (3.117 sec/step)
step 30170 loss = 0.269 (2.687 sec/step)
step 30180 loss = 0.273 (2.819 sec/step)
step 30190 loss = 0.286 (2.817 sec/step)
step 30200 loss = 0.284 (2.548 sec/step)
step 30210 loss = 0.283 (2.951 sec/step)
step 30220 loss = 0.275 (3.257 sec/step)
step 30230 loss = 0.276 (2.739 sec/step)
step 30240 loss = 0.278 (2.559 sec/step)
step 30250 loss = 0.280 (2.767 sec/step)
step 30260 loss = 0.288 (2.875 sec/step)
step 30270 loss = 0.277 (2.927 sec/step)
step 30280 loss = 0.282 (2.665 sec/step)
step 30290 loss = 0.290 (2.941 sec/step)
step 30300 loss = 0.278 (2.846 sec/step)
step 30310 loss = 0.277 (2.885 sec/step)
step 30320 loss = 0.273 (2.868 sec/step)
step 30330 loss = 0.271 (3.116 sec/step)
step 30340 loss = 0.251 (2.622 sec/step)
step 30350 loss = 0.276 (2.869 sec/step)
step 30360 loss = 0.267 (3.105 sec/step)
step 30370 loss = 0.279 (2.763 sec/step)
step 30380 loss = 0.278 (2.685 sec/step)
step 30390 loss = 0.291 (3.082 sec/step)
step 30400 loss = 0.282 (2.636 sec/step)
step 30410 loss = 0.275 (2.587 sec/step)
step 30420 loss = 0.272 (2.768 sec/step)
step 30430 loss = 0.284 (2.895 sec/step)
step 30440 loss = 0.285 (2.749 sec/step)
step 30450 loss = 0.267 (2.802 sec/step)
step 30460 loss = 0.274 (2.676 sec/step)
step 30470 loss = 0.275 (3.026 sec/step)
step 30480 loss = 0.296 (3.084 sec/step)
step 30490 loss = 0.282 (2.683 sec/step)
step 30500 loss = 0.279 (2.804 sec/step)
step 30510 loss = 0.277 (2.815 sec/step)
step 30520 loss = 0.296 (2.947 sec/step)
step 30530 loss = 0.287 (3.035 sec/step)
step 30540 loss = 0.284 (2.834 sec/step)
step 30550 loss = 0.274 (2.884 sec/step)
step 30560 loss = 0.288 (2.580 sec/step)
step 30570 loss = 0.285 (2.686 sec/step)
step 30580 loss = 0.267 (2.942 sec/step)
step 30590 loss = 0.272 (2.801 sec/step)
step 30600 loss = 0.283 (2.623 sec/step)
step 30610 loss = 0.280 (2.515 sec/step)
step 30620 loss = 0.270 (2.651 sec/step)
step 30630 loss = 0.276 (3.290 sec/step)
step 30640 loss = 0.283 (2.629 sec/step)
step 30650 loss = 0.289 (2.766 sec/step)
step 30660 loss = 0.281 (2.926 sec/step)
step 30670 loss = 0.277 (2.619 sec/step)
step 30680 loss = 0.281 (2.806 sec/step)
step 30690 loss = 0.287 (2.568 sec/step)
step 30700 loss = 0.282 (2.629 sec/step)
step 30710 loss = 0.301 (3.095 sec/step)
step 30720 loss = 0.272 (2.811 sec/step)
step 30730 loss = 0.291 (2.845 sec/step)
step 30740 loss = 0.280 (2.842 sec/step)
step 30750 loss = 0.264 (2.775 sec/step)
step 30760 loss = 0.271 (2.810 sec/step)
step 30770 loss = 0.302 (2.916 sec/step)
step 30780 loss = 0.287 (2.740 sec/step)
step 30790 loss = 0.291 (2.634 sec/step)
step 30800 loss = 0.280 (2.918 sec/step)
step 30810 loss = 0.274 (2.837 sec/step)
step 30820 loss = 0.294 (2.944 sec/step)
step 30830 loss = 0.291 (3.023 sec/step)
step 30840 loss = 0.277 (2.525 sec/step)
step 30850 loss = 0.282 (2.977 sec/step)
step 30860 loss = 0.276 (2.778 sec/step)
step 30870 loss = 0.273 (2.863 sec/step)
step 30880 loss = 0.278 (2.931 sec/step)
step 30890 loss = 0.282 (3.210 sec/step)
step 30900 loss = 0.297 (3.097 sec/step)
step 30910 loss = 0.263 (3.253 sec/step)
step 30920 loss = 0.277 (2.827 sec/step)
step 30930 loss = 0.273 (2.687 sec/step)
step 30940 loss = 0.277 (3.056 sec/step)
step 30950 loss = 0.273 (2.954 sec/step)
step 30960 loss = 0.283 (2.932 sec/step)
step 30970 loss = 0.273 (3.007 sec/step)
step 30980 loss = 0.271 (2.733 sec/step)
step 30990 loss = 0.280 (3.003 sec/step)
step 31000 loss = 0.287 (2.894 sec/step)
step 31010 loss = 0.255 (2.790 sec/step)
step 31020 loss = 0.283 (2.763 sec/step)
step 31030 loss = 0.290 (2.455 sec/step)
step 31040 loss = 0.284 (2.833 sec/step)
step 31050 loss = 0.285 (2.760 sec/step)
step 31060 loss = 0.279 (2.994 sec/step)
step 31070 loss = 0.280 (3.062 sec/step)
step 31080 loss = 0.251 (2.634 sec/step)
step 31090 loss = 0.304 (2.882 sec/step)
step 31100 loss = 0.278 (2.805 sec/step)
step 31110 loss = 0.285 (2.781 sec/step)
step 31120 loss = 0.292 (2.606 sec/step)
step 31130 loss = 0.291 (3.194 sec/step)
step 31140 loss = 0.298 (3.014 sec/step)
step 31150 loss = 0.286 (3.104 sec/step)
step 31160 loss = 0.291 (3.113 sec/step)
step 31170 loss = 0.289 (2.849 sec/step)
step 31180 loss = 0.287 (3.125 sec/step)
step 31190 loss = 0.282 (2.591 sec/step)
step 31200 loss = 0.296 (2.915 sec/step)
step 31210 loss = 0.275 (2.653 sec/step)
step 31220 loss = 0.268 (2.798 sec/step)
step 31230 loss = 0.282 (3.149 sec/step)
step 31240 loss = 0.284 (3.161 sec/step)
step 31250 loss = 0.277 (2.956 sec/step)
step 31260 loss = 0.291 (2.484 sec/step)
step 31270 loss = 0.271 (2.630 sec/step)
step 31280 loss = 0.271 (2.779 sec/step)
step 31290 loss = 0.284 (2.929 sec/step)
step 31300 loss = 0.278 (2.625 sec/step)
step 31310 loss = 0.278 (2.764 sec/step)
step 31320 loss = 0.296 (2.641 sec/step)
step 31330 loss = 0.275 (2.976 sec/step)
step 31340 loss = 0.285 (2.766 sec/step)
step 31350 loss = 0.280 (3.065 sec/step)
step 31360 loss = 0.286 (2.819 sec/step)
step 31370 loss = 0.298 (2.585 sec/step)
step 31380 loss = 0.276 (2.762 sec/step)
step 31390 loss = 0.285 (2.988 sec/step)
step 31400 loss = 0.297 (2.688 sec/step)
step 31410 loss = 0.291 (2.837 sec/step)
step 31420 loss = 0.298 (2.853 sec/step)
step 31430 loss = 0.301 (2.701 sec/step)
step 31440 loss = 0.318 (2.798 sec/step)
step 31450 loss = 0.279 (2.936 sec/step)
step 31460 loss = 0.277 (2.768 sec/step)
step 31470 loss = 0.279 (3.023 sec/step)
step 31480 loss = 0.295 (2.608 sec/step)
step 31490 loss = 0.285 (2.664 sec/step)
step 31500 loss = 0.308 (2.793 sec/step)
step 31510 loss = 0.291 (2.658 sec/step)
step 31520 loss = 0.276 (2.564 sec/step)
step 31530 loss = 0.274 (3.079 sec/step)
step 31540 loss = 0.287 (2.793 sec/step)
step 31550 loss = 0.288 (3.217 sec/step)
step 31560 loss = 0.297 (2.886 sec/step)
step 31570 loss = 0.267 (3.114 sec/step)
step 31580 loss = 0.280 (2.867 sec/step)
step 31590 loss = 0.287 (2.973 sec/step)
step 31600 loss = 0.286 (2.870 sec/step)
step 31610 loss = 0.286 (2.831 sec/step)
step 31620 loss = 0.288 (2.811 sec/step)
step 31630 loss = 0.281 (2.635 sec/step)
step 31640 loss = 0.277 (2.917 sec/step)
step 31650 loss = 0.276 (2.452 sec/step)
step 31660 loss = 0.288 (3.008 sec/step)
step 31670 loss = 0.292 (3.383 sec/step)
step 31680 loss = 0.267 (3.265 sec/step)
step 31690 loss = 0.273 (3.011 sec/step)
step 31700 loss = 0.288 (2.782 sec/step)
step 31710 loss = 0.280 (2.509 sec/step)
step 31720 loss = 0.277 (2.640 sec/step)
step 31730 loss = 0.288 (2.845 sec/step)
step 31740 loss = 0.291 (2.991 sec/step)
step 31750 loss = 0.269 (3.192 sec/step)
step 31760 loss = 0.291 (2.704 sec/step)
step 31770 loss = 0.274 (2.746 sec/step)
step 31780 loss = 0.279 (2.828 sec/step)
step 31790 loss = 0.265 (2.851 sec/step)
step 31800 loss = 0.288 (2.689 sec/step)
step 31810 loss = 0.286 (2.875 sec/step)
step 31820 loss = 0.262 (2.944 sec/step)
step 31830 loss = 0.299 (2.910 sec/step)
step 31840 loss = 0.278 (3.134 sec/step)
step 31850 loss = 0.280 (3.015 sec/step)
step 31860 loss = 0.257 (2.458 sec/step)
step 31870 loss = 0.276 (3.233 sec/step)
step 31880 loss = 0.266 (3.053 sec/step)
step 31890 loss = 0.278 (2.903 sec/step)
step 31900 loss = 0.278 (3.019 sec/step)
step 31910 loss = 0.296 (2.822 sec/step)
step 31920 loss = 0.269 (2.759 sec/step)
step 31930 loss = 0.282 (2.963 sec/step)
step 31940 loss = 0.276 (2.957 sec/step)
step 31950 loss = 0.270 (2.943 sec/step)
step 31960 loss = 0.288 (2.765 sec/step)
step 31970 loss = 0.272 (3.138 sec/step)
step 31980 loss = 0.266 (2.979 sec/step)
step 31990 loss = 0.287 (3.054 sec/step)
step 32000 loss = 0.304 (2.958 sec/step)
step 32010 loss = 0.277 (3.037 sec/step)
step 32020 loss = 0.287 (2.918 sec/step)
step 32030 loss = 0.283 (2.753 sec/step)
step 32040 loss = 0.269 (2.719 sec/step)
step 32050 loss = 0.289 (2.787 sec/step)
step 32060 loss = 0.281 (2.902 sec/step)
step 32070 loss = 0.277 (2.651 sec/step)
step 32080 loss = 0.259 (2.831 sec/step)
step 32090 loss = 0.280 (2.510 sec/step)
step 32100 loss = 0.289 (3.147 sec/step)
step 32110 loss = 0.273 (2.938 sec/step)
step 32120 loss = 0.273 (2.715 sec/step)
step 32130 loss = 0.284 (2.932 sec/step)
step 32140 loss = 0.265 (2.939 sec/step)
step 32150 loss = 0.279 (2.998 sec/step)
step 32160 loss = 0.269 (2.595 sec/step)
step 32170 loss = 0.277 (2.836 sec/step)
step 32180 loss = 0.284 (2.824 sec/step)
step 32190 loss = 0.271 (2.763 sec/step)
step 32200 loss = 0.291 (2.815 sec/step)
step 32210 loss = 0.275 (2.916 sec/step)
step 32220 loss = 0.278 (2.645 sec/step)
step 32230 loss = 0.282 (2.820 sec/step)
step 32240 loss = 0.306 (2.950 sec/step)
step 32250 loss = 0.298 (3.005 sec/step)
step 32260 loss = 0.308 (2.657 sec/step)
step 32270 loss = 0.278 (2.647 sec/step)
step 32280 loss = 0.293 (2.945 sec/step)
step 32290 loss = 0.284 (2.700 sec/step)
step 32300 loss = 0.267 (2.867 sec/step)
step 32310 loss = 0.302 (2.841 sec/step)
step 32320 loss = 0.282 (2.743 sec/step)
step 32330 loss = 0.265 (2.689 sec/step)
step 32340 loss = 0.274 (2.945 sec/step)
step 32350 loss = 0.295 (2.794 sec/step)
step 32360 loss = 0.283 (3.153 sec/step)
step 32370 loss = 0.258 (3.300 sec/step)
step 32380 loss = 0.282 (2.440 sec/step)
step 32390 loss = 0.280 (2.932 sec/step)
step 32400 loss = 0.281 (2.669 sec/step)
step 32410 loss = 0.264 (2.953 sec/step)
step 32420 loss = 0.295 (2.862 sec/step)
step 32430 loss = 0.290 (2.774 sec/step)
step 32440 loss = 0.308 (2.798 sec/step)
step 32450 loss = 0.309 (3.240 sec/step)
step 32460 loss = 0.267 (2.712 sec/step)
step 32470 loss = 0.306 (2.775 sec/step)
step 32480 loss = 0.277 (2.647 sec/step)
step 32490 loss = 0.272 (2.647 sec/step)
step 32500 loss = 0.263 (2.694 sec/step)
step 32510 loss = 0.296 (2.824 sec/step)
step 32520 loss = 0.285 (2.781 sec/step)
step 32530 loss = 0.290 (2.827 sec/step)
step 32540 loss = 0.274 (2.944 sec/step)
step 32550 loss = 0.277 (2.866 sec/step)
step 32560 loss = 0.282 (2.972 sec/step)
step 32570 loss = 0.269 (2.797 sec/step)
step 32580 loss = 0.288 (2.613 sec/step)
step 32590 loss = 0.290 (2.665 sec/step)
step 32600 loss = 0.286 (2.908 sec/step)
step 32610 loss = 0.280 (2.871 sec/step)
step 32620 loss = 0.300 (2.920 sec/step)
step 32630 loss = 0.286 (2.910 sec/step)
step 32640 loss = 0.269 (2.771 sec/step)
step 32650 loss = 0.269 (2.632 sec/step)
step 32660 loss = 0.292 (3.230 sec/step)
step 32670 loss = 0.283 (2.752 sec/step)
step 32680 loss = 0.298 (3.141 sec/step)
step 32690 loss = 0.280 (2.412 sec/step)
step 32700 loss = 0.279 (2.452 sec/step)
step 32710 loss = 0.286 (2.374 sec/step)
step 32720 loss = 0.287 (2.271 sec/step)
step 32730 loss = 0.298 (2.430 sec/step)
step 32740 loss = 0.297 (2.209 sec/step)
step 32750 loss = 0.271 (2.383 sec/step)
step 32760 loss = 0.286 (2.335 sec/step)
step 32770 loss = 0.286 (2.495 sec/step)
step 32780 loss = 0.294 (2.452 sec/step)
step 32790 loss = 0.278 (2.359 sec/step)
step 32800 loss = 0.293 (2.361 sec/step)
step 32810 loss = 0.294 (2.216 sec/step)
step 32820 loss = 0.269 (2.242 sec/step)
step 32830 loss = 0.286 (3.902 sec/step)
step 32840 loss = 0.293 (2.621 sec/step)
step 32850 loss = 0.281 (3.027 sec/step)
step 32860 loss = 0.285 (2.720 sec/step)
step 32870 loss = 0.300 (2.471 sec/step)
step 32880 loss = 0.288 (2.810 sec/step)
step 32890 loss = 0.283 (3.106 sec/step)
step 32900 loss = 0.266 (3.007 sec/step)
step 32910 loss = 0.281 (2.941 sec/step)
step 32920 loss = 0.284 (2.868 sec/step)
step 32930 loss = 0.278 (2.690 sec/step)
step 32940 loss = 0.290 (2.934 sec/step)
step 32950 loss = 0.293 (2.796 sec/step)
step 32960 loss = 0.281 (2.719 sec/step)
step 32970 loss = 0.271 (2.744 sec/step)
step 32980 loss = 0.281 (2.755 sec/step)
step 32990 loss = 0.285 (2.679 sec/step)
step 33000 loss = 0.290 (2.767 sec/step)
step 33010 loss = 0.270 (2.998 sec/step)
step 33020 loss = 0.278 (2.485 sec/step)
step 33030 loss = 0.282 (2.705 sec/step)
step 33040 loss = 0.284 (2.979 sec/step)
step 33050 loss = 0.275 (2.554 sec/step)
step 33060 loss = 0.291 (2.565 sec/step)
step 33070 loss = 0.286 (2.922 sec/step)
step 33080 loss = 0.291 (2.559 sec/step)
step 33090 loss = 0.296 (2.793 sec/step)
step 33100 loss = 0.289 (2.817 sec/step)
step 33110 loss = 0.284 (3.033 sec/step)
step 33120 loss = 0.278 (2.632 sec/step)
step 33130 loss = 0.284 (2.603 sec/step)
step 33140 loss = 0.263 (3.135 sec/step)
step 33150 loss = 0.278 (2.374 sec/step)
step 33160 loss = 0.287 (2.929 sec/step)
step 33170 loss = 0.292 (2.919 sec/step)
step 33180 loss = 0.276 (2.603 sec/step)
step 33190 loss = 0.289 (2.801 sec/step)
step 33200 loss = 0.289 (2.462 sec/step)
step 33210 loss = 0.296 (2.806 sec/step)
step 33220 loss = 0.272 (2.943 sec/step)
step 33230 loss = 0.280 (2.708 sec/step)
step 33240 loss = 0.276 (2.550 sec/step)
step 33250 loss = 0.282 (2.653 sec/step)
step 33260 loss = 0.294 (2.932 sec/step)
step 33270 loss = 0.294 (2.439 sec/step)
step 33280 loss = 0.280 (2.679 sec/step)
step 33290 loss = 0.280 (2.899 sec/step)
step 33300 loss = 0.288 (2.457 sec/step)
step 33310 loss = 0.270 (2.894 sec/step)
step 33320 loss = 0.273 (3.074 sec/step)
step 33330 loss = 0.278 (2.848 sec/step)
step 33340 loss = 0.274 (3.290 sec/step)
step 33350 loss = 0.263 (3.167 sec/step)
step 33360 loss = 0.275 (2.795 sec/step)
step 33370 loss = 0.260 (2.592 sec/step)
Training Epoch 9/100
step 33380 loss = 0.263 (2.826 sec/step)
step 33390 loss = 0.257 (2.810 sec/step)
step 33400 loss = 0.244 (3.029 sec/step)
step 33410 loss = 0.250 (2.782 sec/step)
step 33420 loss = 0.265 (2.632 sec/step)
step 33430 loss = 0.256 (3.057 sec/step)
step 33440 loss = 0.260 (2.837 sec/step)
step 33450 loss = 0.278 (2.908 sec/step)
step 33460 loss = 0.271 (2.632 sec/step)
step 33470 loss = 0.273 (2.970 sec/step)
step 33480 loss = 0.265 (2.488 sec/step)
step 33490 loss = 0.254 (3.129 sec/step)
step 33500 loss = 0.275 (2.639 sec/step)
step 33510 loss = 0.262 (2.960 sec/step)
step 33520 loss = 0.255 (2.874 sec/step)
step 33530 loss = 0.282 (2.844 sec/step)
step 33540 loss = 0.284 (2.865 sec/step)
step 33550 loss = 0.250 (2.621 sec/step)
step 33560 loss = 0.268 (2.997 sec/step)
step 33570 loss = 0.265 (2.815 sec/step)
step 33580 loss = 0.259 (3.187 sec/step)
step 33590 loss = 0.273 (2.763 sec/step)
step 33600 loss = 0.261 (2.891 sec/step)
step 33610 loss = 0.250 (2.720 sec/step)
step 33620 loss = 0.276 (2.674 sec/step)
step 33630 loss = 0.269 (2.754 sec/step)
step 33640 loss = 0.270 (2.926 sec/step)
step 33650 loss = 0.252 (2.949 sec/step)
step 33660 loss = 0.256 (2.798 sec/step)
step 33670 loss = 0.283 (2.840 sec/step)
step 33680 loss = 0.267 (2.647 sec/step)
step 33690 loss = 0.268 (3.143 sec/step)
step 33700 loss = 0.267 (2.903 sec/step)
step 33710 loss = 0.273 (2.968 sec/step)
step 33720 loss = 0.254 (2.833 sec/step)
step 33730 loss = 0.250 (3.008 sec/step)
step 33740 loss = 0.263 (2.502 sec/step)
step 33750 loss = 0.278 (2.858 sec/step)
step 33760 loss = 0.274 (2.749 sec/step)
step 33770 loss = 0.252 (3.096 sec/step)
step 33780 loss = 0.254 (2.572 sec/step)
step 33790 loss = 0.256 (2.909 sec/step)
step 33800 loss = 0.260 (3.224 sec/step)
step 33810 loss = 0.264 (2.909 sec/step)
step 33820 loss = 0.266 (2.806 sec/step)
step 33830 loss = 0.277 (2.394 sec/step)
step 33840 loss = 0.254 (2.630 sec/step)
step 33850 loss = 0.262 (3.104 sec/step)
step 33860 loss = 0.260 (2.913 sec/step)
step 33870 loss = 0.257 (2.989 sec/step)
step 33880 loss = 0.269 (2.550 sec/step)
step 33890 loss = 0.240 (2.418 sec/step)
step 33900 loss = 0.266 (2.557 sec/step)
step 33910 loss = 0.279 (2.666 sec/step)
step 33920 loss = 0.283 (2.750 sec/step)
step 33930 loss = 0.265 (3.066 sec/step)
step 33940 loss = 0.253 (2.993 sec/step)
step 33950 loss = 0.255 (2.796 sec/step)
step 33960 loss = 0.271 (2.670 sec/step)
step 33970 loss = 0.273 (2.744 sec/step)
step 33980 loss = 0.271 (2.655 sec/step)
step 33990 loss = 0.257 (2.746 sec/step)
step 34000 loss = 0.273 (2.955 sec/step)
step 34010 loss = 0.268 (2.935 sec/step)
step 34020 loss = 0.265 (2.653 sec/step)
step 34030 loss = 0.281 (3.180 sec/step)
step 34040 loss = 0.275 (2.935 sec/step)
step 34050 loss = 0.266 (3.087 sec/step)
step 34060 loss = 0.269 (2.860 sec/step)
step 34070 loss = 0.270 (2.926 sec/step)
step 34080 loss = 0.243 (2.574 sec/step)
step 34090 loss = 0.254 (2.477 sec/step)
step 34100 loss = 0.274 (2.704 sec/step)
step 34110 loss = 0.273 (3.023 sec/step)
step 34120 loss = 0.286 (2.998 sec/step)
step 34130 loss = 0.281 (2.940 sec/step)
step 34140 loss = 0.269 (2.849 sec/step)
step 34150 loss = 0.261 (2.925 sec/step)
step 34160 loss = 0.271 (2.841 sec/step)
step 34170 loss = 0.253 (2.882 sec/step)
step 34180 loss = 0.278 (2.643 sec/step)
step 34190 loss = 0.262 (2.739 sec/step)
step 34200 loss = 0.259 (2.457 sec/step)
step 34210 loss = 0.266 (2.533 sec/step)
step 34220 loss = 0.271 (2.845 sec/step)
step 34230 loss = 0.270 (2.686 sec/step)
step 34240 loss = 0.289 (2.995 sec/step)
step 34250 loss = 0.267 (2.705 sec/step)
step 34260 loss = 0.259 (2.665 sec/step)
step 34270 loss = 0.249 (2.743 sec/step)
step 34280 loss = 0.279 (2.608 sec/step)
step 34290 loss = 0.277 (2.772 sec/step)
step 34300 loss = 0.259 (2.642 sec/step)
step 34310 loss = 0.283 (3.048 sec/step)
step 34320 loss = 0.256 (2.848 sec/step)
step 34330 loss = 0.272 (2.632 sec/step)
step 34340 loss = 0.256 (3.021 sec/step)
step 34350 loss = 0.270 (2.819 sec/step)
step 34360 loss = 0.266 (2.944 sec/step)
step 34370 loss = 0.273 (2.858 sec/step)
step 34380 loss = 0.258 (2.687 sec/step)
step 34390 loss = 0.272 (3.101 sec/step)
step 34400 loss = 0.269 (2.935 sec/step)
step 34410 loss = 0.263 (2.620 sec/step)
step 34420 loss = 0.255 (2.960 sec/step)
step 34430 loss = 0.278 (2.866 sec/step)
step 34440 loss = 0.264 (2.501 sec/step)
step 34450 loss = 0.276 (2.806 sec/step)
step 34460 loss = 0.259 (2.589 sec/step)
step 34470 loss = 0.265 (3.005 sec/step)
step 34480 loss = 0.269 (3.176 sec/step)
step 34490 loss = 0.266 (2.892 sec/step)
step 34500 loss = 0.273 (3.105 sec/step)
step 34510 loss = 0.253 (2.904 sec/step)
step 34520 loss = 0.272 (2.960 sec/step)
step 34530 loss = 0.255 (2.924 sec/step)
step 34540 loss = 0.258 (2.800 sec/step)
step 34550 loss = 0.242 (2.815 sec/step)
step 34560 loss = 0.268 (2.694 sec/step)
step 34570 loss = 0.257 (3.075 sec/step)
step 34580 loss = 0.269 (3.090 sec/step)
step 34590 loss = 0.270 (2.979 sec/step)
step 34600 loss = 0.286 (2.695 sec/step)
step 34610 loss = 0.251 (2.858 sec/step)
step 34620 loss = 0.268 (2.626 sec/step)
step 34630 loss = 0.272 (2.590 sec/step)
step 34640 loss = 0.271 (2.734 sec/step)
step 34650 loss = 0.257 (2.717 sec/step)
step 34660 loss = 0.253 (2.611 sec/step)
step 34670 loss = 0.257 (2.993 sec/step)
step 34680 loss = 0.273 (2.816 sec/step)
step 34690 loss = 0.270 (2.961 sec/step)
step 34700 loss = 0.269 (2.842 sec/step)
step 34710 loss = 0.275 (2.799 sec/step)
step 34720 loss = 0.262 (2.616 sec/step)
step 34730 loss = 0.271 (2.841 sec/step)
step 34740 loss = 0.296 (2.758 sec/step)
step 34750 loss = 0.274 (2.839 sec/step)
step 34760 loss = 0.279 (2.951 sec/step)
step 34770 loss = 0.291 (2.483 sec/step)
step 34780 loss = 0.283 (2.656 sec/step)
step 34790 loss = 0.244 (3.131 sec/step)
step 34800 loss = 0.249 (2.525 sec/step)
step 34810 loss = 0.256 (3.038 sec/step)
step 34820 loss = 0.263 (2.737 sec/step)
step 34830 loss = 0.262 (2.952 sec/step)
step 34840 loss = 0.275 (3.001 sec/step)
step 34850 loss = 0.263 (2.815 sec/step)
step 34860 loss = 0.254 (2.992 sec/step)
step 34870 loss = 0.267 (2.859 sec/step)
step 34880 loss = 0.262 (2.836 sec/step)
step 34890 loss = 0.269 (2.990 sec/step)
step 34900 loss = 0.281 (2.612 sec/step)
step 34910 loss = 0.255 (2.662 sec/step)
step 34920 loss = 0.266 (3.270 sec/step)
step 34930 loss = 0.268 (2.776 sec/step)
step 34940 loss = 0.274 (2.676 sec/step)
step 34950 loss = 0.268 (3.138 sec/step)
step 34960 loss = 0.277 (3.034 sec/step)
step 34970 loss = 0.288 (2.919 sec/step)
step 34980 loss = 0.271 (2.723 sec/step)
step 34990 loss = 0.267 (2.781 sec/step)
step 35000 loss = 0.267 (2.925 sec/step)
step 35010 loss = 0.268 (2.599 sec/step)
step 35020 loss = 0.265 (2.852 sec/step)
step 35030 loss = 0.254 (3.014 sec/step)
step 35040 loss = 0.263 (3.125 sec/step)
step 35050 loss = 0.277 (2.950 sec/step)
step 35060 loss = 0.279 (2.483 sec/step)
step 35070 loss = 0.257 (2.682 sec/step)
step 35080 loss = 0.265 (2.744 sec/step)
step 35090 loss = 0.280 (3.165 sec/step)
step 35100 loss = 0.279 (2.727 sec/step)
step 35110 loss = 0.266 (2.634 sec/step)
step 35120 loss = 0.266 (2.669 sec/step)
step 35130 loss = 0.275 (2.631 sec/step)
step 35140 loss = 0.281 (3.116 sec/step)
step 35150 loss = 0.284 (2.636 sec/step)
step 35160 loss = 0.254 (2.825 sec/step)
step 35170 loss = 0.262 (2.884 sec/step)
step 35180 loss = 0.255 (2.518 sec/step)
step 35190 loss = 0.262 (2.658 sec/step)
step 35200 loss = 0.280 (2.967 sec/step)
step 35210 loss = 0.271 (2.634 sec/step)
step 35220 loss = 0.253 (2.962 sec/step)
step 35230 loss = 0.266 (3.074 sec/step)
step 35240 loss = 0.265 (2.635 sec/step)
step 35250 loss = 0.276 (2.771 sec/step)
step 35260 loss = 0.274 (2.763 sec/step)
step 35270 loss = 0.275 (2.865 sec/step)
step 35280 loss = 0.287 (2.769 sec/step)
step 35290 loss = 0.285 (3.080 sec/step)
step 35300 loss = 0.254 (2.941 sec/step)
step 35310 loss = 0.255 (2.359 sec/step)
step 35320 loss = 0.275 (2.846 sec/step)
step 35330 loss = 0.257 (2.926 sec/step)
step 35340 loss = 0.270 (2.588 sec/step)
step 35350 loss = 0.270 (2.950 sec/step)
step 35360 loss = 0.260 (2.672 sec/step)
step 35370 loss = 0.250 (2.727 sec/step)
step 35380 loss = 0.273 (2.845 sec/step)
step 35390 loss = 0.273 (2.559 sec/step)
step 35400 loss = 0.273 (2.611 sec/step)
step 35410 loss = 0.268 (2.785 sec/step)
step 35420 loss = 0.257 (3.048 sec/step)
step 35430 loss = 0.271 (2.810 sec/step)
step 35440 loss = 0.270 (2.461 sec/step)
step 35450 loss = 0.266 (2.458 sec/step)
step 35460 loss = 0.262 (3.135 sec/step)
step 35470 loss = 0.275 (2.814 sec/step)
step 35480 loss = 0.274 (2.804 sec/step)
step 35490 loss = 0.258 (2.621 sec/step)
step 35500 loss = 0.256 (2.896 sec/step)
step 35510 loss = 0.265 (2.430 sec/step)
step 35520 loss = 0.257 (2.748 sec/step)
step 35530 loss = 0.266 (2.502 sec/step)
step 35540 loss = 0.275 (2.729 sec/step)
step 35550 loss = 0.271 (2.855 sec/step)
step 35560 loss = 0.290 (3.146 sec/step)
step 35570 loss = 0.266 (2.812 sec/step)
step 35580 loss = 0.263 (2.769 sec/step)
step 35590 loss = 0.262 (2.813 sec/step)
step 35600 loss = 0.263 (2.699 sec/step)
step 35610 loss = 0.264 (2.647 sec/step)
step 35620 loss = 0.262 (2.809 sec/step)
step 35630 loss = 0.264 (2.871 sec/step)
step 35640 loss = 0.284 (3.260 sec/step)
step 35650 loss = 0.281 (2.651 sec/step)
step 35660 loss = 0.282 (2.768 sec/step)
step 35670 loss = 0.259 (3.048 sec/step)
step 35680 loss = 0.277 (2.889 sec/step)
step 35690 loss = 0.265 (2.758 sec/step)
step 35700 loss = 0.268 (2.654 sec/step)
step 35710 loss = 0.287 (3.071 sec/step)
step 35720 loss = 0.268 (2.679 sec/step)
step 35730 loss = 0.276 (2.730 sec/step)
step 35740 loss = 0.282 (3.018 sec/step)
step 35750 loss = 0.272 (2.886 sec/step)
step 35760 loss = 0.269 (2.462 sec/step)
step 35770 loss = 0.279 (3.195 sec/step)
step 35780 loss = 0.273 (2.699 sec/step)
step 35790 loss = 0.262 (2.766 sec/step)
step 35800 loss = 0.269 (2.604 sec/step)
step 35810 loss = 0.287 (2.720 sec/step)
step 35820 loss = 0.277 (2.593 sec/step)
step 35830 loss = 0.275 (2.850 sec/step)
step 35840 loss = 0.275 (2.913 sec/step)
step 35850 loss = 0.265 (3.139 sec/step)
step 35860 loss = 0.282 (3.250 sec/step)
step 35870 loss = 0.274 (3.054 sec/step)
step 35880 loss = 0.267 (2.964 sec/step)
step 35890 loss = 0.285 (2.746 sec/step)
step 35900 loss = 0.282 (3.033 sec/step)
step 35910 loss = 0.274 (2.603 sec/step)
step 35920 loss = 0.282 (2.815 sec/step)
step 35930 loss = 0.263 (2.639 sec/step)
step 35940 loss = 0.261 (3.014 sec/step)
step 35950 loss = 0.274 (2.928 sec/step)
step 35960 loss = 0.277 (2.839 sec/step)
step 35970 loss = 0.269 (2.828 sec/step)
step 35980 loss = 0.277 (2.693 sec/step)
step 35990 loss = 0.271 (2.647 sec/step)
step 36000 loss = 0.273 (2.848 sec/step)
step 36010 loss = 0.283 (3.012 sec/step)
step 36020 loss = 0.275 (2.839 sec/step)
step 36030 loss = 0.262 (3.008 sec/step)
step 36040 loss = 0.264 (2.585 sec/step)
step 36050 loss = 0.255 (2.732 sec/step)
step 36060 loss = 0.264 (2.982 sec/step)
step 36070 loss = 0.277 (2.633 sec/step)
step 36080 loss = 0.257 (2.774 sec/step)
step 36090 loss = 0.283 (2.649 sec/step)
step 36100 loss = 0.294 (2.979 sec/step)
step 36110 loss = 0.267 (3.026 sec/step)
step 36120 loss = 0.272 (3.021 sec/step)
step 36130 loss = 0.262 (3.223 sec/step)
step 36140 loss = 0.266 (2.866 sec/step)
step 36150 loss = 0.277 (2.838 sec/step)
step 36160 loss = 0.281 (3.083 sec/step)
step 36170 loss = 0.259 (2.925 sec/step)
step 36180 loss = 0.272 (2.591 sec/step)
step 36190 loss = 0.277 (3.012 sec/step)
step 36200 loss = 0.267 (2.783 sec/step)
step 36210 loss = 0.268 (2.844 sec/step)
step 36220 loss = 0.269 (2.959 sec/step)
step 36230 loss = 0.275 (2.596 sec/step)
step 36240 loss = 0.256 (2.703 sec/step)
step 36250 loss = 0.264 (2.724 sec/step)
step 36260 loss = 0.285 (3.074 sec/step)
step 36270 loss = 0.258 (2.685 sec/step)
step 36280 loss = 0.275 (2.870 sec/step)
step 36290 loss = 0.274 (2.659 sec/step)
step 36300 loss = 0.274 (2.923 sec/step)
step 36310 loss = 0.284 (2.775 sec/step)
step 36320 loss = 0.264 (3.038 sec/step)
step 36330 loss = 0.276 (2.688 sec/step)
step 36340 loss = 0.279 (2.934 sec/step)
step 36350 loss = 0.262 (2.885 sec/step)
step 36360 loss = 0.270 (2.872 sec/step)
step 36370 loss = 0.276 (2.930 sec/step)
step 36380 loss = 0.254 (2.971 sec/step)
step 36390 loss = 0.262 (2.721 sec/step)
step 36400 loss = 0.300 (2.950 sec/step)
step 36410 loss = 0.264 (2.532 sec/step)
step 36420 loss = 0.261 (2.607 sec/step)
step 36430 loss = 0.261 (2.588 sec/step)
step 36440 loss = 0.258 (2.738 sec/step)
step 36450 loss = 0.273 (2.700 sec/step)
step 36460 loss = 0.288 (2.913 sec/step)
step 36470 loss = 0.270 (2.493 sec/step)
step 36480 loss = 0.277 (2.401 sec/step)
step 36490 loss = 0.260 (2.806 sec/step)
step 36500 loss = 0.262 (3.123 sec/step)
step 36510 loss = 0.275 (3.100 sec/step)
step 36520 loss = 0.258 (2.823 sec/step)
step 36530 loss = 0.268 (3.017 sec/step)
step 36540 loss = 0.264 (2.971 sec/step)
step 36550 loss = 0.267 (2.778 sec/step)
step 36560 loss = 0.269 (2.820 sec/step)
step 36570 loss = 0.279 (2.773 sec/step)
step 36580 loss = 0.267 (2.636 sec/step)
step 36590 loss = 0.276 (2.999 sec/step)
step 36600 loss = 0.277 (2.813 sec/step)
step 36610 loss = 0.291 (3.243 sec/step)
step 36620 loss = 0.282 (3.002 sec/step)
step 36630 loss = 0.272 (2.747 sec/step)
step 36640 loss = 0.267 (2.842 sec/step)
step 36650 loss = 0.265 (2.548 sec/step)
step 36660 loss = 0.281 (2.846 sec/step)
step 36670 loss = 0.263 (2.728 sec/step)
step 36680 loss = 0.259 (2.846 sec/step)
step 36690 loss = 0.263 (2.796 sec/step)
step 36700 loss = 0.265 (2.600 sec/step)
step 36710 loss = 0.280 (2.675 sec/step)
step 36720 loss = 0.265 (2.647 sec/step)
step 36730 loss = 0.274 (2.760 sec/step)
step 36740 loss = 0.258 (3.122 sec/step)
step 36750 loss = 0.267 (2.957 sec/step)
step 36760 loss = 0.262 (2.894 sec/step)
step 36770 loss = 0.272 (2.763 sec/step)
step 36780 loss = 0.273 (2.830 sec/step)
step 36790 loss = 0.260 (2.799 sec/step)
step 36800 loss = 0.265 (3.221 sec/step)
step 36810 loss = 0.256 (2.664 sec/step)
step 36820 loss = 0.267 (2.764 sec/step)
step 36830 loss = 0.290 (2.816 sec/step)
step 36840 loss = 0.267 (2.919 sec/step)
step 36850 loss = 0.271 (2.665 sec/step)
step 36860 loss = 0.291 (3.187 sec/step)
step 36870 loss = 0.265 (2.786 sec/step)
step 36880 loss = 0.262 (2.692 sec/step)
step 36890 loss = 0.264 (2.829 sec/step)
step 36900 loss = 0.283 (2.832 sec/step)
step 36910 loss = 0.257 (2.805 sec/step)
step 36920 loss = 0.261 (2.912 sec/step)
step 36930 loss = 0.284 (2.768 sec/step)
step 36940 loss = 0.264 (2.606 sec/step)
step 36950 loss = 0.285 (2.750 sec/step)
step 36960 loss = 0.268 (2.823 sec/step)
step 36970 loss = 0.276 (3.403 sec/step)
step 36980 loss = 0.268 (2.903 sec/step)
step 36990 loss = 0.283 (2.675 sec/step)
step 37000 loss = 0.268 (2.803 sec/step)
step 37010 loss = 0.270 (2.809 sec/step)
step 37020 loss = 0.268 (3.080 sec/step)
step 37030 loss = 0.278 (2.824 sec/step)
step 37040 loss = 0.268 (2.752 sec/step)
step 37050 loss = 0.255 (2.481 sec/step)
step 37060 loss = 0.277 (2.672 sec/step)
step 37070 loss = 0.264 (3.100 sec/step)
Training Epoch 10/100
step 37080 loss = 0.247 (2.886 sec/step)
step 37090 loss = 0.247 (2.892 sec/step)
step 37100 loss = 0.250 (2.872 sec/step)
step 37110 loss = 0.255 (2.866 sec/step)
step 37120 loss = 0.245 (2.682 sec/step)
step 37130 loss = 0.249 (3.000 sec/step)
step 37140 loss = 0.259 (2.790 sec/step)
step 37150 loss = 0.241 (2.787 sec/step)
step 37160 loss = 0.245 (2.955 sec/step)
step 37170 loss = 0.258 (2.626 sec/step)
step 37180 loss = 0.259 (2.983 sec/step)
step 37190 loss = 0.244 (2.633 sec/step)
step 37200 loss = 0.259 (2.681 sec/step)
step 37210 loss = 0.257 (3.103 sec/step)
step 37220 loss = 0.244 (2.880 sec/step)
step 37230 loss = 0.253 (2.839 sec/step)
step 37240 loss = 0.262 (3.088 sec/step)
step 37250 loss = 0.240 (2.974 sec/step)
step 37260 loss = 0.247 (2.887 sec/step)
step 37270 loss = 0.250 (2.756 sec/step)
step 37280 loss = 0.259 (2.766 sec/step)
step 37290 loss = 0.256 (3.065 sec/step)
step 37300 loss = 0.262 (3.192 sec/step)
step 37310 loss = 0.240 (3.000 sec/step)
step 37320 loss = 0.265 (2.791 sec/step)
step 37330 loss = 0.251 (3.068 sec/step)
step 37340 loss = 0.258 (3.094 sec/step)
step 37350 loss = 0.245 (2.708 sec/step)
step 37360 loss = 0.244 (2.810 sec/step)
step 37370 loss = 0.243 (2.632 sec/step)
step 37380 loss = 0.234 (2.697 sec/step)
step 37390 loss = 0.238 (2.733 sec/step)
step 37400 loss = 0.248 (2.797 sec/step)
step 37410 loss = 0.237 (2.722 sec/step)
step 37420 loss = 0.239 (2.761 sec/step)
step 37430 loss = 0.264 (2.811 sec/step)
step 37440 loss = 0.259 (2.716 sec/step)
step 37450 loss = 0.252 (2.977 sec/step)
step 37460 loss = 0.258 (2.794 sec/step)
In [ ]:
Content source: animeshramesh/incremental-learning
Similar notebooks:
notebook.community | gallery | about