Todo:


In [1]:
EXPERIMENT_NAME = 'experiment_01_stanford40_train'

In [2]:
import tensorflow as tf
import numpy as np
import os, sys
from time import time
from matplotlib import pyplot as plt
from itertools import cycle
import random
import pickle

from utils import optimistic_restore, save
import layers

PWD = os.getcwd()
sys.path.insert(0, os.path.abspath(os.path.join(PWD, '..')))

import pdb

In [3]:
'''
HYPERPARAMS
'''
BATCH_SIZE = 10
PATIENCE = 2
TRIPLETS_TRAIN = '/media/red/capstone/data/stanford40_triplets_train.pkl'
TRIPLETS_VALIDATION = '/media/red/capstone/data/stanford40_triplets_val.pkl'

LEARNING_RATE = 1e-4
LEARNING_RATE_DECAY = 0.7
BETA1 = 0.9
BETA2 = 0.99
NUM_EPOCH = 100
RANDOM_SEED = 1234
SUMMARY_EVERY = 10
VALIDATION_PERCENTAGE = 0.05
SNAPSHOT_MAX = 3 # Keeps the last best 10 snapshots (best determined by validation accuracy)
SNAPSHOT_DIR = os.path.join('/media/red/capstone/snapshots/', EXPERIMENT_NAME)

# Network params
NORMALIZE = True
N_FEAT = 4096

np.random.seed(seed=RANDOM_SEED)

In [4]:
'''
Load Triplets
'''
class SVM_Triplet:
    def __init__(self, X1, X2, Y, base_classes, pos_class, new_class):
        self.X1 = X1
        self.X2 = X2
        self.Y = Y
        self.base_classes = base_classes
        self.pos_class = pos_class
        self.new_class = new_class
        
# Load features
triplets_loadin = {}
triplets_loadin['train'] = pickle.load(open(TRIPLETS_TRAIN, "rb"))
triplets_loadin['validation'] = pickle.load(open(TRIPLETS_VALIDATION, "rb"))

x_data = {
    'train':[],
    'validation':[]
}
y_data = {
    'train':[],
    'validation':[]
}
for partition in ['train', 'validation']:
    for triplet in triplets_loadin[partition]:
        if NORMALIZE:
            X1 = triplet.X1 / np.linalg.norm(triplet.X1, axis=0, keepdims=True)
            X2 = triplet.X2 / np.linalg.norm(triplet.X2, axis=0, keepdims=True)
            Y  = triplet.Y  / np.linalg.norm(triplet.Y,  axis=0, keepdims=True)
            x_data[partition].append(np.hstack((X1, X2)))
            y_data[partition].append(Y-X1)
        else:   
            x_data[partition].append(np.hstack((triplet.X1, triplet.X2)))
            y_data[partition].append(triplet.Y-triplet.X1)
        
x_data['train'] = np.stack(x_data['train'])
y_data['train'] = np.stack(y_data['train'])
x_data['validation'] = np.stack(x_data['validation'])
y_data['validation'] = np.stack(y_data['validation'])

In [ ]:
'''
Declare model
'''

def lrelu(x, alpha=0.1):
    return tf.nn.relu(x) - alpha * tf.nn.relu(-x)

def net(x, is_training):
    def dense_block(n_units):
        stack.append(layers.fc(
            input=stack[-1],
            units=n_units,
            activation='relu',
            name='fc'
            )[0])
        stack.append(tf.contrib.layers.batch_norm(
                stack[-1], 
                center=True, scale=True, 
                is_training=is_training,
                scope='bn'))
        stack.append(lrelu(stack[-1]))
        
    n_units_list = [2*N_FEAT, 2*N_FEAT, N_FEAT]
    stack = [x,]
    for i, n in enumerate(n_units_list):
        with tf.variable_scope("block_"+str(i)):
            dense_block(n)
    stack.append(layers.fc(
            input=stack[-1],
            units=4096,
            activation='linear',
            name='fc_final'
            )[0])
    return stack

In [ ]:
'''
Model Setup
'''
x = tf.placeholder(dtype=tf.float32, shape=(None, 8192))
y = tf.placeholder(dtype=tf.float32, shape=(None, 4096))
is_training = tf.placeholder(dtype=tf.bool)

global_step = tf.get_variable('global_step', dtype=tf.int32, trainable=False, initializer=0) #incremented everytime optimizer runs
lr = tf.get_variable('learning_rate', dtype=tf.float32, trainable=False, initializer=LEARNING_RATE)

net = net(x, is_training)

'''
Loss, Metrics, and Optimization Setup
'''
pred = net[-1]
pred_normalized = pred / tf.norm(pred, axis=1, keep_dims=True)
y_normalized = y / tf.norm(y,axis=1,keep_dims=True)
reduced_loss = tf.losses.cosine_distance(
        labels=y_normalized,
        predictions=pred_normalized,
        dim=1,
        reduction=tf.losses.Reduction.MEAN,
        )
train_loss_summary = tf.summary.scalar('training_loss', reduced_loss)

optimizer = tf.train.AdamOptimizer(
        learning_rate=lr,
        beta1=BETA1,
        beta2=BETA2,
        name='AdamOptimizer')
tvars = tf.trainable_variables()
grads, _ = tf.clip_by_global_norm(tf.gradients(reduced_loss, tvars), 5.0)
train_op = optimizer.apply_gradients(zip(grads, tvars), global_step=global_step)

'''
TensorBoard Setup
'''
all_train_summary = tf.summary.merge_all()

summary_writer = tf.summary.FileWriter(SNAPSHOT_DIR,
        graph=tf.get_default_graph())

'''
Tensorflow Saver Setup
'''
saver = tf.train.Saver(var_list=tf.global_variables(),
                       max_to_keep=SNAPSHOT_MAX)

'''
Tensorflow Session Setup
'''
tf.set_random_seed(RANDOM_SEED)
config = tf.ConfigProto()
config.gpu_options.per_process_gpu_memory_fraction = 0.8
config.gpu_options.allow_growth = True
config.allow_soft_placement = True
sess = tf.Session(config=config)
init = tf.group(tf.global_variables_initializer(),
                tf.local_variables_initializer())
sess.run(init)

'''
Primary Loop
'''
partition_types = ['train', 'validation']
best_loss = float('inf')
patience_counter = 0
step_v = global_step.eval(session=sess)
for epoch in range(NUM_EPOCH):
    overall_loss = 0 # Variables used for validation
    
    print('Training Epoch {}/{}'.format(
            epoch, NUM_EPOCH))
    
    for partition in partition_types: # Itr through data partitions
        n_exemplars = x_data[partition].shape[0]
        if partition == 'train':
            shuffle_indices = np.arange(n_exemplars)
            np.random.shuffle(shuffle_indices)
            x_data['train'] = x_data['train'][shuffle_indices, ...]
            y_data['train'] = y_data['train'][shuffle_indices, ...]
        step_v = global_step.eval(session=sess)
        for i in range(0, n_exemplars, BATCH_SIZE):
            upper_range = i+BATCH_SIZE
            if upper_range > n_exemplars:
                upper_range = n_exemplars
            x_batch = x_data[partition][i:upper_range, ...]
            y_batch = y_data[partition][i:upper_range, ...]
            
            feed_dict = {
                x:x_batch,
                y:y_batch
            }
            if partition == 'train':
                feed_dict[is_training] = True
            else:
                feed_dict[is_training] = False
                
            # Run the proper sess run command
            if partition == 'train':
                start_t = time()
                if step_v % SUMMARY_EVERY == 0:
                    _, loss_v, summary_v = sess.run(
                        [train_op, reduced_loss, all_train_summary],
                        feed_dict=feed_dict)
                    summary_writer.add_summary(summary_v, step_v)
                    duration = time() - start_t
                    print('step {:d} \t loss = {:.3f} ({:.3f} sec/step)'.format(
                            step_v, loss_v, duration))
                else: # Vanilla Training
                    _ = sess.run([train_op], feed_dict=feed_dict)
                step_v = global_step.eval(session=sess)
            elif partition == 'validation':
                feed_dict[is_training] = False
                loss_v = sess.run(
                        [reduced_loss],
                        feed_dict=feed_dict)[0]
                overall_loss += loss_v
        # Post-epoch routine for validation set (saving, stat computation, etc)
        if partition == 'validation':
            duration = time() - start_t
            overall_loss /= x_data['validation'].shape[0]
            overall_loss_summary = tf.Summary()
            overall_loss_summary.value.add(tag='validation_loss', simple_value=overall_loss)
            summary_writer.add_summary(overall_loss_summary, step_v)

            if overall_loss < best_loss:
                print('New Best Loss {:.3f} < Old Best {:.3f}.  Saving...'.format(
                        overall_loss, best_loss))
                best_loss = overall_loss
                patience_counter = 0
                save(saver, sess, SNAPSHOT_DIR, step_v)
            else:
                patience_counter += 1
        if patience_counter >= PATIENCE:
            patience_counter = 0
            lr_v = lr.eval(session=sess) * LEARNING_RATE_DECAY
            lr.assign(lr_v).eval(session=sess)


Training Epoch 0/100
step 0 	 loss = 1.004 (3.001 sec/step)
step 10 	 loss = 0.999 (2.756 sec/step)
step 20 	 loss = 0.991 (2.561 sec/step)
step 30 	 loss = 0.972 (2.605 sec/step)
step 40 	 loss = 0.972 (2.527 sec/step)
step 50 	 loss = 0.968 (2.835 sec/step)
step 60 	 loss = 0.955 (2.768 sec/step)
step 70 	 loss = 0.934 (2.706 sec/step)
step 80 	 loss = 0.934 (2.485 sec/step)
step 90 	 loss = 0.935 (2.771 sec/step)
step 100 	 loss = 0.913 (2.771 sec/step)
step 110 	 loss = 0.905 (2.661 sec/step)
step 120 	 loss = 0.847 (2.888 sec/step)
step 130 	 loss = 0.895 (3.087 sec/step)
step 140 	 loss = 0.847 (2.615 sec/step)
step 150 	 loss = 0.858 (2.801 sec/step)
step 160 	 loss = 0.834 (2.875 sec/step)
step 170 	 loss = 0.846 (2.863 sec/step)
step 180 	 loss = 0.855 (2.617 sec/step)
step 190 	 loss = 0.792 (2.575 sec/step)
step 200 	 loss = 0.842 (2.883 sec/step)
step 210 	 loss = 0.839 (2.874 sec/step)
step 220 	 loss = 0.761 (2.938 sec/step)
step 230 	 loss = 0.803 (2.432 sec/step)
step 240 	 loss = 0.760 (3.047 sec/step)
step 250 	 loss = 0.805 (2.672 sec/step)
step 260 	 loss = 0.817 (2.580 sec/step)
step 270 	 loss = 0.821 (2.909 sec/step)
step 280 	 loss = 0.749 (2.830 sec/step)
step 290 	 loss = 0.762 (2.855 sec/step)
step 300 	 loss = 0.724 (2.659 sec/step)
step 310 	 loss = 0.743 (2.918 sec/step)
step 320 	 loss = 0.742 (3.196 sec/step)
step 330 	 loss = 0.784 (2.729 sec/step)
step 340 	 loss = 0.702 (3.020 sec/step)
step 350 	 loss = 0.749 (2.617 sec/step)
step 360 	 loss = 0.756 (2.742 sec/step)
step 370 	 loss = 0.724 (2.752 sec/step)
step 380 	 loss = 0.783 (2.499 sec/step)
step 390 	 loss = 0.747 (2.632 sec/step)
step 400 	 loss = 0.748 (2.680 sec/step)
step 410 	 loss = 0.770 (2.664 sec/step)
step 420 	 loss = 0.714 (2.714 sec/step)
step 430 	 loss = 0.735 (2.943 sec/step)
step 440 	 loss = 0.729 (2.520 sec/step)
step 450 	 loss = 0.779 (2.995 sec/step)
step 460 	 loss = 0.701 (2.605 sec/step)
step 470 	 loss = 0.704 (2.874 sec/step)
step 480 	 loss = 0.762 (2.809 sec/step)
step 490 	 loss = 0.699 (2.824 sec/step)
step 500 	 loss = 0.697 (2.582 sec/step)
step 510 	 loss = 0.753 (2.874 sec/step)
step 520 	 loss = 0.719 (2.718 sec/step)
step 530 	 loss = 0.658 (2.667 sec/step)
step 540 	 loss = 0.761 (2.424 sec/step)
step 550 	 loss = 0.724 (2.486 sec/step)
step 560 	 loss = 0.733 (2.668 sec/step)
step 570 	 loss = 0.711 (2.712 sec/step)
step 580 	 loss = 0.725 (3.067 sec/step)
step 590 	 loss = 0.694 (2.959 sec/step)
step 600 	 loss = 0.707 (2.562 sec/step)
step 610 	 loss = 0.727 (2.715 sec/step)
step 620 	 loss = 0.666 (2.375 sec/step)
step 630 	 loss = 0.687 (2.582 sec/step)
step 640 	 loss = 0.656 (2.692 sec/step)
step 650 	 loss = 0.663 (3.096 sec/step)
step 660 	 loss = 0.693 (2.794 sec/step)
step 670 	 loss = 0.693 (2.679 sec/step)
step 680 	 loss = 0.686 (2.876 sec/step)
step 690 	 loss = 0.663 (2.866 sec/step)
step 700 	 loss = 0.682 (2.521 sec/step)
step 710 	 loss = 0.716 (2.588 sec/step)
step 720 	 loss = 0.671 (2.948 sec/step)
step 730 	 loss = 0.737 (2.771 sec/step)
step 740 	 loss = 0.667 (2.790 sec/step)
step 750 	 loss = 0.653 (2.963 sec/step)
step 760 	 loss = 0.636 (2.722 sec/step)
step 770 	 loss = 0.701 (2.701 sec/step)
step 780 	 loss = 0.677 (2.707 sec/step)
step 790 	 loss = 0.706 (3.085 sec/step)
step 800 	 loss = 0.666 (2.646 sec/step)
step 810 	 loss = 0.646 (2.711 sec/step)
step 820 	 loss = 0.678 (3.137 sec/step)
step 830 	 loss = 0.684 (2.993 sec/step)
step 840 	 loss = 0.666 (2.993 sec/step)
step 850 	 loss = 0.678 (2.620 sec/step)
step 860 	 loss = 0.655 (2.640 sec/step)
step 870 	 loss = 0.685 (2.668 sec/step)
step 880 	 loss = 0.650 (2.792 sec/step)
step 890 	 loss = 0.650 (2.774 sec/step)
step 900 	 loss = 0.676 (2.774 sec/step)
step 910 	 loss = 0.680 (2.831 sec/step)
step 920 	 loss = 0.693 (2.569 sec/step)
step 930 	 loss = 0.654 (2.465 sec/step)
step 940 	 loss = 0.671 (3.147 sec/step)
step 950 	 loss = 0.688 (2.847 sec/step)
step 960 	 loss = 0.645 (2.730 sec/step)
step 970 	 loss = 0.679 (3.074 sec/step)
step 980 	 loss = 0.678 (2.932 sec/step)
step 990 	 loss = 0.629 (2.932 sec/step)
step 1000 	 loss = 0.671 (2.958 sec/step)
step 1010 	 loss = 0.648 (2.588 sec/step)
step 1020 	 loss = 0.668 (2.838 sec/step)
step 1030 	 loss = 0.677 (2.583 sec/step)
step 1040 	 loss = 0.654 (2.696 sec/step)
step 1050 	 loss = 0.645 (3.081 sec/step)
step 1060 	 loss = 0.702 (2.766 sec/step)
step 1070 	 loss = 0.669 (2.736 sec/step)
step 1080 	 loss = 0.642 (2.859 sec/step)
step 1090 	 loss = 0.683 (2.724 sec/step)
step 1100 	 loss = 0.666 (2.805 sec/step)
step 1110 	 loss = 0.612 (2.558 sec/step)
step 1120 	 loss = 0.619 (2.563 sec/step)
step 1130 	 loss = 0.665 (2.788 sec/step)
step 1140 	 loss = 0.625 (2.774 sec/step)
step 1150 	 loss = 0.643 (3.208 sec/step)
step 1160 	 loss = 0.631 (3.044 sec/step)
step 1170 	 loss = 0.649 (2.662 sec/step)
step 1180 	 loss = 0.624 (2.760 sec/step)
step 1190 	 loss = 0.659 (2.989 sec/step)
step 1200 	 loss = 0.614 (2.643 sec/step)
step 1210 	 loss = 0.631 (2.768 sec/step)
step 1220 	 loss = 0.612 (2.828 sec/step)
step 1230 	 loss = 0.634 (2.679 sec/step)
step 1240 	 loss = 0.671 (2.773 sec/step)
step 1250 	 loss = 0.652 (2.897 sec/step)
step 1260 	 loss = 0.637 (2.837 sec/step)
step 1270 	 loss = 0.666 (3.133 sec/step)
step 1280 	 loss = 0.644 (2.706 sec/step)
step 1290 	 loss = 0.611 (2.898 sec/step)
step 1300 	 loss = 0.660 (2.970 sec/step)
step 1310 	 loss = 0.657 (2.936 sec/step)
step 1320 	 loss = 0.638 (2.763 sec/step)
step 1330 	 loss = 0.648 (2.928 sec/step)
step 1340 	 loss = 0.630 (2.954 sec/step)
step 1350 	 loss = 0.629 (2.442 sec/step)
step 1360 	 loss = 0.631 (2.655 sec/step)
step 1370 	 loss = 0.642 (2.452 sec/step)
step 1380 	 loss = 0.644 (2.516 sec/step)
step 1390 	 loss = 0.624 (2.634 sec/step)
step 1400 	 loss = 0.607 (2.887 sec/step)
step 1410 	 loss = 0.658 (2.794 sec/step)
step 1420 	 loss = 0.581 (2.740 sec/step)
step 1430 	 loss = 0.648 (2.925 sec/step)
step 1440 	 loss = 0.644 (2.827 sec/step)
step 1450 	 loss = 0.630 (2.742 sec/step)
step 1460 	 loss = 0.643 (2.847 sec/step)
step 1470 	 loss = 0.618 (2.706 sec/step)
step 1480 	 loss = 0.654 (2.945 sec/step)
step 1490 	 loss = 0.645 (2.540 sec/step)
step 1500 	 loss = 0.617 (3.092 sec/step)
step 1510 	 loss = 0.683 (2.893 sec/step)
step 1520 	 loss = 0.648 (2.660 sec/step)
step 1530 	 loss = 0.649 (2.817 sec/step)
step 1540 	 loss = 0.603 (2.726 sec/step)
step 1550 	 loss = 0.630 (2.592 sec/step)
step 1560 	 loss = 0.625 (2.945 sec/step)
step 1570 	 loss = 0.628 (2.489 sec/step)
step 1580 	 loss = 0.615 (2.590 sec/step)
step 1590 	 loss = 0.628 (2.825 sec/step)
step 1600 	 loss = 0.658 (2.662 sec/step)
step 1610 	 loss = 0.617 (2.893 sec/step)
step 1620 	 loss = 0.617 (2.798 sec/step)
step 1630 	 loss = 0.602 (2.738 sec/step)
step 1640 	 loss = 0.592 (2.644 sec/step)
step 1650 	 loss = 0.639 (3.022 sec/step)
step 1660 	 loss = 0.631 (2.679 sec/step)
step 1670 	 loss = 0.592 (2.563 sec/step)
step 1680 	 loss = 0.615 (2.689 sec/step)
step 1690 	 loss = 0.638 (3.009 sec/step)
step 1700 	 loss = 0.637 (2.463 sec/step)
step 1710 	 loss = 0.611 (3.032 sec/step)
step 1720 	 loss = 0.593 (2.626 sec/step)
step 1730 	 loss = 0.654 (2.634 sec/step)
step 1740 	 loss = 0.634 (2.678 sec/step)
step 1750 	 loss = 0.626 (2.787 sec/step)
step 1760 	 loss = 0.646 (2.598 sec/step)
step 1770 	 loss = 0.610 (2.779 sec/step)
step 1780 	 loss = 0.632 (2.872 sec/step)
step 1790 	 loss = 0.654 (2.632 sec/step)
step 1800 	 loss = 0.628 (2.751 sec/step)
step 1810 	 loss = 0.696 (2.673 sec/step)
step 1820 	 loss = 0.601 (2.739 sec/step)
step 1830 	 loss = 0.588 (2.799 sec/step)
step 1840 	 loss = 0.626 (2.780 sec/step)
step 1850 	 loss = 0.585 (2.726 sec/step)
step 1860 	 loss = 0.579 (2.759 sec/step)
step 1870 	 loss = 0.634 (2.941 sec/step)
step 1880 	 loss = 0.605 (2.804 sec/step)
step 1890 	 loss = 0.608 (2.772 sec/step)
step 1900 	 loss = 0.564 (2.604 sec/step)
step 1910 	 loss = 0.580 (2.715 sec/step)
step 1920 	 loss = 0.629 (2.658 sec/step)
step 1930 	 loss = 0.632 (2.865 sec/step)
step 1940 	 loss = 0.590 (2.861 sec/step)
step 1950 	 loss = 0.630 (2.902 sec/step)
step 1960 	 loss = 0.603 (2.794 sec/step)
step 1970 	 loss = 0.619 (2.633 sec/step)
step 1980 	 loss = 0.605 (2.963 sec/step)
step 1990 	 loss = 0.607 (2.845 sec/step)
step 2000 	 loss = 0.633 (3.047 sec/step)
step 2010 	 loss = 0.629 (3.020 sec/step)
step 2020 	 loss = 0.591 (2.615 sec/step)
step 2030 	 loss = 0.610 (2.840 sec/step)
step 2040 	 loss = 0.622 (2.876 sec/step)
step 2050 	 loss = 0.628 (2.714 sec/step)
step 2060 	 loss = 0.607 (2.585 sec/step)
step 2070 	 loss = 0.580 (2.779 sec/step)
step 2080 	 loss = 0.592 (2.912 sec/step)
step 2090 	 loss = 0.646 (2.647 sec/step)
step 2100 	 loss = 0.649 (3.088 sec/step)
step 2110 	 loss = 0.647 (2.554 sec/step)
step 2120 	 loss = 0.600 (2.993 sec/step)
step 2130 	 loss = 0.632 (3.153 sec/step)
step 2140 	 loss = 0.592 (2.909 sec/step)
step 2150 	 loss = 0.630 (2.733 sec/step)
step 2160 	 loss = 0.628 (2.690 sec/step)
step 2170 	 loss = 0.609 (2.653 sec/step)
step 2180 	 loss = 0.616 (3.092 sec/step)
step 2190 	 loss = 0.582 (2.796 sec/step)
step 2200 	 loss = 0.595 (2.627 sec/step)
step 2210 	 loss = 0.603 (2.651 sec/step)
step 2220 	 loss = 0.569 (2.995 sec/step)
step 2230 	 loss = 0.603 (2.750 sec/step)
step 2240 	 loss = 0.609 (2.575 sec/step)
step 2250 	 loss = 0.571 (2.577 sec/step)
step 2260 	 loss = 0.553 (2.663 sec/step)
step 2270 	 loss = 0.564 (2.709 sec/step)
step 2280 	 loss = 0.605 (2.599 sec/step)
step 2290 	 loss = 0.613 (2.798 sec/step)
step 2300 	 loss = 0.637 (2.638 sec/step)
step 2310 	 loss = 0.594 (2.766 sec/step)
step 2320 	 loss = 0.607 (2.545 sec/step)
step 2330 	 loss = 0.608 (2.795 sec/step)
step 2340 	 loss = 0.559 (2.930 sec/step)
step 2350 	 loss = 0.609 (3.046 sec/step)
step 2360 	 loss = 0.608 (2.721 sec/step)
step 2370 	 loss = 0.583 (3.204 sec/step)
step 2380 	 loss = 0.592 (2.784 sec/step)
step 2390 	 loss = 0.574 (2.656 sec/step)
step 2400 	 loss = 0.606 (3.080 sec/step)
step 2410 	 loss = 0.640 (2.722 sec/step)
step 2420 	 loss = 0.600 (2.973 sec/step)
step 2430 	 loss = 0.624 (2.816 sec/step)
step 2440 	 loss = 0.618 (2.825 sec/step)
step 2450 	 loss = 0.590 (2.636 sec/step)
step 2460 	 loss = 0.555 (2.633 sec/step)
step 2470 	 loss = 0.580 (2.702 sec/step)
step 2480 	 loss = 0.610 (2.756 sec/step)
step 2490 	 loss = 0.614 (2.943 sec/step)
step 2500 	 loss = 0.572 (2.682 sec/step)
step 2510 	 loss = 0.620 (2.905 sec/step)
step 2520 	 loss = 0.590 (2.620 sec/step)
step 2530 	 loss = 0.600 (2.834 sec/step)
step 2540 	 loss = 0.585 (2.907 sec/step)
step 2550 	 loss = 0.582 (2.765 sec/step)
step 2560 	 loss = 0.600 (2.773 sec/step)
step 2570 	 loss = 0.577 (3.085 sec/step)
step 2580 	 loss = 0.605 (2.825 sec/step)
step 2590 	 loss = 0.605 (2.691 sec/step)
step 2600 	 loss = 0.557 (2.719 sec/step)
step 2610 	 loss = 0.612 (3.057 sec/step)
step 2620 	 loss = 0.554 (2.873 sec/step)
step 2630 	 loss = 0.572 (3.021 sec/step)
step 2640 	 loss = 0.599 (2.543 sec/step)
step 2650 	 loss = 0.615 (2.896 sec/step)
step 2660 	 loss = 0.598 (2.890 sec/step)
step 2670 	 loss = 0.606 (2.554 sec/step)
step 2680 	 loss = 0.595 (2.908 sec/step)
step 2690 	 loss = 0.567 (2.887 sec/step)
step 2700 	 loss = 0.616 (2.643 sec/step)
step 2710 	 loss = 0.611 (2.769 sec/step)
step 2720 	 loss = 0.575 (2.596 sec/step)
step 2730 	 loss = 0.637 (2.709 sec/step)
step 2740 	 loss = 0.592 (2.853 sec/step)
step 2750 	 loss = 0.574 (2.747 sec/step)
step 2760 	 loss = 0.578 (2.954 sec/step)
step 2770 	 loss = 0.605 (2.885 sec/step)
step 2780 	 loss = 0.601 (3.000 sec/step)
step 2790 	 loss = 0.608 (2.824 sec/step)
step 2800 	 loss = 0.566 (3.110 sec/step)
step 2810 	 loss = 0.575 (2.642 sec/step)
step 2820 	 loss = 0.583 (2.792 sec/step)
step 2830 	 loss = 0.592 (2.653 sec/step)
step 2840 	 loss = 0.604 (2.661 sec/step)
step 2850 	 loss = 0.601 (2.744 sec/step)
step 2860 	 loss = 0.567 (2.918 sec/step)
step 2870 	 loss = 0.547 (3.103 sec/step)
step 2880 	 loss = 0.571 (2.854 sec/step)
step 2890 	 loss = 0.582 (2.791 sec/step)
step 2900 	 loss = 0.578 (2.942 sec/step)
step 2910 	 loss = 0.590 (2.492 sec/step)
step 2920 	 loss = 0.567 (2.715 sec/step)
step 2930 	 loss = 0.581 (2.955 sec/step)
step 2940 	 loss = 0.578 (2.921 sec/step)
step 2950 	 loss = 0.616 (2.922 sec/step)
step 2960 	 loss = 0.594 (2.791 sec/step)
step 2970 	 loss = 0.551 (2.806 sec/step)
step 2980 	 loss = 0.574 (2.846 sec/step)
step 2990 	 loss = 0.600 (3.005 sec/step)
step 3000 	 loss = 0.597 (2.940 sec/step)
step 3010 	 loss = 0.619 (2.882 sec/step)
step 3020 	 loss = 0.614 (2.817 sec/step)
step 3030 	 loss = 0.621 (2.896 sec/step)
step 3040 	 loss = 0.555 (2.711 sec/step)
step 3050 	 loss = 0.553 (2.785 sec/step)
step 3060 	 loss = 0.546 (2.771 sec/step)
step 3070 	 loss = 0.586 (2.969 sec/step)
step 3080 	 loss = 0.562 (3.075 sec/step)
step 3090 	 loss = 0.609 (2.757 sec/step)
step 3100 	 loss = 0.628 (2.888 sec/step)
step 3110 	 loss = 0.584 (2.784 sec/step)
step 3120 	 loss = 0.581 (2.954 sec/step)
step 3130 	 loss = 0.570 (2.864 sec/step)
step 3140 	 loss = 0.550 (2.490 sec/step)
step 3150 	 loss = 0.596 (2.946 sec/step)
step 3160 	 loss = 0.600 (2.918 sec/step)
step 3170 	 loss = 0.579 (3.026 sec/step)
step 3180 	 loss = 0.570 (2.958 sec/step)
step 3190 	 loss = 0.566 (2.507 sec/step)
step 3200 	 loss = 0.547 (2.720 sec/step)
step 3210 	 loss = 0.590 (2.869 sec/step)
step 3220 	 loss = 0.613 (2.700 sec/step)
step 3230 	 loss = 0.630 (2.931 sec/step)
step 3240 	 loss = 0.567 (2.880 sec/step)
step 3250 	 loss = 0.577 (2.531 sec/step)
step 3260 	 loss = 0.582 (2.737 sec/step)
step 3270 	 loss = 0.638 (3.018 sec/step)
step 3280 	 loss = 0.531 (2.677 sec/step)
step 3290 	 loss = 0.615 (2.888 sec/step)
step 3300 	 loss = 0.569 (2.723 sec/step)
step 3310 	 loss = 0.546 (2.736 sec/step)
step 3320 	 loss = 0.596 (2.695 sec/step)
step 3330 	 loss = 0.572 (2.905 sec/step)
step 3340 	 loss = 0.570 (2.964 sec/step)
step 3350 	 loss = 0.634 (2.777 sec/step)
step 3360 	 loss = 0.532 (3.138 sec/step)
step 3370 	 loss = 0.616 (2.914 sec/step)
step 3380 	 loss = 0.573 (2.825 sec/step)
step 3390 	 loss = 0.562 (2.579 sec/step)
step 3400 	 loss = 0.575 (2.645 sec/step)
step 3410 	 loss = 0.552 (2.571 sec/step)
step 3420 	 loss = 0.572 (2.646 sec/step)
step 3430 	 loss = 0.568 (2.607 sec/step)
step 3440 	 loss = 0.560 (2.777 sec/step)
step 3450 	 loss = 0.589 (2.675 sec/step)
step 3460 	 loss = 0.533 (2.928 sec/step)
step 3470 	 loss = 0.572 (2.829 sec/step)
step 3480 	 loss = 0.561 (2.910 sec/step)
step 3490 	 loss = 0.563 (2.853 sec/step)
step 3500 	 loss = 0.586 (2.956 sec/step)
step 3510 	 loss = 0.586 (2.924 sec/step)
step 3520 	 loss = 0.574 (2.713 sec/step)
step 3530 	 loss = 0.557 (2.655 sec/step)
step 3540 	 loss = 0.589 (2.346 sec/step)
step 3550 	 loss = 0.583 (2.752 sec/step)
step 3560 	 loss = 0.573 (2.914 sec/step)
step 3570 	 loss = 0.573 (2.985 sec/step)
step 3580 	 loss = 0.582 (2.714 sec/step)
step 3590 	 loss = 0.610 (2.714 sec/step)
step 3600 	 loss = 0.516 (2.989 sec/step)
step 3610 	 loss = 0.546 (2.732 sec/step)
step 3620 	 loss = 0.578 (3.063 sec/step)
step 3630 	 loss = 0.598 (2.867 sec/step)
step 3640 	 loss = 0.570 (3.115 sec/step)
step 3650 	 loss = 0.597 (2.900 sec/step)
step 3660 	 loss = 0.596 (2.964 sec/step)
step 3670 	 loss = 0.594 (2.815 sec/step)
step 3680 	 loss = 0.602 (2.877 sec/step)
step 3690 	 loss = 0.579 (2.729 sec/step)
step 3700 	 loss = 0.569 (2.781 sec/step)
New Best Loss 0.079 < Old Best inf.  Saving...
The checkpoint has been created.
Training Epoch 1/100
step 3710 	 loss = 0.547 (2.670 sec/step)
step 3720 	 loss = 0.580 (2.569 sec/step)
step 3730 	 loss = 0.513 (2.882 sec/step)
step 3740 	 loss = 0.534 (2.727 sec/step)
step 3750 	 loss = 0.519 (2.566 sec/step)
step 3760 	 loss = 0.530 (2.852 sec/step)
step 3770 	 loss = 0.571 (2.705 sec/step)
step 3780 	 loss = 0.557 (3.042 sec/step)
step 3790 	 loss = 0.573 (2.799 sec/step)
step 3800 	 loss = 0.530 (2.894 sec/step)
step 3810 	 loss = 0.521 (2.862 sec/step)
step 3820 	 loss = 0.526 (2.683 sec/step)
step 3830 	 loss = 0.513 (2.903 sec/step)
step 3840 	 loss = 0.522 (2.831 sec/step)
step 3850 	 loss = 0.535 (2.764 sec/step)
step 3860 	 loss = 0.536 (2.700 sec/step)
step 3870 	 loss = 0.549 (2.926 sec/step)
step 3880 	 loss = 0.560 (2.839 sec/step)
step 3890 	 loss = 0.570 (3.095 sec/step)
step 3900 	 loss = 0.534 (2.903 sec/step)
step 3910 	 loss = 0.549 (2.914 sec/step)
step 3920 	 loss = 0.525 (3.156 sec/step)
step 3930 	 loss = 0.558 (2.834 sec/step)
step 3940 	 loss = 0.528 (2.991 sec/step)
step 3950 	 loss = 0.555 (2.509 sec/step)
step 3960 	 loss = 0.546 (2.401 sec/step)
step 3970 	 loss = 0.535 (2.419 sec/step)
step 3980 	 loss = 0.550 (2.442 sec/step)
step 3990 	 loss = 0.534 (2.274 sec/step)
step 4000 	 loss = 0.570 (2.442 sec/step)
step 4010 	 loss = 0.534 (2.195 sec/step)
step 4020 	 loss = 0.491 (2.284 sec/step)
step 4030 	 loss = 0.534 (2.228 sec/step)
step 4040 	 loss = 0.491 (2.369 sec/step)
step 4050 	 loss = 0.535 (3.290 sec/step)
step 4060 	 loss = 0.565 (2.593 sec/step)
step 4070 	 loss = 0.553 (2.426 sec/step)
step 4080 	 loss = 0.536 (3.006 sec/step)
step 4090 	 loss = 0.568 (2.724 sec/step)
step 4100 	 loss = 0.563 (2.651 sec/step)
step 4110 	 loss = 0.534 (2.904 sec/step)
step 4120 	 loss = 0.523 (2.988 sec/step)
step 4130 	 loss = 0.535 (3.039 sec/step)
step 4140 	 loss = 0.541 (2.783 sec/step)
step 4150 	 loss = 0.521 (2.906 sec/step)
step 4160 	 loss = 0.552 (3.053 sec/step)
step 4170 	 loss = 0.534 (2.798 sec/step)
step 4180 	 loss = 0.527 (2.610 sec/step)
step 4190 	 loss = 0.566 (2.951 sec/step)
step 4200 	 loss = 0.524 (2.379 sec/step)
step 4210 	 loss = 0.528 (2.939 sec/step)
step 4220 	 loss = 0.498 (2.748 sec/step)
step 4230 	 loss = 0.516 (2.744 sec/step)
step 4240 	 loss = 0.516 (2.975 sec/step)
step 4250 	 loss = 0.517 (2.569 sec/step)
step 4260 	 loss = 0.563 (2.922 sec/step)
step 4270 	 loss = 0.562 (2.616 sec/step)
step 4280 	 loss = 0.532 (2.864 sec/step)
step 4290 	 loss = 0.530 (2.743 sec/step)
step 4300 	 loss = 0.560 (2.708 sec/step)
step 4310 	 loss = 0.550 (2.709 sec/step)
step 4320 	 loss = 0.506 (2.836 sec/step)
step 4330 	 loss = 0.558 (2.930 sec/step)
step 4340 	 loss = 0.548 (2.852 sec/step)
step 4350 	 loss = 0.557 (2.853 sec/step)
step 4360 	 loss = 0.553 (2.758 sec/step)
step 4370 	 loss = 0.574 (2.830 sec/step)
step 4380 	 loss = 0.578 (2.551 sec/step)
step 4390 	 loss = 0.553 (2.791 sec/step)
step 4400 	 loss = 0.505 (3.100 sec/step)
step 4410 	 loss = 0.531 (2.617 sec/step)
step 4420 	 loss = 0.540 (2.684 sec/step)
step 4430 	 loss = 0.566 (2.838 sec/step)
step 4440 	 loss = 0.549 (2.880 sec/step)
step 4450 	 loss = 0.503 (2.927 sec/step)
step 4460 	 loss = 0.503 (2.766 sec/step)
step 4470 	 loss = 0.560 (2.675 sec/step)
step 4480 	 loss = 0.555 (2.910 sec/step)
step 4490 	 loss = 0.530 (2.590 sec/step)
step 4500 	 loss = 0.491 (2.958 sec/step)
step 4510 	 loss = 0.534 (2.819 sec/step)
step 4520 	 loss = 0.522 (2.753 sec/step)
step 4530 	 loss = 0.521 (2.907 sec/step)
step 4540 	 loss = 0.544 (2.889 sec/step)
step 4550 	 loss = 0.520 (2.905 sec/step)
step 4560 	 loss = 0.553 (3.038 sec/step)
step 4570 	 loss = 0.530 (2.647 sec/step)
step 4580 	 loss = 0.539 (2.759 sec/step)
step 4590 	 loss = 0.553 (2.604 sec/step)
step 4600 	 loss = 0.538 (2.676 sec/step)
step 4610 	 loss = 0.547 (2.616 sec/step)
step 4620 	 loss = 0.516 (2.730 sec/step)
step 4630 	 loss = 0.525 (2.727 sec/step)
step 4640 	 loss = 0.498 (3.115 sec/step)
step 4650 	 loss = 0.529 (2.815 sec/step)
step 4660 	 loss = 0.503 (2.757 sec/step)
step 4670 	 loss = 0.567 (2.837 sec/step)
step 4680 	 loss = 0.536 (2.743 sec/step)
step 4690 	 loss = 0.546 (2.858 sec/step)
step 4700 	 loss = 0.517 (2.690 sec/step)
step 4710 	 loss = 0.516 (2.922 sec/step)
step 4720 	 loss = 0.546 (2.707 sec/step)
step 4730 	 loss = 0.541 (2.776 sec/step)
step 4740 	 loss = 0.521 (3.073 sec/step)
step 4750 	 loss = 0.527 (2.833 sec/step)
step 4760 	 loss = 0.554 (2.503 sec/step)
step 4770 	 loss = 0.531 (3.380 sec/step)
step 4780 	 loss = 0.526 (2.684 sec/step)
step 4790 	 loss = 0.541 (2.585 sec/step)
step 4800 	 loss = 0.509 (2.832 sec/step)
step 4810 	 loss = 0.518 (2.515 sec/step)
step 4820 	 loss = 0.499 (2.825 sec/step)
step 4830 	 loss = 0.540 (2.909 sec/step)
step 4840 	 loss = 0.523 (2.975 sec/step)
step 4850 	 loss = 0.533 (2.713 sec/step)
step 4860 	 loss = 0.563 (2.575 sec/step)
step 4870 	 loss = 0.539 (2.743 sec/step)
step 4880 	 loss = 0.555 (2.688 sec/step)
step 4890 	 loss = 0.488 (2.523 sec/step)
step 4900 	 loss = 0.547 (2.511 sec/step)
step 4910 	 loss = 0.557 (2.940 sec/step)
step 4920 	 loss = 0.564 (2.924 sec/step)
step 4930 	 loss = 0.554 (2.921 sec/step)
step 4940 	 loss = 0.545 (2.515 sec/step)
step 4950 	 loss = 0.524 (2.891 sec/step)
step 4960 	 loss = 0.554 (2.948 sec/step)
step 4970 	 loss = 0.536 (2.753 sec/step)
step 4980 	 loss = 0.535 (2.570 sec/step)
step 4990 	 loss = 0.520 (2.501 sec/step)
step 5000 	 loss = 0.515 (2.716 sec/step)
step 5010 	 loss = 0.508 (2.732 sec/step)
step 5020 	 loss = 0.552 (2.703 sec/step)
step 5030 	 loss = 0.556 (2.676 sec/step)
step 5040 	 loss = 0.549 (3.089 sec/step)
step 5050 	 loss = 0.530 (2.679 sec/step)
step 5060 	 loss = 0.537 (2.634 sec/step)
step 5070 	 loss = 0.558 (2.974 sec/step)
step 5080 	 loss = 0.515 (2.821 sec/step)
step 5090 	 loss = 0.556 (2.969 sec/step)
step 5100 	 loss = 0.501 (3.031 sec/step)
step 5110 	 loss = 0.518 (3.116 sec/step)
step 5120 	 loss = 0.553 (2.561 sec/step)
step 5130 	 loss = 0.549 (2.690 sec/step)
step 5140 	 loss = 0.529 (2.748 sec/step)
step 5150 	 loss = 0.546 (2.694 sec/step)
step 5160 	 loss = 0.553 (2.509 sec/step)
step 5170 	 loss = 0.556 (3.152 sec/step)
step 5180 	 loss = 0.511 (2.745 sec/step)
step 5190 	 loss = 0.543 (2.633 sec/step)
step 5200 	 loss = 0.546 (2.887 sec/step)
step 5210 	 loss = 0.519 (2.840 sec/step)
step 5220 	 loss = 0.541 (2.913 sec/step)
step 5230 	 loss = 0.559 (2.876 sec/step)
step 5240 	 loss = 0.523 (2.592 sec/step)
step 5250 	 loss = 0.535 (2.384 sec/step)
step 5260 	 loss = 0.516 (2.824 sec/step)
step 5270 	 loss = 0.520 (2.892 sec/step)
step 5280 	 loss = 0.511 (2.847 sec/step)
step 5290 	 loss = 0.528 (2.765 sec/step)
step 5300 	 loss = 0.503 (2.473 sec/step)
step 5310 	 loss = 0.520 (2.675 sec/step)
step 5320 	 loss = 0.511 (2.778 sec/step)
step 5330 	 loss = 0.547 (2.710 sec/step)
step 5340 	 loss = 0.573 (2.784 sec/step)
step 5350 	 loss = 0.566 (2.728 sec/step)
step 5360 	 loss = 0.504 (2.950 sec/step)
step 5370 	 loss = 0.517 (2.950 sec/step)
step 5380 	 loss = 0.506 (2.955 sec/step)
step 5390 	 loss = 0.536 (2.791 sec/step)
step 5400 	 loss = 0.533 (2.980 sec/step)
step 5410 	 loss = 0.540 (2.695 sec/step)
step 5420 	 loss = 0.541 (3.007 sec/step)
step 5430 	 loss = 0.541 (2.850 sec/step)
step 5440 	 loss = 0.518 (2.863 sec/step)
step 5450 	 loss = 0.528 (2.542 sec/step)
step 5460 	 loss = 0.522 (2.818 sec/step)
step 5470 	 loss = 0.524 (2.608 sec/step)
step 5480 	 loss = 0.519 (2.530 sec/step)
step 5490 	 loss = 0.530 (2.734 sec/step)
step 5500 	 loss = 0.541 (2.694 sec/step)
step 5510 	 loss = 0.544 (2.516 sec/step)
step 5520 	 loss = 0.495 (2.950 sec/step)
step 5530 	 loss = 0.532 (2.960 sec/step)
step 5540 	 loss = 0.494 (2.637 sec/step)
step 5550 	 loss = 0.517 (2.798 sec/step)
step 5560 	 loss = 0.528 (2.719 sec/step)
step 5570 	 loss = 0.524 (2.898 sec/step)
step 5580 	 loss = 0.488 (2.882 sec/step)
step 5590 	 loss = 0.527 (2.896 sec/step)
step 5600 	 loss = 0.537 (2.860 sec/step)
step 5610 	 loss = 0.517 (2.690 sec/step)
step 5620 	 loss = 0.493 (2.656 sec/step)
step 5630 	 loss = 0.518 (2.834 sec/step)
step 5640 	 loss = 0.518 (2.680 sec/step)
step 5650 	 loss = 0.546 (2.732 sec/step)
step 5660 	 loss = 0.485 (2.955 sec/step)
step 5670 	 loss = 0.531 (3.070 sec/step)
step 5680 	 loss = 0.541 (2.774 sec/step)
step 5690 	 loss = 0.557 (2.589 sec/step)
step 5700 	 loss = 0.540 (3.013 sec/step)
step 5710 	 loss = 0.579 (2.839 sec/step)
step 5720 	 loss = 0.537 (2.586 sec/step)
step 5730 	 loss = 0.533 (3.240 sec/step)
step 5740 	 loss = 0.587 (2.548 sec/step)
step 5750 	 loss = 0.536 (2.765 sec/step)
step 5760 	 loss = 0.536 (2.663 sec/step)
step 5770 	 loss = 0.511 (2.736 sec/step)
step 5780 	 loss = 0.504 (3.006 sec/step)
step 5790 	 loss = 0.527 (3.016 sec/step)
step 5800 	 loss = 0.516 (2.754 sec/step)
step 5810 	 loss = 0.544 (3.093 sec/step)
step 5820 	 loss = 0.537 (2.867 sec/step)
step 5830 	 loss = 0.547 (2.788 sec/step)
step 5840 	 loss = 0.521 (2.787 sec/step)
step 5850 	 loss = 0.519 (2.738 sec/step)
step 5860 	 loss = 0.527 (2.917 sec/step)
step 5870 	 loss = 0.550 (3.022 sec/step)
step 5880 	 loss = 0.546 (2.640 sec/step)
step 5890 	 loss = 0.539 (2.657 sec/step)
step 5900 	 loss = 0.514 (2.629 sec/step)
step 5910 	 loss = 0.515 (2.581 sec/step)
step 5920 	 loss = 0.526 (2.769 sec/step)
step 5930 	 loss = 0.554 (2.980 sec/step)
step 5940 	 loss = 0.557 (2.604 sec/step)
step 5950 	 loss = 0.543 (2.747 sec/step)
step 5960 	 loss = 0.491 (2.650 sec/step)
step 5970 	 loss = 0.498 (2.662 sec/step)
step 5980 	 loss = 0.579 (3.110 sec/step)
step 5990 	 loss = 0.535 (2.922 sec/step)
step 6000 	 loss = 0.530 (2.812 sec/step)
step 6010 	 loss = 0.571 (2.761 sec/step)
step 6020 	 loss = 0.544 (2.597 sec/step)
step 6030 	 loss = 0.499 (2.826 sec/step)
step 6040 	 loss = 0.502 (2.850 sec/step)
step 6050 	 loss = 0.542 (2.944 sec/step)
step 6060 	 loss = 0.535 (2.961 sec/step)
step 6070 	 loss = 0.518 (3.014 sec/step)
step 6080 	 loss = 0.530 (2.780 sec/step)
step 6090 	 loss = 0.537 (2.689 sec/step)
step 6100 	 loss = 0.474 (3.041 sec/step)
step 6110 	 loss = 0.525 (2.819 sec/step)
step 6120 	 loss = 0.513 (2.573 sec/step)
step 6130 	 loss = 0.530 (2.941 sec/step)
step 6140 	 loss = 0.537 (2.815 sec/step)
step 6150 	 loss = 0.508 (3.217 sec/step)
step 6160 	 loss = 0.534 (2.610 sec/step)
step 6170 	 loss = 0.503 (3.027 sec/step)
step 6180 	 loss = 0.511 (2.918 sec/step)
step 6190 	 loss = 0.562 (2.697 sec/step)
step 6200 	 loss = 0.523 (2.980 sec/step)
step 6210 	 loss = 0.498 (2.834 sec/step)
step 6220 	 loss = 0.533 (2.761 sec/step)
step 6230 	 loss = 0.545 (2.686 sec/step)
step 6240 	 loss = 0.529 (2.811 sec/step)
step 6250 	 loss = 0.552 (2.849 sec/step)
step 6260 	 loss = 0.566 (2.700 sec/step)
step 6270 	 loss = 0.539 (2.730 sec/step)
step 6280 	 loss = 0.536 (3.095 sec/step)
step 6290 	 loss = 0.529 (2.768 sec/step)
step 6300 	 loss = 0.552 (2.665 sec/step)
step 6310 	 loss = 0.518 (2.735 sec/step)
step 6320 	 loss = 0.567 (2.730 sec/step)
step 6330 	 loss = 0.526 (2.783 sec/step)
step 6340 	 loss = 0.535 (2.867 sec/step)
step 6350 	 loss = 0.557 (2.990 sec/step)
step 6360 	 loss = 0.521 (2.687 sec/step)
step 6370 	 loss = 0.522 (2.889 sec/step)
step 6380 	 loss = 0.541 (2.708 sec/step)
step 6390 	 loss = 0.529 (3.005 sec/step)
step 6400 	 loss = 0.522 (2.697 sec/step)
step 6410 	 loss = 0.507 (2.946 sec/step)
step 6420 	 loss = 0.536 (2.869 sec/step)
step 6430 	 loss = 0.478 (2.998 sec/step)
step 6440 	 loss = 0.554 (2.944 sec/step)
step 6450 	 loss = 0.498 (3.211 sec/step)
step 6460 	 loss = 0.511 (2.955 sec/step)
step 6470 	 loss = 0.504 (2.774 sec/step)
step 6480 	 loss = 0.509 (2.933 sec/step)
step 6490 	 loss = 0.501 (2.760 sec/step)
step 6500 	 loss = 0.514 (2.891 sec/step)
step 6510 	 loss = 0.527 (2.788 sec/step)
step 6520 	 loss = 0.541 (2.742 sec/step)
step 6530 	 loss = 0.545 (2.893 sec/step)
step 6540 	 loss = 0.569 (2.829 sec/step)
step 6550 	 loss = 0.497 (2.934 sec/step)
step 6560 	 loss = 0.546 (2.984 sec/step)
step 6570 	 loss = 0.489 (2.437 sec/step)
step 6580 	 loss = 0.549 (2.765 sec/step)
step 6590 	 loss = 0.525 (2.468 sec/step)
step 6600 	 loss = 0.512 (3.272 sec/step)
step 6610 	 loss = 0.507 (2.692 sec/step)
step 6620 	 loss = 0.533 (2.967 sec/step)
step 6630 	 loss = 0.539 (2.487 sec/step)
step 6640 	 loss = 0.526 (2.637 sec/step)
step 6650 	 loss = 0.521 (2.633 sec/step)
step 6660 	 loss = 0.548 (3.084 sec/step)
step 6670 	 loss = 0.496 (2.773 sec/step)
step 6680 	 loss = 0.525 (2.770 sec/step)
step 6690 	 loss = 0.513 (2.959 sec/step)
step 6700 	 loss = 0.537 (2.819 sec/step)
step 6710 	 loss = 0.531 (2.634 sec/step)
step 6720 	 loss = 0.508 (2.920 sec/step)
step 6730 	 loss = 0.537 (2.573 sec/step)
step 6740 	 loss = 0.541 (2.844 sec/step)
step 6750 	 loss = 0.514 (2.672 sec/step)
step 6760 	 loss = 0.540 (2.675 sec/step)
step 6770 	 loss = 0.506 (2.798 sec/step)
step 6780 	 loss = 0.548 (2.931 sec/step)
step 6790 	 loss = 0.553 (3.065 sec/step)
step 6800 	 loss = 0.496 (2.674 sec/step)
step 6810 	 loss = 0.528 (3.072 sec/step)
step 6820 	 loss = 0.502 (2.706 sec/step)
step 6830 	 loss = 0.570 (3.076 sec/step)
step 6840 	 loss = 0.522 (2.540 sec/step)
step 6850 	 loss = 0.524 (2.616 sec/step)
step 6860 	 loss = 0.534 (2.653 sec/step)
step 6870 	 loss = 0.578 (3.053 sec/step)
step 6880 	 loss = 0.524 (2.644 sec/step)
step 6890 	 loss = 0.528 (2.762 sec/step)
step 6900 	 loss = 0.555 (2.613 sec/step)
step 6910 	 loss = 0.573 (2.535 sec/step)
step 6920 	 loss = 0.537 (3.033 sec/step)
step 6930 	 loss = 0.530 (2.875 sec/step)
step 6940 	 loss = 0.510 (2.744 sec/step)
step 6950 	 loss = 0.497 (2.439 sec/step)
step 6960 	 loss = 0.490 (2.743 sec/step)
step 6970 	 loss = 0.522 (2.930 sec/step)
step 6980 	 loss = 0.511 (2.660 sec/step)
step 6990 	 loss = 0.526 (3.123 sec/step)
step 7000 	 loss = 0.515 (2.785 sec/step)
step 7010 	 loss = 0.529 (2.684 sec/step)
step 7020 	 loss = 0.504 (2.865 sec/step)
step 7030 	 loss = 0.524 (2.784 sec/step)
step 7040 	 loss = 0.510 (2.881 sec/step)
step 7050 	 loss = 0.556 (2.746 sec/step)
step 7060 	 loss = 0.523 (2.625 sec/step)
step 7070 	 loss = 0.489 (2.945 sec/step)
step 7080 	 loss = 0.476 (2.625 sec/step)
step 7090 	 loss = 0.497 (2.584 sec/step)
step 7100 	 loss = 0.495 (3.111 sec/step)
step 7110 	 loss = 0.563 (2.991 sec/step)
step 7120 	 loss = 0.506 (3.044 sec/step)
step 7130 	 loss = 0.537 (3.130 sec/step)
step 7140 	 loss = 0.553 (2.438 sec/step)
step 7150 	 loss = 0.507 (3.030 sec/step)
step 7160 	 loss = 0.504 (2.723 sec/step)
step 7170 	 loss = 0.526 (2.734 sec/step)
step 7180 	 loss = 0.518 (2.905 sec/step)
step 7190 	 loss = 0.489 (2.830 sec/step)
step 7200 	 loss = 0.537 (2.804 sec/step)
step 7210 	 loss = 0.533 (2.863 sec/step)
step 7220 	 loss = 0.518 (3.000 sec/step)
step 7230 	 loss = 0.536 (2.564 sec/step)
step 7240 	 loss = 0.535 (2.898 sec/step)
step 7250 	 loss = 0.526 (2.766 sec/step)
step 7260 	 loss = 0.518 (3.056 sec/step)
step 7270 	 loss = 0.526 (2.715 sec/step)
step 7280 	 loss = 0.523 (2.934 sec/step)
step 7290 	 loss = 0.564 (2.895 sec/step)
step 7300 	 loss = 0.529 (2.972 sec/step)
step 7310 	 loss = 0.543 (2.631 sec/step)
step 7320 	 loss = 0.543 (2.803 sec/step)
step 7330 	 loss = 0.533 (2.911 sec/step)
step 7340 	 loss = 0.541 (2.686 sec/step)
step 7350 	 loss = 0.566 (2.645 sec/step)
step 7360 	 loss = 0.541 (2.703 sec/step)
step 7370 	 loss = 0.517 (2.694 sec/step)
step 7380 	 loss = 0.514 (2.789 sec/step)
step 7390 	 loss = 0.509 (2.855 sec/step)
step 7400 	 loss = 0.530 (2.951 sec/step)
step 7410 	 loss = 0.520 (2.771 sec/step)
New Best Loss 0.078 < Old Best 0.079.  Saving...
The checkpoint has been created.
Training Epoch 2/100
step 7420 	 loss = 0.458 (3.111 sec/step)
step 7430 	 loss = 0.486 (2.845 sec/step)
step 7440 	 loss = 0.474 (2.845 sec/step)
step 7450 	 loss = 0.487 (2.649 sec/step)
step 7460 	 loss = 0.478 (2.672 sec/step)
step 7470 	 loss = 0.503 (3.219 sec/step)
step 7480 	 loss = 0.476 (3.019 sec/step)
step 7490 	 loss = 0.477 (2.855 sec/step)
step 7500 	 loss = 0.451 (3.000 sec/step)
step 7510 	 loss = 0.490 (2.849 sec/step)
step 7520 	 loss = 0.481 (2.577 sec/step)
step 7530 	 loss = 0.486 (3.008 sec/step)
step 7540 	 loss = 0.455 (2.789 sec/step)
step 7550 	 loss = 0.485 (2.807 sec/step)
step 7560 	 loss = 0.491 (2.688 sec/step)
step 7570 	 loss = 0.444 (2.709 sec/step)
step 7580 	 loss = 0.438 (2.800 sec/step)
step 7590 	 loss = 0.473 (2.653 sec/step)
step 7600 	 loss = 0.443 (2.619 sec/step)
step 7610 	 loss = 0.473 (2.582 sec/step)
step 7620 	 loss = 0.508 (2.932 sec/step)
step 7630 	 loss = 0.465 (3.010 sec/step)
step 7640 	 loss = 0.477 (2.690 sec/step)
step 7650 	 loss = 0.464 (2.588 sec/step)
step 7660 	 loss = 0.500 (2.553 sec/step)
step 7670 	 loss = 0.495 (2.723 sec/step)
step 7680 	 loss = 0.470 (3.080 sec/step)
step 7690 	 loss = 0.464 (2.631 sec/step)
step 7700 	 loss = 0.449 (2.934 sec/step)
step 7710 	 loss = 0.489 (3.181 sec/step)
step 7720 	 loss = 0.468 (3.047 sec/step)
step 7730 	 loss = 0.480 (2.908 sec/step)
step 7740 	 loss = 0.490 (2.874 sec/step)
step 7750 	 loss = 0.491 (2.696 sec/step)
step 7760 	 loss = 0.495 (2.730 sec/step)
step 7770 	 loss = 0.460 (2.826 sec/step)
step 7780 	 loss = 0.457 (2.806 sec/step)
step 7790 	 loss = 0.468 (2.867 sec/step)
step 7800 	 loss = 0.483 (3.378 sec/step)
step 7810 	 loss = 0.505 (3.132 sec/step)
step 7820 	 loss = 0.477 (2.602 sec/step)
step 7830 	 loss = 0.516 (3.031 sec/step)
step 7840 	 loss = 0.444 (2.549 sec/step)
step 7850 	 loss = 0.463 (2.533 sec/step)
step 7860 	 loss = 0.463 (3.166 sec/step)
step 7870 	 loss = 0.445 (2.739 sec/step)
step 7880 	 loss = 0.475 (3.064 sec/step)
step 7890 	 loss = 0.490 (2.673 sec/step)
step 7900 	 loss = 0.451 (2.809 sec/step)
step 7910 	 loss = 0.455 (2.558 sec/step)
step 7920 	 loss = 0.443 (2.840 sec/step)
step 7930 	 loss = 0.436 (2.913 sec/step)
step 7940 	 loss = 0.472 (2.809 sec/step)
step 7950 	 loss = 0.496 (3.052 sec/step)
step 7960 	 loss = 0.493 (2.882 sec/step)
step 7970 	 loss = 0.477 (2.861 sec/step)
step 7980 	 loss = 0.496 (2.621 sec/step)
step 7990 	 loss = 0.443 (2.790 sec/step)
step 8000 	 loss = 0.452 (2.791 sec/step)
step 8010 	 loss = 0.478 (2.902 sec/step)
step 8020 	 loss = 0.431 (2.822 sec/step)
step 8030 	 loss = 0.480 (2.889 sec/step)
step 8040 	 loss = 0.475 (2.682 sec/step)
step 8050 	 loss = 0.491 (3.262 sec/step)
step 8060 	 loss = 0.464 (2.935 sec/step)
step 8070 	 loss = 0.491 (2.587 sec/step)
step 8080 	 loss = 0.475 (2.849 sec/step)
step 8090 	 loss = 0.464 (2.722 sec/step)
step 8100 	 loss = 0.482 (2.875 sec/step)
step 8110 	 loss = 0.490 (3.089 sec/step)
step 8120 	 loss = 0.480 (2.794 sec/step)
step 8130 	 loss = 0.459 (2.911 sec/step)
step 8140 	 loss = 0.478 (3.021 sec/step)
step 8150 	 loss = 0.509 (2.953 sec/step)
step 8160 	 loss = 0.493 (3.058 sec/step)
step 8170 	 loss = 0.462 (2.798 sec/step)
step 8180 	 loss = 0.461 (2.546 sec/step)
step 8190 	 loss = 0.459 (2.888 sec/step)
step 8200 	 loss = 0.473 (2.836 sec/step)
step 8210 	 loss = 0.481 (2.936 sec/step)
step 8220 	 loss = 0.433 (2.737 sec/step)
step 8230 	 loss = 0.460 (2.903 sec/step)
step 8240 	 loss = 0.469 (2.645 sec/step)
step 8250 	 loss = 0.442 (3.043 sec/step)
step 8260 	 loss = 0.479 (2.842 sec/step)
step 8270 	 loss = 0.489 (3.158 sec/step)
step 8280 	 loss = 0.486 (2.367 sec/step)
step 8290 	 loss = 0.452 (2.848 sec/step)
step 8300 	 loss = 0.470 (3.238 sec/step)
step 8310 	 loss = 0.479 (2.906 sec/step)
step 8320 	 loss = 0.475 (2.901 sec/step)
step 8330 	 loss = 0.470 (2.507 sec/step)
step 8340 	 loss = 0.476 (2.902 sec/step)
step 8350 	 loss = 0.477 (2.802 sec/step)
step 8360 	 loss = 0.494 (2.938 sec/step)
step 8370 	 loss = 0.504 (2.798 sec/step)
step 8380 	 loss = 0.449 (2.904 sec/step)
step 8390 	 loss = 0.439 (2.778 sec/step)
step 8400 	 loss = 0.516 (2.933 sec/step)
step 8410 	 loss = 0.475 (2.805 sec/step)
step 8420 	 loss = 0.487 (2.722 sec/step)
step 8430 	 loss = 0.482 (2.859 sec/step)
step 8440 	 loss = 0.468 (2.670 sec/step)
step 8450 	 loss = 0.502 (2.885 sec/step)
step 8460 	 loss = 0.491 (2.648 sec/step)
step 8470 	 loss = 0.474 (3.109 sec/step)
step 8480 	 loss = 0.469 (2.793 sec/step)
step 8490 	 loss = 0.499 (2.738 sec/step)
step 8500 	 loss = 0.439 (2.702 sec/step)
step 8510 	 loss = 0.455 (2.731 sec/step)
step 8520 	 loss = 0.481 (2.868 sec/step)
step 8530 	 loss = 0.460 (2.624 sec/step)
step 8540 	 loss = 0.458 (2.901 sec/step)
step 8550 	 loss = 0.481 (2.808 sec/step)
step 8560 	 loss = 0.495 (2.717 sec/step)
step 8570 	 loss = 0.477 (2.846 sec/step)
step 8580 	 loss = 0.476 (2.778 sec/step)
step 8590 	 loss = 0.476 (2.712 sec/step)
step 8600 	 loss = 0.471 (2.972 sec/step)
step 8610 	 loss = 0.452 (2.681 sec/step)
step 8620 	 loss = 0.487 (2.781 sec/step)
step 8630 	 loss = 0.474 (2.851 sec/step)
step 8640 	 loss = 0.498 (2.915 sec/step)
step 8650 	 loss = 0.459 (2.944 sec/step)
step 8660 	 loss = 0.490 (2.815 sec/step)
step 8670 	 loss = 0.474 (3.013 sec/step)
step 8680 	 loss = 0.483 (2.825 sec/step)
step 8690 	 loss = 0.441 (2.809 sec/step)
step 8700 	 loss = 0.494 (2.686 sec/step)
step 8710 	 loss = 0.505 (2.769 sec/step)
step 8720 	 loss = 0.454 (2.971 sec/step)
step 8730 	 loss = 0.468 (2.984 sec/step)
step 8740 	 loss = 0.482 (2.793 sec/step)
step 8750 	 loss = 0.483 (2.920 sec/step)
step 8760 	 loss = 0.478 (2.953 sec/step)
step 8770 	 loss = 0.479 (3.091 sec/step)
step 8780 	 loss = 0.487 (2.727 sec/step)
step 8790 	 loss = 0.482 (3.043 sec/step)
step 8800 	 loss = 0.464 (3.128 sec/step)
step 8810 	 loss = 0.463 (2.620 sec/step)
step 8820 	 loss = 0.467 (2.933 sec/step)
step 8830 	 loss = 0.481 (2.822 sec/step)
step 8840 	 loss = 0.497 (3.021 sec/step)
step 8850 	 loss = 0.467 (2.867 sec/step)
step 8860 	 loss = 0.496 (2.621 sec/step)
step 8870 	 loss = 0.507 (3.080 sec/step)
step 8880 	 loss = 0.489 (2.801 sec/step)
step 8890 	 loss = 0.508 (2.731 sec/step)
step 8900 	 loss = 0.468 (2.962 sec/step)
step 8910 	 loss = 0.462 (2.820 sec/step)
step 8920 	 loss = 0.470 (2.679 sec/step)
step 8930 	 loss = 0.508 (2.637 sec/step)
step 8940 	 loss = 0.433 (2.948 sec/step)
step 8950 	 loss = 0.454 (2.699 sec/step)
step 8960 	 loss = 0.471 (2.923 sec/step)
step 8970 	 loss = 0.500 (2.814 sec/step)
step 8980 	 loss = 0.459 (2.875 sec/step)
step 8990 	 loss = 0.462 (2.461 sec/step)
step 9000 	 loss = 0.482 (2.832 sec/step)
step 9010 	 loss = 0.497 (2.649 sec/step)
step 9020 	 loss = 0.466 (2.812 sec/step)
step 9030 	 loss = 0.480 (2.893 sec/step)
step 9040 	 loss = 0.510 (2.703 sec/step)
step 9050 	 loss = 0.495 (3.247 sec/step)
step 9060 	 loss = 0.482 (2.794 sec/step)
step 9070 	 loss = 0.489 (2.778 sec/step)
step 9080 	 loss = 0.480 (2.739 sec/step)
step 9090 	 loss = 0.484 (2.913 sec/step)
step 9100 	 loss = 0.474 (2.653 sec/step)
step 9110 	 loss = 0.478 (2.384 sec/step)
step 9120 	 loss = 0.489 (2.784 sec/step)
step 9130 	 loss = 0.485 (3.130 sec/step)
step 9140 	 loss = 0.488 (2.696 sec/step)
step 9150 	 loss = 0.481 (2.974 sec/step)
step 9160 	 loss = 0.469 (2.631 sec/step)
step 9170 	 loss = 0.501 (2.824 sec/step)
step 9180 	 loss = 0.487 (2.593 sec/step)
step 9190 	 loss = 0.458 (2.642 sec/step)
step 9200 	 loss = 0.472 (2.566 sec/step)
step 9210 	 loss = 0.480 (2.740 sec/step)
step 9220 	 loss = 0.495 (2.713 sec/step)
step 9230 	 loss = 0.490 (2.784 sec/step)
step 9240 	 loss = 0.480 (2.721 sec/step)
step 9250 	 loss = 0.492 (2.792 sec/step)
step 9260 	 loss = 0.480 (2.786 sec/step)
step 9270 	 loss = 0.485 (3.261 sec/step)
step 9280 	 loss = 0.452 (3.000 sec/step)
step 9290 	 loss = 0.496 (2.572 sec/step)
step 9300 	 loss = 0.453 (3.277 sec/step)
step 9310 	 loss = 0.472 (3.067 sec/step)
step 9320 	 loss = 0.508 (2.973 sec/step)
step 9330 	 loss = 0.489 (3.282 sec/step)
step 9340 	 loss = 0.479 (2.794 sec/step)
step 9350 	 loss = 0.495 (2.862 sec/step)
step 9360 	 loss = 0.480 (2.798 sec/step)
step 9370 	 loss = 0.477 (2.849 sec/step)
step 9380 	 loss = 0.495 (2.458 sec/step)
step 9390 	 loss = 0.498 (2.622 sec/step)
step 9400 	 loss = 0.472 (2.888 sec/step)
step 9410 	 loss = 0.469 (2.847 sec/step)
step 9420 	 loss = 0.471 (2.677 sec/step)
step 9430 	 loss = 0.524 (2.812 sec/step)
step 9440 	 loss = 0.478 (2.861 sec/step)
step 9450 	 loss = 0.467 (2.610 sec/step)
step 9460 	 loss = 0.495 (2.768 sec/step)
step 9470 	 loss = 0.466 (2.633 sec/step)
step 9480 	 loss = 0.491 (2.646 sec/step)
step 9490 	 loss = 0.470 (2.892 sec/step)
step 9500 	 loss = 0.491 (2.863 sec/step)
step 9510 	 loss = 0.501 (2.968 sec/step)
step 9520 	 loss = 0.493 (2.766 sec/step)
step 9530 	 loss = 0.479 (2.652 sec/step)
step 9540 	 loss = 0.511 (2.829 sec/step)
step 9550 	 loss = 0.478 (2.752 sec/step)
step 9560 	 loss = 0.478 (2.718 sec/step)
step 9570 	 loss = 0.480 (2.868 sec/step)
step 9580 	 loss = 0.477 (2.813 sec/step)
step 9590 	 loss = 0.494 (2.885 sec/step)
step 9600 	 loss = 0.485 (2.863 sec/step)
step 9610 	 loss = 0.486 (2.871 sec/step)
step 9620 	 loss = 0.475 (2.753 sec/step)
step 9630 	 loss = 0.506 (3.155 sec/step)
step 9640 	 loss = 0.487 (2.614 sec/step)
step 9650 	 loss = 0.508 (2.272 sec/step)
step 9660 	 loss = 0.458 (2.308 sec/step)
step 9670 	 loss = 0.454 (2.364 sec/step)
step 9680 	 loss = 0.464 (2.294 sec/step)
step 9690 	 loss = 0.478 (2.395 sec/step)
step 9700 	 loss = 0.474 (2.406 sec/step)
step 9710 	 loss = 0.463 (2.254 sec/step)
step 9720 	 loss = 0.495 (2.337 sec/step)
step 9730 	 loss = 0.449 (2.360 sec/step)
step 9740 	 loss = 0.458 (2.266 sec/step)
step 9750 	 loss = 0.507 (2.414 sec/step)
step 9760 	 loss = 0.473 (2.161 sec/step)
step 9770 	 loss = 0.487 (2.266 sec/step)
step 9780 	 loss = 0.491 (2.279 sec/step)
step 9790 	 loss = 0.489 (2.291 sec/step)
step 9800 	 loss = 0.476 (3.174 sec/step)
step 9810 	 loss = 0.475 (2.655 sec/step)
step 9820 	 loss = 0.477 (2.467 sec/step)
step 9830 	 loss = 0.483 (2.697 sec/step)
step 9840 	 loss = 0.482 (2.845 sec/step)
step 9850 	 loss = 0.468 (2.524 sec/step)
step 9860 	 loss = 0.477 (3.226 sec/step)
step 9870 	 loss = 0.478 (2.980 sec/step)
step 9880 	 loss = 0.463 (2.865 sec/step)
step 9890 	 loss = 0.490 (2.874 sec/step)
step 9900 	 loss = 0.475 (2.866 sec/step)
step 9910 	 loss = 0.474 (2.603 sec/step)
step 9920 	 loss = 0.471 (2.535 sec/step)
step 9930 	 loss = 0.511 (2.784 sec/step)
step 9940 	 loss = 0.485 (2.750 sec/step)
step 9950 	 loss = 0.496 (2.687 sec/step)
step 9960 	 loss = 0.482 (2.884 sec/step)
step 9970 	 loss = 0.463 (3.032 sec/step)
step 9980 	 loss = 0.500 (2.961 sec/step)
step 9990 	 loss = 0.487 (2.829 sec/step)
step 10000 	 loss = 0.469 (3.043 sec/step)
step 10010 	 loss = 0.432 (2.571 sec/step)
step 10020 	 loss = 0.487 (2.660 sec/step)
step 10030 	 loss = 0.460 (2.657 sec/step)
step 10040 	 loss = 0.473 (2.485 sec/step)
step 10050 	 loss = 0.505 (2.723 sec/step)
step 10060 	 loss = 0.478 (2.758 sec/step)
step 10070 	 loss = 0.492 (2.854 sec/step)
step 10080 	 loss = 0.482 (2.638 sec/step)
step 10090 	 loss = 0.478 (2.505 sec/step)
step 10100 	 loss = 0.510 (2.770 sec/step)
step 10110 	 loss = 0.480 (2.851 sec/step)
step 10120 	 loss = 0.492 (3.126 sec/step)
step 10130 	 loss = 0.469 (3.149 sec/step)
step 10140 	 loss = 0.484 (2.754 sec/step)
step 10150 	 loss = 0.482 (2.879 sec/step)
step 10160 	 loss = 0.474 (2.995 sec/step)
step 10170 	 loss = 0.495 (3.261 sec/step)
step 10180 	 loss = 0.498 (2.577 sec/step)
step 10190 	 loss = 0.455 (2.736 sec/step)
step 10200 	 loss = 0.472 (3.024 sec/step)
step 10210 	 loss = 0.453 (2.618 sec/step)
step 10220 	 loss = 0.513 (3.007 sec/step)
step 10230 	 loss = 0.471 (2.848 sec/step)
step 10240 	 loss = 0.488 (2.744 sec/step)
step 10250 	 loss = 0.483 (2.591 sec/step)
step 10260 	 loss = 0.479 (3.181 sec/step)
step 10270 	 loss = 0.471 (2.748 sec/step)
step 10280 	 loss = 0.467 (3.225 sec/step)
step 10290 	 loss = 0.479 (2.943 sec/step)
step 10300 	 loss = 0.492 (2.876 sec/step)
step 10310 	 loss = 0.490 (2.699 sec/step)
step 10320 	 loss = 0.459 (3.119 sec/step)
step 10330 	 loss = 0.443 (3.022 sec/step)
step 10340 	 loss = 0.490 (2.778 sec/step)
step 10350 	 loss = 0.448 (2.690 sec/step)
step 10360 	 loss = 0.465 (2.781 sec/step)
step 10370 	 loss = 0.440 (3.042 sec/step)
step 10380 	 loss = 0.469 (2.765 sec/step)
step 10390 	 loss = 0.471 (3.160 sec/step)
step 10400 	 loss = 0.479 (2.848 sec/step)
step 10410 	 loss = 0.518 (3.052 sec/step)
step 10420 	 loss = 0.462 (2.627 sec/step)
step 10430 	 loss = 0.487 (2.558 sec/step)
step 10440 	 loss = 0.480 (2.775 sec/step)
step 10450 	 loss = 0.494 (2.632 sec/step)
step 10460 	 loss = 0.477 (2.722 sec/step)
step 10470 	 loss = 0.462 (2.870 sec/step)
step 10480 	 loss = 0.462 (2.733 sec/step)
step 10490 	 loss = 0.476 (2.953 sec/step)
step 10500 	 loss = 0.498 (2.738 sec/step)
step 10510 	 loss = 0.487 (2.785 sec/step)
step 10520 	 loss = 0.474 (2.959 sec/step)
step 10530 	 loss = 0.470 (2.781 sec/step)
step 10540 	 loss = 0.458 (2.930 sec/step)
step 10550 	 loss = 0.480 (3.263 sec/step)
step 10560 	 loss = 0.482 (2.584 sec/step)
step 10570 	 loss = 0.479 (2.728 sec/step)
step 10580 	 loss = 0.479 (2.812 sec/step)
step 10590 	 loss = 0.473 (2.859 sec/step)
step 10600 	 loss = 0.496 (2.509 sec/step)
step 10610 	 loss = 0.456 (2.920 sec/step)
step 10620 	 loss = 0.481 (2.707 sec/step)
step 10630 	 loss = 0.461 (2.759 sec/step)
step 10640 	 loss = 0.511 (3.438 sec/step)
step 10650 	 loss = 0.478 (2.605 sec/step)
step 10660 	 loss = 0.479 (3.172 sec/step)
step 10670 	 loss = 0.496 (2.731 sec/step)
step 10680 	 loss = 0.485 (2.929 sec/step)
step 10690 	 loss = 0.478 (2.851 sec/step)
step 10700 	 loss = 0.484 (2.846 sec/step)
step 10710 	 loss = 0.476 (2.903 sec/step)
step 10720 	 loss = 0.520 (3.207 sec/step)
step 10730 	 loss = 0.502 (2.950 sec/step)
step 10740 	 loss = 0.493 (2.977 sec/step)
step 10750 	 loss = 0.443 (3.161 sec/step)
step 10760 	 loss = 0.444 (2.731 sec/step)
step 10770 	 loss = 0.482 (2.630 sec/step)
step 10780 	 loss = 0.445 (2.657 sec/step)
step 10790 	 loss = 0.481 (2.870 sec/step)
step 10800 	 loss = 0.471 (3.087 sec/step)
step 10810 	 loss = 0.473 (2.788 sec/step)
step 10820 	 loss = 0.451 (3.024 sec/step)
step 10830 	 loss = 0.461 (3.010 sec/step)
step 10840 	 loss = 0.436 (2.766 sec/step)
step 10850 	 loss = 0.509 (2.864 sec/step)
step 10860 	 loss = 0.475 (3.195 sec/step)
step 10870 	 loss = 0.504 (3.010 sec/step)
step 10880 	 loss = 0.466 (2.815 sec/step)
step 10890 	 loss = 0.474 (2.667 sec/step)
step 10900 	 loss = 0.468 (2.941 sec/step)
step 10910 	 loss = 0.498 (3.116 sec/step)
step 10920 	 loss = 0.438 (2.983 sec/step)
step 10930 	 loss = 0.477 (2.563 sec/step)
step 10940 	 loss = 0.466 (3.040 sec/step)
step 10950 	 loss = 0.462 (2.734 sec/step)
step 10960 	 loss = 0.528 (2.781 sec/step)
step 10970 	 loss = 0.489 (2.416 sec/step)
step 10980 	 loss = 0.495 (2.663 sec/step)
step 10990 	 loss = 0.476 (2.813 sec/step)
step 11000 	 loss = 0.503 (2.734 sec/step)
step 11010 	 loss = 0.507 (2.879 sec/step)
step 11020 	 loss = 0.483 (2.521 sec/step)
step 11030 	 loss = 0.462 (2.822 sec/step)
step 11040 	 loss = 0.498 (3.057 sec/step)
step 11050 	 loss = 0.471 (3.088 sec/step)
step 11060 	 loss = 0.489 (2.713 sec/step)
step 11070 	 loss = 0.498 (3.029 sec/step)
step 11080 	 loss = 0.506 (3.072 sec/step)
step 11090 	 loss = 0.488 (2.714 sec/step)
step 11100 	 loss = 0.488 (2.923 sec/step)
step 11110 	 loss = 0.503 (2.743 sec/step)
step 11120 	 loss = 0.489 (2.738 sec/step)
New Best Loss 0.076 < Old Best 0.078.  Saving...
The checkpoint has been created.
Training Epoch 3/100
step 11130 	 loss = 0.412 (2.729 sec/step)
step 11140 	 loss = 0.406 (2.611 sec/step)
step 11150 	 loss = 0.441 (2.823 sec/step)
step 11160 	 loss = 0.405 (2.764 sec/step)
step 11170 	 loss = 0.445 (2.971 sec/step)
step 11180 	 loss = 0.437 (2.905 sec/step)
step 11190 	 loss = 0.440 (2.479 sec/step)
step 11200 	 loss = 0.414 (3.013 sec/step)
step 11210 	 loss = 0.408 (2.567 sec/step)
step 11220 	 loss = 0.438 (2.862 sec/step)
step 11230 	 loss = 0.450 (2.965 sec/step)
step 11240 	 loss = 0.462 (2.727 sec/step)
step 11250 	 loss = 0.419 (3.032 sec/step)
step 11260 	 loss = 0.454 (2.595 sec/step)
step 11270 	 loss = 0.417 (2.940 sec/step)
step 11280 	 loss = 0.428 (2.837 sec/step)
step 11290 	 loss = 0.401 (3.012 sec/step)
step 11300 	 loss = 0.403 (2.735 sec/step)
step 11310 	 loss = 0.407 (2.747 sec/step)
step 11320 	 loss = 0.434 (2.945 sec/step)
step 11330 	 loss = 0.453 (2.798 sec/step)
step 11340 	 loss = 0.443 (2.814 sec/step)
step 11350 	 loss = 0.433 (2.789 sec/step)
step 11360 	 loss = 0.419 (2.678 sec/step)
step 11370 	 loss = 0.410 (2.802 sec/step)
step 11380 	 loss = 0.415 (2.755 sec/step)
step 11390 	 loss = 0.435 (2.777 sec/step)
step 11400 	 loss = 0.440 (2.636 sec/step)
step 11410 	 loss = 0.437 (2.884 sec/step)
step 11420 	 loss = 0.404 (2.879 sec/step)
step 11430 	 loss = 0.427 (2.620 sec/step)
step 11440 	 loss = 0.442 (2.830 sec/step)
step 11450 	 loss = 0.426 (2.931 sec/step)
step 11460 	 loss = 0.438 (2.873 sec/step)
step 11470 	 loss = 0.459 (2.593 sec/step)
step 11480 	 loss = 0.421 (2.886 sec/step)
step 11490 	 loss = 0.424 (2.824 sec/step)
step 11500 	 loss = 0.428 (2.663 sec/step)
step 11510 	 loss = 0.418 (2.946 sec/step)
step 11520 	 loss = 0.411 (2.754 sec/step)
step 11530 	 loss = 0.406 (2.639 sec/step)
step 11540 	 loss = 0.425 (2.862 sec/step)
step 11550 	 loss = 0.426 (2.804 sec/step)
step 11560 	 loss = 0.427 (2.969 sec/step)
step 11570 	 loss = 0.403 (2.838 sec/step)
step 11580 	 loss = 0.429 (2.729 sec/step)
step 11590 	 loss = 0.406 (2.942 sec/step)
step 11600 	 loss = 0.441 (3.017 sec/step)
step 11610 	 loss = 0.401 (2.781 sec/step)
step 11620 	 loss = 0.442 (2.905 sec/step)
step 11630 	 loss = 0.414 (3.036 sec/step)
step 11640 	 loss = 0.459 (2.795 sec/step)
step 11650 	 loss = 0.429 (2.769 sec/step)
step 11660 	 loss = 0.419 (2.855 sec/step)
step 11670 	 loss = 0.417 (3.041 sec/step)
step 11680 	 loss = 0.431 (2.659 sec/step)
step 11690 	 loss = 0.448 (2.886 sec/step)
step 11700 	 loss = 0.469 (2.876 sec/step)
step 11710 	 loss = 0.411 (2.882 sec/step)
step 11720 	 loss = 0.403 (2.705 sec/step)
step 11730 	 loss = 0.454 (2.861 sec/step)
step 11740 	 loss = 0.427 (3.070 sec/step)
step 11750 	 loss = 0.450 (2.768 sec/step)
step 11760 	 loss = 0.411 (2.692 sec/step)
step 11770 	 loss = 0.442 (2.895 sec/step)
step 11780 	 loss = 0.421 (2.650 sec/step)
step 11790 	 loss = 0.419 (2.759 sec/step)
step 11800 	 loss = 0.442 (2.532 sec/step)
step 11810 	 loss = 0.433 (3.072 sec/step)
step 11820 	 loss = 0.463 (2.990 sec/step)
step 11830 	 loss = 0.419 (2.633 sec/step)
step 11840 	 loss = 0.440 (2.759 sec/step)
step 11850 	 loss = 0.437 (2.700 sec/step)
step 11860 	 loss = 0.427 (2.920 sec/step)
step 11870 	 loss = 0.423 (2.502 sec/step)
step 11880 	 loss = 0.454 (2.832 sec/step)
step 11890 	 loss = 0.459 (2.795 sec/step)
step 11900 	 loss = 0.439 (2.734 sec/step)
step 11910 	 loss = 0.420 (3.039 sec/step)
step 11920 	 loss = 0.420 (2.661 sec/step)
step 11930 	 loss = 0.459 (3.142 sec/step)
step 11940 	 loss = 0.443 (2.929 sec/step)
step 11950 	 loss = 0.434 (2.951 sec/step)
step 11960 	 loss = 0.451 (3.037 sec/step)
step 11970 	 loss = 0.419 (2.684 sec/step)
step 11980 	 loss = 0.453 (2.902 sec/step)
step 11990 	 loss = 0.419 (3.374 sec/step)
step 12000 	 loss = 0.442 (2.591 sec/step)
step 12010 	 loss = 0.420 (2.820 sec/step)
step 12020 	 loss = 0.453 (2.755 sec/step)
step 12030 	 loss = 0.421 (2.440 sec/step)
step 12040 	 loss = 0.444 (3.215 sec/step)
step 12050 	 loss = 0.424 (2.714 sec/step)
step 12060 	 loss = 0.432 (3.497 sec/step)
step 12070 	 loss = 0.431 (2.631 sec/step)
step 12080 	 loss = 0.419 (2.729 sec/step)
step 12090 	 loss = 0.434 (2.670 sec/step)
step 12100 	 loss = 0.441 (3.093 sec/step)
step 12110 	 loss = 0.452 (2.862 sec/step)
step 12120 	 loss = 0.449 (2.848 sec/step)
step 12130 	 loss = 0.436 (2.916 sec/step)
step 12140 	 loss = 0.427 (2.997 sec/step)
step 12150 	 loss = 0.428 (2.709 sec/step)
step 12160 	 loss = 0.474 (2.834 sec/step)
step 12170 	 loss = 0.412 (2.685 sec/step)
step 12180 	 loss = 0.441 (2.848 sec/step)
step 12190 	 loss = 0.442 (2.860 sec/step)
step 12200 	 loss = 0.433 (2.416 sec/step)
step 12210 	 loss = 0.450 (2.772 sec/step)
step 12220 	 loss = 0.420 (2.903 sec/step)
step 12230 	 loss = 0.410 (2.897 sec/step)
step 12240 	 loss = 0.423 (3.027 sec/step)
step 12250 	 loss = 0.445 (2.862 sec/step)
step 12260 	 loss = 0.426 (2.756 sec/step)
step 12270 	 loss = 0.408 (2.961 sec/step)
step 12280 	 loss = 0.412 (3.137 sec/step)
step 12290 	 loss = 0.427 (2.909 sec/step)
step 12300 	 loss = 0.483 (3.047 sec/step)
step 12310 	 loss = 0.439 (2.714 sec/step)
step 12320 	 loss = 0.416 (2.969 sec/step)
step 12330 	 loss = 0.424 (3.098 sec/step)
step 12340 	 loss = 0.435 (3.113 sec/step)
step 12350 	 loss = 0.443 (2.866 sec/step)
step 12360 	 loss = 0.418 (2.986 sec/step)
step 12370 	 loss = 0.434 (3.356 sec/step)
step 12380 	 loss = 0.419 (3.130 sec/step)
step 12390 	 loss = 0.420 (2.843 sec/step)
step 12400 	 loss = 0.421 (2.863 sec/step)
step 12410 	 loss = 0.430 (2.567 sec/step)
step 12420 	 loss = 0.459 (2.608 sec/step)
step 12430 	 loss = 0.447 (3.085 sec/step)
step 12440 	 loss = 0.483 (2.652 sec/step)
step 12450 	 loss = 0.439 (2.967 sec/step)
step 12460 	 loss = 0.442 (2.923 sec/step)
step 12470 	 loss = 0.425 (3.114 sec/step)
step 12480 	 loss = 0.431 (2.725 sec/step)
step 12490 	 loss = 0.451 (2.679 sec/step)
step 12500 	 loss = 0.432 (2.511 sec/step)
step 12510 	 loss = 0.436 (2.763 sec/step)
step 12520 	 loss = 0.440 (2.860 sec/step)
step 12530 	 loss = 0.429 (2.969 sec/step)
step 12540 	 loss = 0.456 (2.721 sec/step)
step 12550 	 loss = 0.463 (2.766 sec/step)
step 12560 	 loss = 0.434 (2.474 sec/step)
step 12570 	 loss = 0.424 (2.907 sec/step)
step 12580 	 loss = 0.448 (2.748 sec/step)
step 12590 	 loss = 0.441 (2.847 sec/step)
step 12600 	 loss = 0.421 (2.965 sec/step)
step 12610 	 loss = 0.428 (2.639 sec/step)
step 12620 	 loss = 0.434 (2.722 sec/step)
step 12630 	 loss = 0.444 (3.072 sec/step)
step 12640 	 loss = 0.427 (2.709 sec/step)
step 12650 	 loss = 0.446 (3.059 sec/step)
step 12660 	 loss = 0.464 (2.835 sec/step)
step 12670 	 loss = 0.440 (2.974 sec/step)
step 12680 	 loss = 0.408 (3.097 sec/step)
step 12690 	 loss = 0.424 (2.808 sec/step)
step 12700 	 loss = 0.439 (2.617 sec/step)
step 12710 	 loss = 0.449 (2.995 sec/step)
step 12720 	 loss = 0.431 (3.001 sec/step)
step 12730 	 loss = 0.444 (2.612 sec/step)
step 12740 	 loss = 0.455 (2.719 sec/step)
step 12750 	 loss = 0.435 (2.796 sec/step)
step 12760 	 loss = 0.441 (3.128 sec/step)
step 12770 	 loss = 0.427 (3.123 sec/step)
step 12780 	 loss = 0.452 (2.637 sec/step)
step 12790 	 loss = 0.448 (2.945 sec/step)
step 12800 	 loss = 0.443 (2.805 sec/step)
step 12810 	 loss = 0.459 (2.884 sec/step)
step 12820 	 loss = 0.442 (2.812 sec/step)
step 12830 	 loss = 0.419 (2.913 sec/step)
step 12840 	 loss = 0.412 (2.914 sec/step)
step 12850 	 loss = 0.417 (2.558 sec/step)
step 12860 	 loss = 0.452 (2.699 sec/step)
step 12870 	 loss = 0.449 (2.808 sec/step)
step 12880 	 loss = 0.441 (2.780 sec/step)
step 12890 	 loss = 0.424 (2.758 sec/step)
step 12900 	 loss = 0.428 (2.859 sec/step)
step 12910 	 loss = 0.419 (2.837 sec/step)
step 12920 	 loss = 0.482 (2.868 sec/step)
step 12930 	 loss = 0.422 (2.812 sec/step)
step 12940 	 loss = 0.454 (3.299 sec/step)
step 12950 	 loss = 0.438 (2.761 sec/step)
step 12960 	 loss = 0.437 (2.950 sec/step)
step 12970 	 loss = 0.416 (3.160 sec/step)
step 12980 	 loss = 0.450 (2.876 sec/step)
step 12990 	 loss = 0.419 (2.941 sec/step)
step 13000 	 loss = 0.425 (2.783 sec/step)
step 13010 	 loss = 0.420 (2.773 sec/step)
step 13020 	 loss = 0.455 (3.230 sec/step)
step 13030 	 loss = 0.395 (2.818 sec/step)
step 13040 	 loss = 0.455 (3.035 sec/step)
step 13050 	 loss = 0.430 (3.192 sec/step)
step 13060 	 loss = 0.443 (2.773 sec/step)
step 13070 	 loss = 0.435 (2.792 sec/step)
step 13080 	 loss = 0.455 (2.684 sec/step)
step 13090 	 loss = 0.398 (2.742 sec/step)
step 13100 	 loss = 0.424 (2.692 sec/step)
step 13110 	 loss = 0.426 (2.693 sec/step)
step 13120 	 loss = 0.449 (3.206 sec/step)
step 13130 	 loss = 0.456 (2.537 sec/step)
step 13140 	 loss = 0.445 (3.098 sec/step)
step 13150 	 loss = 0.435 (2.726 sec/step)
step 13160 	 loss = 0.421 (2.994 sec/step)
step 13170 	 loss = 0.431 (3.029 sec/step)
step 13180 	 loss = 0.443 (2.764 sec/step)
step 13190 	 loss = 0.456 (2.829 sec/step)
step 13200 	 loss = 0.440 (2.600 sec/step)
step 13210 	 loss = 0.438 (3.260 sec/step)
step 13220 	 loss = 0.460 (2.869 sec/step)
step 13230 	 loss = 0.458 (2.835 sec/step)
step 13240 	 loss = 0.456 (2.843 sec/step)
step 13250 	 loss = 0.433 (2.774 sec/step)
step 13260 	 loss = 0.427 (2.713 sec/step)
step 13270 	 loss = 0.421 (3.036 sec/step)
step 13280 	 loss = 0.447 (3.224 sec/step)
step 13290 	 loss = 0.435 (2.710 sec/step)
step 13300 	 loss = 0.428 (2.707 sec/step)
step 13310 	 loss = 0.445 (2.469 sec/step)
step 13320 	 loss = 0.461 (3.129 sec/step)
step 13330 	 loss = 0.430 (2.699 sec/step)
step 13340 	 loss = 0.466 (2.708 sec/step)
step 13350 	 loss = 0.459 (3.034 sec/step)
step 13360 	 loss = 0.423 (2.802 sec/step)
step 13370 	 loss = 0.428 (2.683 sec/step)
step 13380 	 loss = 0.425 (3.095 sec/step)
step 13390 	 loss = 0.421 (2.829 sec/step)
step 13400 	 loss = 0.448 (2.879 sec/step)
step 13410 	 loss = 0.463 (2.713 sec/step)
step 13420 	 loss = 0.441 (2.841 sec/step)
step 13430 	 loss = 0.443 (2.853 sec/step)
step 13440 	 loss = 0.423 (3.218 sec/step)
step 13450 	 loss = 0.458 (2.782 sec/step)
step 13460 	 loss = 0.433 (2.952 sec/step)
step 13470 	 loss = 0.467 (2.854 sec/step)
step 13480 	 loss = 0.420 (2.869 sec/step)
step 13490 	 loss = 0.406 (2.763 sec/step)
step 13500 	 loss = 0.430 (2.877 sec/step)
step 13510 	 loss = 0.446 (2.880 sec/step)
step 13520 	 loss = 0.454 (2.776 sec/step)
step 13530 	 loss = 0.451 (2.606 sec/step)
step 13540 	 loss = 0.428 (2.925 sec/step)
step 13550 	 loss = 0.433 (2.576 sec/step)
step 13560 	 loss = 0.451 (2.625 sec/step)
step 13570 	 loss = 0.458 (2.714 sec/step)
step 13580 	 loss = 0.420 (3.233 sec/step)
step 13590 	 loss = 0.428 (2.760 sec/step)
step 13600 	 loss = 0.424 (2.949 sec/step)
step 13610 	 loss = 0.447 (2.712 sec/step)
step 13620 	 loss = 0.410 (2.801 sec/step)
step 13630 	 loss = 0.429 (2.759 sec/step)
step 13640 	 loss = 0.440 (2.607 sec/step)
step 13650 	 loss = 0.419 (2.830 sec/step)
step 13660 	 loss = 0.433 (2.740 sec/step)
step 13670 	 loss = 0.428 (2.994 sec/step)
step 13680 	 loss = 0.432 (2.887 sec/step)
step 13690 	 loss = 0.423 (2.835 sec/step)
step 13700 	 loss = 0.427 (2.995 sec/step)
step 13710 	 loss = 0.463 (3.132 sec/step)
step 13720 	 loss = 0.440 (2.679 sec/step)
step 13730 	 loss = 0.394 (3.014 sec/step)
step 13740 	 loss = 0.425 (2.775 sec/step)
step 13750 	 loss = 0.437 (2.852 sec/step)
step 13760 	 loss = 0.466 (2.712 sec/step)
step 13770 	 loss = 0.414 (2.626 sec/step)
step 13780 	 loss = 0.411 (2.570 sec/step)
step 13790 	 loss = 0.424 (2.849 sec/step)
step 13800 	 loss = 0.449 (2.820 sec/step)
step 13810 	 loss = 0.437 (2.729 sec/step)
step 13820 	 loss = 0.435 (2.640 sec/step)
step 13830 	 loss = 0.452 (2.854 sec/step)
step 13840 	 loss = 0.453 (2.502 sec/step)
step 13850 	 loss = 0.427 (2.698 sec/step)
step 13860 	 loss = 0.456 (2.864 sec/step)
step 13870 	 loss = 0.435 (2.866 sec/step)
step 13880 	 loss = 0.440 (2.756 sec/step)
step 13890 	 loss = 0.455 (2.824 sec/step)
step 13900 	 loss = 0.416 (2.703 sec/step)
step 13910 	 loss = 0.433 (2.734 sec/step)
step 13920 	 loss = 0.426 (2.998 sec/step)
step 13930 	 loss = 0.408 (2.934 sec/step)
step 13940 	 loss = 0.461 (3.155 sec/step)
step 13950 	 loss = 0.453 (2.853 sec/step)
step 13960 	 loss = 0.415 (2.696 sec/step)
step 13970 	 loss = 0.408 (2.694 sec/step)
step 13980 	 loss = 0.418 (3.158 sec/step)
step 13990 	 loss = 0.433 (2.896 sec/step)
step 14000 	 loss = 0.448 (2.985 sec/step)
step 14010 	 loss = 0.447 (2.798 sec/step)
step 14020 	 loss = 0.455 (3.174 sec/step)
step 14030 	 loss = 0.430 (2.976 sec/step)
step 14040 	 loss = 0.460 (3.229 sec/step)
step 14050 	 loss = 0.440 (2.950 sec/step)
step 14060 	 loss = 0.451 (2.655 sec/step)
step 14070 	 loss = 0.467 (2.841 sec/step)
step 14080 	 loss = 0.463 (2.770 sec/step)
step 14090 	 loss = 0.433 (3.063 sec/step)
step 14100 	 loss = 0.411 (2.706 sec/step)
step 14110 	 loss = 0.398 (2.883 sec/step)
step 14120 	 loss = 0.440 (2.890 sec/step)
step 14130 	 loss = 0.458 (2.849 sec/step)
step 14140 	 loss = 0.427 (2.790 sec/step)
step 14150 	 loss = 0.435 (2.944 sec/step)
step 14160 	 loss = 0.474 (2.821 sec/step)
step 14170 	 loss = 0.434 (2.625 sec/step)
step 14180 	 loss = 0.448 (2.982 sec/step)
step 14190 	 loss = 0.446 (2.826 sec/step)
step 14200 	 loss = 0.437 (2.723 sec/step)
step 14210 	 loss = 0.444 (2.941 sec/step)
step 14220 	 loss = 0.419 (2.781 sec/step)
step 14230 	 loss = 0.444 (2.571 sec/step)
step 14240 	 loss = 0.444 (2.781 sec/step)
step 14250 	 loss = 0.447 (2.725 sec/step)
step 14260 	 loss = 0.442 (2.936 sec/step)
step 14270 	 loss = 0.435 (2.829 sec/step)
step 14280 	 loss = 0.427 (2.907 sec/step)
step 14290 	 loss = 0.450 (3.165 sec/step)
step 14300 	 loss = 0.455 (2.912 sec/step)
step 14310 	 loss = 0.418 (2.777 sec/step)
step 14320 	 loss = 0.460 (2.772 sec/step)
step 14330 	 loss = 0.452 (2.521 sec/step)
step 14340 	 loss = 0.426 (2.603 sec/step)
step 14350 	 loss = 0.426 (2.731 sec/step)
step 14360 	 loss = 0.457 (2.621 sec/step)
step 14370 	 loss = 0.432 (2.878 sec/step)
step 14380 	 loss = 0.447 (2.948 sec/step)
step 14390 	 loss = 0.431 (2.783 sec/step)
step 14400 	 loss = 0.446 (2.901 sec/step)
step 14410 	 loss = 0.431 (2.720 sec/step)
step 14420 	 loss = 0.442 (2.734 sec/step)
step 14430 	 loss = 0.439 (2.730 sec/step)
step 14440 	 loss = 0.435 (2.769 sec/step)
step 14450 	 loss = 0.425 (2.792 sec/step)
step 14460 	 loss = 0.414 (2.879 sec/step)
step 14470 	 loss = 0.464 (2.808 sec/step)
step 14480 	 loss = 0.418 (2.690 sec/step)
step 14490 	 loss = 0.454 (2.910 sec/step)
step 14500 	 loss = 0.465 (3.086 sec/step)
step 14510 	 loss = 0.426 (2.534 sec/step)
step 14520 	 loss = 0.465 (2.458 sec/step)
step 14530 	 loss = 0.432 (2.920 sec/step)
step 14540 	 loss = 0.439 (2.812 sec/step)
step 14550 	 loss = 0.448 (2.825 sec/step)
step 14560 	 loss = 0.461 (2.898 sec/step)
step 14570 	 loss = 0.445 (2.608 sec/step)
step 14580 	 loss = 0.432 (2.499 sec/step)
step 14590 	 loss = 0.417 (3.016 sec/step)
step 14600 	 loss = 0.445 (2.691 sec/step)
step 14610 	 loss = 0.439 (2.762 sec/step)
step 14620 	 loss = 0.434 (2.710 sec/step)
step 14630 	 loss = 0.431 (2.652 sec/step)
step 14640 	 loss = 0.449 (2.744 sec/step)
step 14650 	 loss = 0.447 (2.818 sec/step)
step 14660 	 loss = 0.446 (2.840 sec/step)
step 14670 	 loss = 0.432 (2.993 sec/step)
step 14680 	 loss = 0.423 (2.804 sec/step)
step 14690 	 loss = 0.434 (3.084 sec/step)
step 14700 	 loss = 0.432 (2.719 sec/step)
step 14710 	 loss = 0.414 (2.824 sec/step)
step 14720 	 loss = 0.444 (2.724 sec/step)
step 14730 	 loss = 0.460 (2.853 sec/step)
step 14740 	 loss = 0.440 (2.748 sec/step)
step 14750 	 loss = 0.401 (2.843 sec/step)
step 14760 	 loss = 0.439 (2.519 sec/step)
step 14770 	 loss = 0.460 (3.180 sec/step)
step 14780 	 loss = 0.452 (2.995 sec/step)
step 14790 	 loss = 0.447 (2.563 sec/step)
step 14800 	 loss = 0.478 (2.844 sec/step)
step 14810 	 loss = 0.438 (3.141 sec/step)
step 14820 	 loss = 0.459 (2.767 sec/step)
step 14830 	 loss = 0.441 (2.629 sec/step)
New Best Loss 0.075 < Old Best 0.076.  Saving...
The checkpoint has been created.
Training Epoch 4/100
step 14840 	 loss = 0.403 (2.933 sec/step)
step 14850 	 loss = 0.417 (3.003 sec/step)
step 14860 	 loss = 0.384 (3.060 sec/step)
step 14870 	 loss = 0.374 (3.335 sec/step)
step 14880 	 loss = 0.389 (2.464 sec/step)
step 14890 	 loss = 0.400 (3.097 sec/step)
step 14900 	 loss = 0.396 (2.797 sec/step)
step 14910 	 loss = 0.384 (2.830 sec/step)
step 14920 	 loss = 0.354 (2.823 sec/step)
step 14930 	 loss = 0.391 (2.725 sec/step)
step 14940 	 loss = 0.363 (2.791 sec/step)
step 14950 	 loss = 0.364 (2.704 sec/step)
step 14960 	 loss = 0.364 (2.766 sec/step)
step 14970 	 loss = 0.393 (2.851 sec/step)
step 14980 	 loss = 0.417 (3.010 sec/step)
step 14990 	 loss = 0.407 (2.767 sec/step)
step 15000 	 loss = 0.388 (3.036 sec/step)
step 15010 	 loss = 0.403 (2.963 sec/step)
step 15020 	 loss = 0.412 (2.753 sec/step)
step 15030 	 loss = 0.394 (2.800 sec/step)
step 15040 	 loss = 0.371 (2.746 sec/step)
step 15050 	 loss = 0.375 (2.692 sec/step)
step 15060 	 loss = 0.366 (2.766 sec/step)
step 15070 	 loss = 0.384 (2.660 sec/step)
step 15080 	 loss = 0.372 (2.768 sec/step)
step 15090 	 loss = 0.383 (3.016 sec/step)
step 15100 	 loss = 0.409 (3.114 sec/step)
step 15110 	 loss = 0.406 (2.504 sec/step)
step 15120 	 loss = 0.419 (2.463 sec/step)
step 15130 	 loss = 0.357 (2.902 sec/step)
step 15140 	 loss = 0.382 (3.100 sec/step)
step 15150 	 loss = 0.400 (2.972 sec/step)
step 15160 	 loss = 0.372 (2.896 sec/step)
step 15170 	 loss = 0.439 (2.817 sec/step)
step 15180 	 loss = 0.374 (2.900 sec/step)
step 15190 	 loss = 0.383 (2.683 sec/step)
step 15200 	 loss = 0.361 (2.707 sec/step)
step 15210 	 loss = 0.404 (2.793 sec/step)
step 15220 	 loss = 0.406 (3.062 sec/step)
step 15230 	 loss = 0.367 (2.890 sec/step)
step 15240 	 loss = 0.405 (2.964 sec/step)
step 15250 	 loss = 0.376 (2.911 sec/step)
step 15260 	 loss = 0.373 (2.805 sec/step)
step 15270 	 loss = 0.384 (2.779 sec/step)
step 15280 	 loss = 0.396 (2.791 sec/step)
step 15290 	 loss = 0.396 (2.804 sec/step)
step 15300 	 loss = 0.380 (2.890 sec/step)
step 15310 	 loss = 0.395 (2.677 sec/step)
step 15320 	 loss = 0.379 (3.037 sec/step)
step 15330 	 loss = 0.357 (2.723 sec/step)
step 15340 	 loss = 0.359 (2.589 sec/step)
step 15350 	 loss = 0.385 (2.520 sec/step)
step 15360 	 loss = 0.379 (2.923 sec/step)
step 15370 	 loss = 0.414 (2.909 sec/step)
step 15380 	 loss = 0.393 (3.172 sec/step)
step 15390 	 loss = 0.403 (3.067 sec/step)
step 15400 	 loss = 0.406 (2.888 sec/step)
step 15410 	 loss = 0.381 (2.448 sec/step)
step 15420 	 loss = 0.393 (2.380 sec/step)
step 15430 	 loss = 0.386 (2.258 sec/step)
step 15440 	 loss = 0.387 (2.196 sec/step)
step 15450 	 loss = 0.386 (2.350 sec/step)
step 15460 	 loss = 0.397 (2.331 sec/step)
step 15470 	 loss = 0.380 (2.294 sec/step)
step 15480 	 loss = 0.371 (2.317 sec/step)
step 15490 	 loss = 0.403 (2.515 sec/step)
step 15500 	 loss = 0.381 (2.280 sec/step)
step 15510 	 loss = 0.396 (2.384 sec/step)
step 15520 	 loss = 0.382 (3.465 sec/step)
step 15530 	 loss = 0.386 (2.645 sec/step)
step 15540 	 loss = 0.393 (2.435 sec/step)
step 15550 	 loss = 0.400 (3.332 sec/step)
step 15560 	 loss = 0.407 (2.936 sec/step)
step 15570 	 loss = 0.402 (2.871 sec/step)
step 15580 	 loss = 0.399 (3.149 sec/step)
step 15590 	 loss = 0.390 (3.211 sec/step)
step 15600 	 loss = 0.391 (2.923 sec/step)
step 15610 	 loss = 0.394 (2.562 sec/step)
step 15620 	 loss = 0.377 (3.032 sec/step)
step 15630 	 loss = 0.384 (2.665 sec/step)
step 15640 	 loss = 0.392 (2.960 sec/step)
step 15650 	 loss = 0.377 (2.846 sec/step)
step 15660 	 loss = 0.394 (2.751 sec/step)
step 15670 	 loss = 0.387 (3.093 sec/step)
step 15680 	 loss = 0.399 (3.265 sec/step)
step 15690 	 loss = 0.389 (2.815 sec/step)
step 15700 	 loss = 0.378 (2.749 sec/step)
step 15710 	 loss = 0.404 (3.056 sec/step)
step 15720 	 loss = 0.394 (2.649 sec/step)
step 15730 	 loss = 0.394 (2.740 sec/step)
step 15740 	 loss = 0.373 (2.627 sec/step)
step 15750 	 loss = 0.373 (2.609 sec/step)
step 15760 	 loss = 0.402 (2.668 sec/step)
step 15770 	 loss = 0.380 (2.940 sec/step)
step 15780 	 loss = 0.374 (2.686 sec/step)
step 15790 	 loss = 0.416 (2.462 sec/step)
step 15800 	 loss = 0.393 (2.770 sec/step)
step 15810 	 loss = 0.417 (2.898 sec/step)
step 15820 	 loss = 0.385 (2.955 sec/step)
step 15830 	 loss = 0.379 (2.999 sec/step)
step 15840 	 loss = 0.376 (2.807 sec/step)
step 15850 	 loss = 0.400 (2.924 sec/step)
step 15860 	 loss = 0.407 (2.993 sec/step)
step 15870 	 loss = 0.399 (2.626 sec/step)
step 15880 	 loss = 0.374 (2.649 sec/step)
step 15890 	 loss = 0.399 (2.617 sec/step)
step 15900 	 loss = 0.398 (2.982 sec/step)
step 15910 	 loss = 0.427 (2.794 sec/step)
step 15920 	 loss = 0.437 (2.818 sec/step)
step 15930 	 loss = 0.428 (2.958 sec/step)
step 15940 	 loss = 0.392 (3.189 sec/step)
step 15950 	 loss = 0.384 (2.919 sec/step)
step 15960 	 loss = 0.401 (2.822 sec/step)
step 15970 	 loss = 0.425 (2.725 sec/step)
step 15980 	 loss = 0.409 (2.498 sec/step)
step 15990 	 loss = 0.401 (2.882 sec/step)
step 16000 	 loss = 0.400 (2.756 sec/step)
step 16010 	 loss = 0.374 (3.021 sec/step)
step 16020 	 loss = 0.405 (2.606 sec/step)
step 16030 	 loss = 0.395 (2.713 sec/step)
step 16040 	 loss = 0.406 (2.803 sec/step)
step 16050 	 loss = 0.383 (2.745 sec/step)
step 16060 	 loss = 0.402 (2.861 sec/step)
step 16070 	 loss = 0.386 (2.751 sec/step)
step 16080 	 loss = 0.392 (3.010 sec/step)
step 16090 	 loss = 0.382 (2.748 sec/step)
step 16100 	 loss = 0.436 (2.879 sec/step)
step 16110 	 loss = 0.403 (2.901 sec/step)
step 16120 	 loss = 0.409 (2.806 sec/step)
step 16130 	 loss = 0.374 (2.976 sec/step)
step 16140 	 loss = 0.395 (2.599 sec/step)
step 16150 	 loss = 0.404 (3.074 sec/step)
step 16160 	 loss = 0.402 (2.496 sec/step)
step 16170 	 loss = 0.414 (2.718 sec/step)
step 16180 	 loss = 0.403 (2.922 sec/step)
step 16190 	 loss = 0.393 (2.520 sec/step)
step 16200 	 loss = 0.390 (3.063 sec/step)
step 16210 	 loss = 0.382 (2.732 sec/step)
step 16220 	 loss = 0.374 (2.881 sec/step)
step 16230 	 loss = 0.402 (2.717 sec/step)
step 16240 	 loss = 0.395 (2.905 sec/step)
step 16250 	 loss = 0.379 (2.744 sec/step)
step 16260 	 loss = 0.381 (2.909 sec/step)
step 16270 	 loss = 0.403 (2.797 sec/step)
step 16280 	 loss = 0.418 (2.885 sec/step)
step 16290 	 loss = 0.402 (2.973 sec/step)
step 16300 	 loss = 0.403 (2.779 sec/step)
step 16310 	 loss = 0.411 (2.771 sec/step)
step 16320 	 loss = 0.412 (2.759 sec/step)
step 16330 	 loss = 0.401 (3.007 sec/step)
step 16340 	 loss = 0.406 (2.575 sec/step)
step 16350 	 loss = 0.382 (3.169 sec/step)
step 16360 	 loss = 0.402 (2.741 sec/step)
step 16370 	 loss = 0.390 (2.822 sec/step)
step 16380 	 loss = 0.380 (2.829 sec/step)
step 16390 	 loss = 0.398 (2.835 sec/step)
step 16400 	 loss = 0.420 (2.811 sec/step)
step 16410 	 loss = 0.369 (2.782 sec/step)
step 16420 	 loss = 0.395 (2.672 sec/step)
step 16430 	 loss = 0.398 (2.649 sec/step)
step 16440 	 loss = 0.395 (3.131 sec/step)
step 16450 	 loss = 0.391 (2.778 sec/step)
step 16460 	 loss = 0.388 (2.701 sec/step)
step 16470 	 loss = 0.409 (2.717 sec/step)
step 16480 	 loss = 0.426 (2.862 sec/step)
step 16490 	 loss = 0.389 (2.970 sec/step)
step 16500 	 loss = 0.418 (2.689 sec/step)
step 16510 	 loss = 0.390 (3.208 sec/step)
step 16520 	 loss = 0.410 (3.031 sec/step)
step 16530 	 loss = 0.393 (2.665 sec/step)
step 16540 	 loss = 0.384 (2.786 sec/step)
step 16550 	 loss = 0.398 (2.845 sec/step)
step 16560 	 loss = 0.363 (2.712 sec/step)
step 16570 	 loss = 0.412 (2.924 sec/step)
step 16580 	 loss = 0.406 (2.969 sec/step)
step 16590 	 loss = 0.380 (2.723 sec/step)
step 16600 	 loss = 0.423 (2.803 sec/step)
step 16610 	 loss = 0.402 (2.797 sec/step)
step 16620 	 loss = 0.396 (2.899 sec/step)
step 16630 	 loss = 0.412 (2.798 sec/step)
step 16640 	 loss = 0.397 (2.810 sec/step)
step 16650 	 loss = 0.390 (2.919 sec/step)
step 16660 	 loss = 0.396 (2.992 sec/step)
step 16670 	 loss = 0.411 (3.022 sec/step)
step 16680 	 loss = 0.402 (2.742 sec/step)
step 16690 	 loss = 0.407 (2.858 sec/step)
step 16700 	 loss = 0.414 (2.687 sec/step)
step 16710 	 loss = 0.381 (2.872 sec/step)
step 16720 	 loss = 0.421 (2.844 sec/step)
step 16730 	 loss = 0.400 (3.080 sec/step)
step 16740 	 loss = 0.412 (2.735 sec/step)
step 16750 	 loss = 0.405 (3.101 sec/step)
step 16760 	 loss = 0.424 (3.055 sec/step)
step 16770 	 loss = 0.395 (2.770 sec/step)
step 16780 	 loss = 0.409 (2.787 sec/step)
step 16790 	 loss = 0.383 (2.610 sec/step)
step 16800 	 loss = 0.423 (2.842 sec/step)
step 16810 	 loss = 0.405 (2.533 sec/step)
step 16820 	 loss = 0.385 (2.712 sec/step)
step 16830 	 loss = 0.418 (2.576 sec/step)
step 16840 	 loss = 0.417 (2.872 sec/step)
step 16850 	 loss = 0.390 (2.398 sec/step)
step 16860 	 loss = 0.375 (2.835 sec/step)
step 16870 	 loss = 0.412 (2.749 sec/step)
step 16880 	 loss = 0.389 (2.590 sec/step)
step 16890 	 loss = 0.429 (3.004 sec/step)
step 16900 	 loss = 0.380 (2.807 sec/step)
step 16910 	 loss = 0.398 (2.641 sec/step)
step 16920 	 loss = 0.399 (2.744 sec/step)
step 16930 	 loss = 0.407 (3.048 sec/step)
step 16940 	 loss = 0.383 (2.909 sec/step)
step 16950 	 loss = 0.398 (2.934 sec/step)
step 16960 	 loss = 0.406 (2.671 sec/step)
step 16970 	 loss = 0.385 (2.946 sec/step)
step 16980 	 loss = 0.380 (2.934 sec/step)
step 16990 	 loss = 0.379 (3.140 sec/step)
step 17000 	 loss = 0.390 (2.818 sec/step)
step 17010 	 loss = 0.422 (2.894 sec/step)
step 17020 	 loss = 0.375 (2.941 sec/step)
step 17030 	 loss = 0.417 (2.937 sec/step)
step 17040 	 loss = 0.433 (2.962 sec/step)
step 17050 	 loss = 0.386 (2.857 sec/step)
step 17060 	 loss = 0.392 (2.602 sec/step)
step 17070 	 loss = 0.412 (2.526 sec/step)
step 17080 	 loss = 0.429 (2.715 sec/step)
step 17090 	 loss = 0.388 (3.028 sec/step)
step 17100 	 loss = 0.419 (2.499 sec/step)
step 17110 	 loss = 0.434 (2.845 sec/step)
step 17120 	 loss = 0.394 (2.805 sec/step)
step 17130 	 loss = 0.418 (2.460 sec/step)
step 17140 	 loss = 0.421 (2.890 sec/step)
step 17150 	 loss = 0.402 (2.936 sec/step)
step 17160 	 loss = 0.405 (3.057 sec/step)
step 17170 	 loss = 0.406 (2.762 sec/step)
step 17180 	 loss = 0.393 (3.001 sec/step)
step 17190 	 loss = 0.407 (2.881 sec/step)
step 17200 	 loss = 0.417 (3.052 sec/step)
step 17210 	 loss = 0.399 (3.085 sec/step)
step 17220 	 loss = 0.384 (2.620 sec/step)
step 17230 	 loss = 0.399 (2.599 sec/step)
step 17240 	 loss = 0.402 (3.030 sec/step)
step 17250 	 loss = 0.414 (2.914 sec/step)
step 17260 	 loss = 0.398 (2.605 sec/step)
step 17270 	 loss = 0.401 (3.162 sec/step)
step 17280 	 loss = 0.400 (2.816 sec/step)
step 17290 	 loss = 0.397 (2.858 sec/step)
step 17300 	 loss = 0.429 (2.591 sec/step)
step 17310 	 loss = 0.428 (2.749 sec/step)
step 17320 	 loss = 0.390 (2.741 sec/step)
step 17330 	 loss = 0.400 (3.163 sec/step)
step 17340 	 loss = 0.376 (3.007 sec/step)
step 17350 	 loss = 0.400 (2.885 sec/step)
step 17360 	 loss = 0.414 (3.004 sec/step)
step 17370 	 loss = 0.426 (3.229 sec/step)
step 17380 	 loss = 0.406 (2.861 sec/step)
step 17390 	 loss = 0.399 (2.677 sec/step)
step 17400 	 loss = 0.431 (2.901 sec/step)
step 17410 	 loss = 0.396 (2.779 sec/step)
step 17420 	 loss = 0.399 (3.005 sec/step)
step 17430 	 loss = 0.384 (2.868 sec/step)
step 17440 	 loss = 0.399 (2.605 sec/step)
step 17450 	 loss = 0.399 (2.648 sec/step)
step 17460 	 loss = 0.391 (2.972 sec/step)
step 17470 	 loss = 0.383 (3.241 sec/step)
step 17480 	 loss = 0.389 (2.992 sec/step)
step 17490 	 loss = 0.402 (3.021 sec/step)
step 17500 	 loss = 0.400 (2.752 sec/step)
step 17510 	 loss = 0.419 (2.846 sec/step)
step 17520 	 loss = 0.434 (2.682 sec/step)
step 17530 	 loss = 0.408 (2.835 sec/step)
step 17540 	 loss = 0.406 (2.892 sec/step)
step 17550 	 loss = 0.415 (2.760 sec/step)
step 17560 	 loss = 0.408 (2.763 sec/step)
step 17570 	 loss = 0.407 (2.620 sec/step)
step 17580 	 loss = 0.418 (2.841 sec/step)
step 17590 	 loss = 0.413 (2.739 sec/step)
step 17600 	 loss = 0.406 (3.031 sec/step)
step 17610 	 loss = 0.410 (2.873 sec/step)
step 17620 	 loss = 0.423 (2.698 sec/step)
step 17630 	 loss = 0.402 (2.704 sec/step)
step 17640 	 loss = 0.395 (2.816 sec/step)
step 17650 	 loss = 0.402 (2.837 sec/step)
step 17660 	 loss = 0.400 (3.018 sec/step)
step 17670 	 loss = 0.401 (2.555 sec/step)
step 17680 	 loss = 0.391 (2.795 sec/step)
step 17690 	 loss = 0.391 (2.777 sec/step)
step 17700 	 loss = 0.410 (2.673 sec/step)
step 17710 	 loss = 0.395 (2.772 sec/step)
step 17720 	 loss = 0.393 (2.775 sec/step)
step 17730 	 loss = 0.400 (2.748 sec/step)
step 17740 	 loss = 0.435 (2.874 sec/step)
step 17750 	 loss = 0.396 (2.866 sec/step)
step 17760 	 loss = 0.415 (2.906 sec/step)
step 17770 	 loss = 0.402 (2.871 sec/step)
step 17780 	 loss = 0.385 (2.974 sec/step)
step 17790 	 loss = 0.400 (3.036 sec/step)
step 17800 	 loss = 0.401 (2.804 sec/step)
step 17810 	 loss = 0.415 (2.605 sec/step)
step 17820 	 loss = 0.427 (2.789 sec/step)
step 17830 	 loss = 0.374 (2.803 sec/step)
step 17840 	 loss = 0.420 (2.907 sec/step)
step 17850 	 loss = 0.411 (2.579 sec/step)
step 17860 	 loss = 0.442 (2.883 sec/step)
step 17870 	 loss = 0.399 (2.864 sec/step)
step 17880 	 loss = 0.377 (2.963 sec/step)
step 17890 	 loss = 0.432 (2.815 sec/step)
step 17900 	 loss = 0.399 (2.768 sec/step)
step 17910 	 loss = 0.389 (3.079 sec/step)
step 17920 	 loss = 0.414 (2.595 sec/step)
step 17930 	 loss = 0.405 (2.609 sec/step)
step 17940 	 loss = 0.393 (3.057 sec/step)
step 17950 	 loss = 0.420 (3.097 sec/step)
step 17960 	 loss = 0.409 (2.742 sec/step)
step 17970 	 loss = 0.399 (2.957 sec/step)
step 17980 	 loss = 0.413 (3.003 sec/step)
step 17990 	 loss = 0.419 (3.015 sec/step)
step 18000 	 loss = 0.409 (2.726 sec/step)
step 18010 	 loss = 0.427 (3.140 sec/step)
step 18020 	 loss = 0.426 (2.655 sec/step)
step 18030 	 loss = 0.445 (2.830 sec/step)
step 18040 	 loss = 0.434 (2.927 sec/step)
step 18050 	 loss = 0.379 (3.210 sec/step)
step 18060 	 loss = 0.405 (2.568 sec/step)
step 18070 	 loss = 0.404 (3.017 sec/step)
step 18080 	 loss = 0.434 (2.812 sec/step)
step 18090 	 loss = 0.398 (2.899 sec/step)
step 18100 	 loss = 0.410 (3.035 sec/step)
step 18110 	 loss = 0.404 (3.328 sec/step)
step 18120 	 loss = 0.412 (2.827 sec/step)
step 18130 	 loss = 0.414 (2.981 sec/step)
step 18140 	 loss = 0.399 (2.859 sec/step)
step 18150 	 loss = 0.424 (2.847 sec/step)
step 18160 	 loss = 0.402 (2.776 sec/step)
step 18170 	 loss = 0.398 (2.672 sec/step)
step 18180 	 loss = 0.389 (2.966 sec/step)
step 18190 	 loss = 0.395 (2.828 sec/step)
step 18200 	 loss = 0.405 (2.955 sec/step)
step 18210 	 loss = 0.401 (2.777 sec/step)
step 18220 	 loss = 0.435 (2.837 sec/step)
step 18230 	 loss = 0.410 (2.872 sec/step)
step 18240 	 loss = 0.427 (2.929 sec/step)
step 18250 	 loss = 0.395 (2.846 sec/step)
step 18260 	 loss = 0.407 (2.805 sec/step)
step 18270 	 loss = 0.420 (2.701 sec/step)
step 18280 	 loss = 0.414 (2.694 sec/step)
step 18290 	 loss = 0.402 (3.313 sec/step)
step 18300 	 loss = 0.421 (2.877 sec/step)
step 18310 	 loss = 0.429 (2.814 sec/step)
step 18320 	 loss = 0.414 (2.872 sec/step)
step 18330 	 loss = 0.396 (2.681 sec/step)
step 18340 	 loss = 0.394 (2.966 sec/step)
step 18350 	 loss = 0.405 (2.829 sec/step)
step 18360 	 loss = 0.410 (2.616 sec/step)
step 18370 	 loss = 0.408 (2.848 sec/step)
step 18380 	 loss = 0.393 (2.771 sec/step)
step 18390 	 loss = 0.393 (2.612 sec/step)
step 18400 	 loss = 0.390 (3.211 sec/step)
step 18410 	 loss = 0.407 (2.976 sec/step)
step 18420 	 loss = 0.408 (2.785 sec/step)
step 18430 	 loss = 0.421 (2.565 sec/step)
step 18440 	 loss = 0.406 (2.764 sec/step)
step 18450 	 loss = 0.447 (2.558 sec/step)
step 18460 	 loss = 0.416 (2.713 sec/step)
step 18470 	 loss = 0.415 (2.712 sec/step)
step 18480 	 loss = 0.424 (2.989 sec/step)
step 18490 	 loss = 0.380 (2.817 sec/step)
step 18500 	 loss = 0.414 (2.576 sec/step)
step 18510 	 loss = 0.395 (2.865 sec/step)
step 18520 	 loss = 0.400 (3.148 sec/step)
step 18530 	 loss = 0.408 (2.778 sec/step)
Training Epoch 5/100
step 18540 	 loss = 0.355 (2.774 sec/step)
step 18550 	 loss = 0.356 (3.037 sec/step)
step 18560 	 loss = 0.383 (2.587 sec/step)
step 18570 	 loss = 0.340 (2.827 sec/step)
step 18580 	 loss = 0.357 (2.983 sec/step)
step 18590 	 loss = 0.346 (2.912 sec/step)
step 18600 	 loss = 0.367 (2.706 sec/step)
step 18610 	 loss = 0.344 (2.801 sec/step)
step 18620 	 loss = 0.380 (2.685 sec/step)
step 18630 	 loss = 0.376 (2.707 sec/step)
step 18640 	 loss = 0.373 (2.664 sec/step)
step 18650 	 loss = 0.384 (2.857 sec/step)
step 18660 	 loss = 0.342 (2.906 sec/step)
step 18670 	 loss = 0.360 (2.837 sec/step)
step 18680 	 loss = 0.358 (3.044 sec/step)
step 18690 	 loss = 0.329 (2.952 sec/step)
step 18700 	 loss = 0.346 (2.624 sec/step)
step 18710 	 loss = 0.344 (2.632 sec/step)
step 18720 	 loss = 0.364 (3.045 sec/step)
step 18730 	 loss = 0.351 (2.890 sec/step)
step 18740 	 loss = 0.354 (3.018 sec/step)
step 18750 	 loss = 0.362 (2.673 sec/step)
step 18760 	 loss = 0.352 (2.659 sec/step)
step 18770 	 loss = 0.350 (2.816 sec/step)
step 18780 	 loss = 0.358 (2.786 sec/step)
step 18790 	 loss = 0.330 (2.621 sec/step)
step 18800 	 loss = 0.363 (2.607 sec/step)
step 18810 	 loss = 0.359 (2.777 sec/step)
step 18820 	 loss = 0.360 (2.892 sec/step)
step 18830 	 loss = 0.346 (2.598 sec/step)
step 18840 	 loss = 0.347 (2.871 sec/step)
step 18850 	 loss = 0.379 (2.810 sec/step)
step 18860 	 loss = 0.390 (2.753 sec/step)
step 18870 	 loss = 0.382 (2.555 sec/step)
step 18880 	 loss = 0.341 (2.946 sec/step)
step 18890 	 loss = 0.358 (3.087 sec/step)
step 18900 	 loss = 0.357 (2.821 sec/step)
step 18910 	 loss = 0.360 (2.873 sec/step)
step 18920 	 loss = 0.357 (2.907 sec/step)
step 18930 	 loss = 0.370 (3.057 sec/step)
step 18940 	 loss = 0.355 (2.938 sec/step)
step 18950 	 loss = 0.369 (2.654 sec/step)
step 18960 	 loss = 0.365 (2.776 sec/step)
step 18970 	 loss = 0.376 (3.192 sec/step)
step 18980 	 loss = 0.351 (3.081 sec/step)
step 18990 	 loss = 0.373 (2.704 sec/step)
step 19000 	 loss = 0.376 (2.962 sec/step)
step 19010 	 loss = 0.362 (2.951 sec/step)
step 19020 	 loss = 0.352 (2.832 sec/step)
step 19030 	 loss = 0.341 (2.828 sec/step)
step 19040 	 loss = 0.362 (2.667 sec/step)
step 19050 	 loss = 0.375 (3.162 sec/step)
step 19060 	 loss = 0.335 (2.997 sec/step)
step 19070 	 loss = 0.356 (2.950 sec/step)
step 19080 	 loss = 0.349 (2.912 sec/step)
step 19090 	 loss = 0.403 (3.164 sec/step)
step 19100 	 loss = 0.375 (2.705 sec/step)
step 19110 	 loss = 0.359 (2.787 sec/step)
step 19120 	 loss = 0.376 (2.699 sec/step)
step 19130 	 loss = 0.347 (2.810 sec/step)
step 19140 	 loss = 0.356 (2.911 sec/step)
step 19150 	 loss = 0.380 (2.768 sec/step)
step 19160 	 loss = 0.348 (2.833 sec/step)
step 19170 	 loss = 0.365 (3.038 sec/step)
step 19180 	 loss = 0.372 (2.692 sec/step)
step 19190 	 loss = 0.352 (2.766 sec/step)
step 19200 	 loss = 0.362 (2.744 sec/step)
step 19210 	 loss = 0.369 (3.279 sec/step)
step 19220 	 loss = 0.374 (2.559 sec/step)
step 19230 	 loss = 0.365 (2.912 sec/step)
step 19240 	 loss = 0.352 (2.786 sec/step)
step 19250 	 loss = 0.385 (2.914 sec/step)
step 19260 	 loss = 0.365 (2.896 sec/step)
step 19270 	 loss = 0.370 (2.904 sec/step)
step 19280 	 loss = 0.371 (2.475 sec/step)
step 19290 	 loss = 0.364 (3.138 sec/step)
step 19300 	 loss = 0.374 (2.933 sec/step)
step 19310 	 loss = 0.360 (2.792 sec/step)
step 19320 	 loss = 0.364 (2.466 sec/step)
step 19330 	 loss = 0.370 (2.721 sec/step)
step 19340 	 loss = 0.358 (2.937 sec/step)
step 19350 	 loss = 0.383 (2.828 sec/step)
step 19360 	 loss = 0.361 (2.803 sec/step)
step 19370 	 loss = 0.382 (2.634 sec/step)
step 19380 	 loss = 0.384 (2.814 sec/step)
step 19390 	 loss = 0.372 (3.049 sec/step)
step 19400 	 loss = 0.365 (2.907 sec/step)
step 19410 	 loss = 0.364 (2.755 sec/step)
step 19420 	 loss = 0.354 (3.126 sec/step)
step 19430 	 loss = 0.356 (2.995 sec/step)
step 19440 	 loss = 0.353 (2.641 sec/step)
step 19450 	 loss = 0.351 (3.155 sec/step)
step 19460 	 loss = 0.376 (2.774 sec/step)
step 19470 	 loss = 0.380 (2.626 sec/step)
step 19480 	 loss = 0.355 (2.746 sec/step)
step 19490 	 loss = 0.363 (2.654 sec/step)
step 19500 	 loss = 0.369 (2.961 sec/step)
step 19510 	 loss = 0.372 (2.977 sec/step)
step 19520 	 loss = 0.350 (2.882 sec/step)
step 19530 	 loss = 0.368 (2.819 sec/step)
step 19540 	 loss = 0.367 (2.424 sec/step)
step 19550 	 loss = 0.372 (2.882 sec/step)
step 19560 	 loss = 0.350 (2.685 sec/step)
step 19570 	 loss = 0.371 (2.807 sec/step)
step 19580 	 loss = 0.333 (2.874 sec/step)
step 19590 	 loss = 0.380 (2.681 sec/step)
step 19600 	 loss = 0.391 (2.519 sec/step)
step 19610 	 loss = 0.371 (2.859 sec/step)
step 19620 	 loss = 0.355 (2.604 sec/step)
step 19630 	 loss = 0.364 (2.748 sec/step)
step 19640 	 loss = 0.344 (2.722 sec/step)
step 19650 	 loss = 0.379 (2.871 sec/step)
step 19660 	 loss = 0.367 (2.573 sec/step)
step 19670 	 loss = 0.387 (2.888 sec/step)
step 19680 	 loss = 0.356 (2.754 sec/step)
step 19690 	 loss = 0.369 (2.827 sec/step)
step 19700 	 loss = 0.351 (2.657 sec/step)
step 19710 	 loss = 0.365 (2.781 sec/step)
step 19720 	 loss = 0.346 (2.927 sec/step)
step 19730 	 loss = 0.371 (2.813 sec/step)
step 19740 	 loss = 0.358 (3.128 sec/step)
step 19750 	 loss = 0.369 (2.886 sec/step)
step 19760 	 loss = 0.352 (3.070 sec/step)
step 19770 	 loss = 0.363 (2.788 sec/step)
step 19780 	 loss = 0.356 (2.591 sec/step)
step 19790 	 loss = 0.368 (3.157 sec/step)
step 19800 	 loss = 0.341 (2.614 sec/step)
step 19810 	 loss = 0.363 (2.877 sec/step)
step 19820 	 loss = 0.369 (2.755 sec/step)
step 19830 	 loss = 0.360 (2.831 sec/step)
step 19840 	 loss = 0.354 (2.811 sec/step)
step 19850 	 loss = 0.347 (2.824 sec/step)
step 19860 	 loss = 0.360 (2.954 sec/step)
step 19870 	 loss = 0.369 (2.874 sec/step)
step 19880 	 loss = 0.361 (2.652 sec/step)
step 19890 	 loss = 0.356 (2.635 sec/step)
step 19900 	 loss = 0.383 (2.826 sec/step)
step 19910 	 loss = 0.364 (2.639 sec/step)
step 19920 	 loss = 0.365 (3.150 sec/step)
step 19930 	 loss = 0.365 (2.803 sec/step)
step 19940 	 loss = 0.396 (2.687 sec/step)
step 19950 	 loss = 0.364 (2.691 sec/step)
step 19960 	 loss = 0.375 (3.043 sec/step)
step 19970 	 loss = 0.358 (2.724 sec/step)
step 19980 	 loss = 0.377 (2.880 sec/step)
step 19990 	 loss = 0.360 (2.927 sec/step)
step 20000 	 loss = 0.350 (2.943 sec/step)
step 20010 	 loss = 0.369 (2.842 sec/step)
step 20020 	 loss = 0.377 (3.057 sec/step)
step 20030 	 loss = 0.369 (3.134 sec/step)
step 20040 	 loss = 0.368 (2.824 sec/step)
step 20050 	 loss = 0.370 (2.658 sec/step)
step 20060 	 loss = 0.358 (2.790 sec/step)
step 20070 	 loss = 0.364 (2.956 sec/step)
step 20080 	 loss = 0.366 (3.107 sec/step)
step 20090 	 loss = 0.360 (2.715 sec/step)
step 20100 	 loss = 0.367 (2.889 sec/step)
step 20110 	 loss = 0.379 (2.729 sec/step)
step 20120 	 loss = 0.403 (2.906 sec/step)
step 20130 	 loss = 0.377 (2.909 sec/step)
step 20140 	 loss = 0.360 (3.061 sec/step)
step 20150 	 loss = 0.367 (2.845 sec/step)
step 20160 	 loss = 0.383 (2.929 sec/step)
step 20170 	 loss = 0.375 (3.125 sec/step)
step 20180 	 loss = 0.350 (2.998 sec/step)
step 20190 	 loss = 0.380 (2.719 sec/step)
step 20200 	 loss = 0.378 (2.890 sec/step)
step 20210 	 loss = 0.360 (2.641 sec/step)
step 20220 	 loss = 0.358 (2.780 sec/step)
step 20230 	 loss = 0.365 (2.655 sec/step)
step 20240 	 loss = 0.382 (3.074 sec/step)
step 20250 	 loss = 0.361 (2.668 sec/step)
step 20260 	 loss = 0.354 (2.810 sec/step)
step 20270 	 loss = 0.356 (2.725 sec/step)
step 20280 	 loss = 0.392 (2.724 sec/step)
step 20290 	 loss = 0.379 (2.823 sec/step)
step 20300 	 loss = 0.374 (2.828 sec/step)
step 20310 	 loss = 0.358 (2.996 sec/step)
step 20320 	 loss = 0.359 (2.788 sec/step)
step 20330 	 loss = 0.327 (2.614 sec/step)
step 20340 	 loss = 0.367 (2.881 sec/step)
step 20350 	 loss = 0.379 (3.128 sec/step)
step 20360 	 loss = 0.358 (2.735 sec/step)
step 20370 	 loss = 0.373 (2.911 sec/step)
step 20380 	 loss = 0.421 (2.967 sec/step)
step 20390 	 loss = 0.371 (2.901 sec/step)
step 20400 	 loss = 0.373 (2.803 sec/step)
step 20410 	 loss = 0.404 (2.971 sec/step)
step 20420 	 loss = 0.369 (2.996 sec/step)
step 20430 	 loss = 0.361 (3.032 sec/step)
step 20440 	 loss = 0.356 (2.839 sec/step)
step 20450 	 loss = 0.389 (2.677 sec/step)
step 20460 	 loss = 0.371 (2.588 sec/step)
step 20470 	 loss = 0.370 (3.066 sec/step)
step 20480 	 loss = 0.359 (2.736 sec/step)
step 20490 	 loss = 0.377 (2.936 sec/step)
step 20500 	 loss = 0.356 (2.707 sec/step)
step 20510 	 loss = 0.367 (2.749 sec/step)
step 20520 	 loss = 0.348 (2.778 sec/step)
step 20530 	 loss = 0.362 (2.633 sec/step)
step 20540 	 loss = 0.345 (2.957 sec/step)
step 20550 	 loss = 0.388 (2.716 sec/step)
step 20560 	 loss = 0.337 (2.814 sec/step)
step 20570 	 loss = 0.362 (2.975 sec/step)
step 20580 	 loss = 0.371 (2.813 sec/step)
step 20590 	 loss = 0.366 (2.981 sec/step)
step 20600 	 loss = 0.375 (3.109 sec/step)
step 20610 	 loss = 0.364 (2.650 sec/step)
step 20620 	 loss = 0.372 (2.759 sec/step)
step 20630 	 loss = 0.362 (2.933 sec/step)
step 20640 	 loss = 0.351 (2.899 sec/step)
step 20650 	 loss = 0.369 (3.237 sec/step)
step 20660 	 loss = 0.369 (2.979 sec/step)
step 20670 	 loss = 0.378 (2.507 sec/step)
step 20680 	 loss = 0.374 (3.105 sec/step)
step 20690 	 loss = 0.371 (2.803 sec/step)
step 20700 	 loss = 0.368 (2.816 sec/step)
step 20710 	 loss = 0.361 (2.531 sec/step)
step 20720 	 loss = 0.383 (2.918 sec/step)
step 20730 	 loss = 0.365 (2.872 sec/step)
step 20740 	 loss = 0.367 (2.783 sec/step)
step 20750 	 loss = 0.369 (2.906 sec/step)
step 20760 	 loss = 0.371 (2.950 sec/step)
step 20770 	 loss = 0.358 (2.873 sec/step)
step 20780 	 loss = 0.375 (2.828 sec/step)
step 20790 	 loss = 0.367 (2.667 sec/step)
step 20800 	 loss = 0.374 (3.034 sec/step)
step 20810 	 loss = 0.397 (2.868 sec/step)
step 20820 	 loss = 0.358 (2.955 sec/step)
step 20830 	 loss = 0.379 (2.855 sec/step)
step 20840 	 loss = 0.384 (2.721 sec/step)
step 20850 	 loss = 0.358 (3.401 sec/step)
step 20860 	 loss = 0.397 (2.762 sec/step)
step 20870 	 loss = 0.377 (2.699 sec/step)
step 20880 	 loss = 0.378 (2.794 sec/step)
step 20890 	 loss = 0.382 (2.742 sec/step)
step 20900 	 loss = 0.379 (2.926 sec/step)
step 20910 	 loss = 0.384 (2.808 sec/step)
step 20920 	 loss = 0.383 (2.937 sec/step)
step 20930 	 loss = 0.365 (2.664 sec/step)
step 20940 	 loss = 0.368 (2.823 sec/step)
step 20950 	 loss = 0.356 (2.734 sec/step)
step 20960 	 loss = 0.364 (2.636 sec/step)
step 20970 	 loss = 0.397 (3.332 sec/step)
step 20980 	 loss = 0.388 (2.852 sec/step)
step 20990 	 loss = 0.393 (2.538 sec/step)
step 21000 	 loss = 0.399 (2.548 sec/step)
step 21010 	 loss = 0.388 (2.817 sec/step)
step 21020 	 loss = 0.383 (2.841 sec/step)
step 21030 	 loss = 0.394 (2.659 sec/step)
step 21040 	 loss = 0.354 (2.839 sec/step)
step 21050 	 loss = 0.384 (3.299 sec/step)
step 21060 	 loss = 0.396 (2.738 sec/step)
step 21070 	 loss = 0.370 (2.815 sec/step)
step 21080 	 loss = 0.367 (2.710 sec/step)
step 21090 	 loss = 0.371 (2.850 sec/step)
step 21100 	 loss = 0.372 (2.899 sec/step)
step 21110 	 loss = 0.387 (3.200 sec/step)
step 21120 	 loss = 0.362 (2.645 sec/step)
step 21130 	 loss = 0.358 (2.670 sec/step)
step 21140 	 loss = 0.378 (2.267 sec/step)
step 21150 	 loss = 0.378 (2.206 sec/step)
step 21160 	 loss = 0.372 (2.361 sec/step)
step 21170 	 loss = 0.375 (2.366 sec/step)
step 21180 	 loss = 0.380 (2.288 sec/step)
step 21190 	 loss = 0.376 (2.352 sec/step)
step 21200 	 loss = 0.377 (2.254 sec/step)
step 21210 	 loss = 0.380 (2.469 sec/step)
step 21220 	 loss = 0.371 (2.339 sec/step)
step 21230 	 loss = 0.365 (2.301 sec/step)
step 21240 	 loss = 0.359 (2.402 sec/step)
step 21250 	 loss = 0.370 (2.406 sec/step)
step 21260 	 loss = 0.378 (2.460 sec/step)
step 21270 	 loss = 0.340 (3.990 sec/step)
step 21280 	 loss = 0.385 (2.546 sec/step)
step 21290 	 loss = 0.386 (2.454 sec/step)
step 21300 	 loss = 0.367 (2.901 sec/step)
step 21310 	 loss = 0.376 (2.724 sec/step)
step 21320 	 loss = 0.360 (2.699 sec/step)
step 21330 	 loss = 0.360 (3.021 sec/step)
step 21340 	 loss = 0.370 (2.714 sec/step)
step 21350 	 loss = 0.370 (2.783 sec/step)
step 21360 	 loss = 0.399 (3.120 sec/step)
step 21370 	 loss = 0.364 (2.804 sec/step)
step 21380 	 loss = 0.378 (3.239 sec/step)
step 21390 	 loss = 0.367 (2.625 sec/step)
step 21400 	 loss = 0.369 (2.924 sec/step)
step 21410 	 loss = 0.374 (2.876 sec/step)
step 21420 	 loss = 0.376 (2.655 sec/step)
step 21430 	 loss = 0.368 (2.997 sec/step)
step 21440 	 loss = 0.379 (2.799 sec/step)
step 21450 	 loss = 0.372 (2.638 sec/step)
step 21460 	 loss = 0.392 (3.130 sec/step)
step 21470 	 loss = 0.375 (2.660 sec/step)
step 21480 	 loss = 0.394 (2.860 sec/step)
step 21490 	 loss = 0.394 (2.609 sec/step)
step 21500 	 loss = 0.392 (3.077 sec/step)
step 21510 	 loss = 0.381 (3.345 sec/step)
step 21520 	 loss = 0.375 (2.554 sec/step)
step 21530 	 loss = 0.393 (2.679 sec/step)
step 21540 	 loss = 0.397 (2.625 sec/step)
step 21550 	 loss = 0.403 (2.659 sec/step)
step 21560 	 loss = 0.379 (3.135 sec/step)
step 21570 	 loss = 0.395 (2.765 sec/step)
step 21580 	 loss = 0.366 (3.082 sec/step)
step 21590 	 loss = 0.379 (2.876 sec/step)
step 21600 	 loss = 0.388 (2.656 sec/step)
step 21610 	 loss = 0.369 (2.948 sec/step)
step 21620 	 loss = 0.373 (2.871 sec/step)
step 21630 	 loss = 0.359 (2.625 sec/step)
step 21640 	 loss = 0.389 (2.906 sec/step)
step 21650 	 loss = 0.359 (2.710 sec/step)
step 21660 	 loss = 0.398 (3.208 sec/step)
step 21670 	 loss = 0.357 (3.030 sec/step)
step 21680 	 loss = 0.372 (2.895 sec/step)
step 21690 	 loss = 0.373 (3.069 sec/step)
step 21700 	 loss = 0.403 (2.725 sec/step)
step 21710 	 loss = 0.336 (3.251 sec/step)
step 21720 	 loss = 0.361 (3.169 sec/step)
step 21730 	 loss = 0.377 (2.809 sec/step)
step 21740 	 loss = 0.385 (2.716 sec/step)
step 21750 	 loss = 0.383 (2.750 sec/step)
step 21760 	 loss = 0.380 (2.816 sec/step)
step 21770 	 loss = 0.384 (2.852 sec/step)
step 21780 	 loss = 0.397 (2.667 sec/step)
step 21790 	 loss = 0.376 (2.953 sec/step)
step 21800 	 loss = 0.378 (2.580 sec/step)
step 21810 	 loss = 0.387 (2.770 sec/step)
step 21820 	 loss = 0.390 (2.888 sec/step)
step 21830 	 loss = 0.393 (2.756 sec/step)
step 21840 	 loss = 0.390 (2.739 sec/step)
step 21850 	 loss = 0.376 (3.032 sec/step)
step 21860 	 loss = 0.354 (3.154 sec/step)
step 21870 	 loss = 0.391 (2.743 sec/step)
step 21880 	 loss = 0.369 (2.914 sec/step)
step 21890 	 loss = 0.356 (2.703 sec/step)
step 21900 	 loss = 0.392 (2.644 sec/step)
step 21910 	 loss = 0.395 (2.761 sec/step)
step 21920 	 loss = 0.388 (2.934 sec/step)
step 21930 	 loss = 0.360 (2.861 sec/step)
step 21940 	 loss = 0.366 (2.743 sec/step)
step 21950 	 loss = 0.370 (2.930 sec/step)
step 21960 	 loss = 0.372 (2.761 sec/step)
step 21970 	 loss = 0.359 (2.824 sec/step)
step 21980 	 loss = 0.385 (2.790 sec/step)
step 21990 	 loss = 0.363 (3.133 sec/step)
step 22000 	 loss = 0.387 (2.921 sec/step)
step 22010 	 loss = 0.403 (3.005 sec/step)
step 22020 	 loss = 0.376 (2.634 sec/step)
step 22030 	 loss = 0.397 (2.769 sec/step)
step 22040 	 loss = 0.380 (2.708 sec/step)
step 22050 	 loss = 0.387 (2.794 sec/step)
step 22060 	 loss = 0.399 (2.948 sec/step)
step 22070 	 loss = 0.359 (2.695 sec/step)
step 22080 	 loss = 0.387 (2.878 sec/step)
step 22090 	 loss = 0.358 (3.095 sec/step)
step 22100 	 loss = 0.376 (2.818 sec/step)
step 22110 	 loss = 0.386 (2.785 sec/step)
step 22120 	 loss = 0.366 (2.875 sec/step)
step 22130 	 loss = 0.387 (2.499 sec/step)
step 22140 	 loss = 0.371 (2.748 sec/step)
step 22150 	 loss = 0.389 (2.846 sec/step)
step 22160 	 loss = 0.348 (2.471 sec/step)
step 22170 	 loss = 0.342 (2.988 sec/step)
step 22180 	 loss = 0.382 (2.745 sec/step)
step 22190 	 loss = 0.378 (2.678 sec/step)
step 22200 	 loss = 0.400 (2.754 sec/step)
step 22210 	 loss = 0.383 (2.922 sec/step)
step 22220 	 loss = 0.384 (2.712 sec/step)
step 22230 	 loss = 0.398 (2.789 sec/step)
step 22240 	 loss = 0.388 (2.568 sec/step)
Training Epoch 6/100
step 22250 	 loss = 0.354 (2.833 sec/step)
step 22260 	 loss = 0.343 (3.162 sec/step)
step 22270 	 loss = 0.345 (3.185 sec/step)
step 22280 	 loss = 0.330 (3.421 sec/step)
step 22290 	 loss = 0.320 (2.846 sec/step)
step 22300 	 loss = 0.329 (2.967 sec/step)
step 22310 	 loss = 0.357 (3.029 sec/step)
step 22320 	 loss = 0.315 (2.761 sec/step)
step 22330 	 loss = 0.338 (2.582 sec/step)
step 22340 	 loss = 0.324 (3.010 sec/step)
step 22350 	 loss = 0.334 (2.865 sec/step)
step 22360 	 loss = 0.333 (2.696 sec/step)
step 22370 	 loss = 0.322 (2.576 sec/step)
step 22380 	 loss = 0.303 (2.986 sec/step)
step 22390 	 loss = 0.328 (3.023 sec/step)
step 22400 	 loss = 0.359 (2.686 sec/step)
step 22410 	 loss = 0.307 (2.769 sec/step)
step 22420 	 loss = 0.328 (3.332 sec/step)
step 22430 	 loss = 0.313 (2.927 sec/step)
step 22440 	 loss = 0.346 (2.723 sec/step)
step 22450 	 loss = 0.333 (2.900 sec/step)
step 22460 	 loss = 0.337 (2.867 sec/step)
step 22470 	 loss = 0.345 (2.816 sec/step)
step 22480 	 loss = 0.336 (2.554 sec/step)
step 22490 	 loss = 0.314 (2.834 sec/step)
step 22500 	 loss = 0.307 (2.699 sec/step)
step 22510 	 loss = 0.320 (2.957 sec/step)
step 22520 	 loss = 0.313 (2.521 sec/step)
step 22530 	 loss = 0.330 (2.617 sec/step)
step 22540 	 loss = 0.327 (2.621 sec/step)
step 22550 	 loss = 0.343 (2.720 sec/step)
step 22560 	 loss = 0.352 (2.832 sec/step)
step 22570 	 loss = 0.313 (3.032 sec/step)
step 22580 	 loss = 0.300 (2.792 sec/step)
step 22590 	 loss = 0.343 (2.957 sec/step)
step 22600 	 loss = 0.337 (2.622 sec/step)
step 22610 	 loss = 0.314 (2.678 sec/step)
step 22620 	 loss = 0.319 (2.768 sec/step)
step 22630 	 loss = 0.323 (2.634 sec/step)
step 22640 	 loss = 0.349 (3.103 sec/step)
step 22650 	 loss = 0.343 (2.889 sec/step)
step 22660 	 loss = 0.336 (2.769 sec/step)
step 22670 	 loss = 0.340 (2.983 sec/step)
step 22680 	 loss = 0.337 (2.730 sec/step)
step 22690 	 loss = 0.328 (3.042 sec/step)
step 22700 	 loss = 0.321 (3.037 sec/step)
step 22710 	 loss = 0.327 (2.814 sec/step)
step 22720 	 loss = 0.330 (2.820 sec/step)
step 22730 	 loss = 0.317 (2.728 sec/step)
step 22740 	 loss = 0.328 (2.930 sec/step)
step 22750 	 loss = 0.325 (2.631 sec/step)
step 22760 	 loss = 0.362 (2.960 sec/step)
step 22770 	 loss = 0.308 (3.138 sec/step)
step 22780 	 loss = 0.337 (2.970 sec/step)
step 22790 	 loss = 0.329 (3.100 sec/step)
step 22800 	 loss = 0.340 (2.708 sec/step)
step 22810 	 loss = 0.312 (2.672 sec/step)
step 22820 	 loss = 0.312 (3.084 sec/step)
step 22830 	 loss = 0.327 (2.637 sec/step)
step 22840 	 loss = 0.299 (2.891 sec/step)
step 22850 	 loss = 0.313 (2.795 sec/step)
step 22860 	 loss = 0.319 (2.786 sec/step)
step 22870 	 loss = 0.329 (2.944 sec/step)
step 22880 	 loss = 0.336 (2.810 sec/step)
step 22890 	 loss = 0.313 (2.964 sec/step)
step 22900 	 loss = 0.328 (2.921 sec/step)
step 22910 	 loss = 0.312 (2.742 sec/step)
step 22920 	 loss = 0.329 (3.046 sec/step)
step 22930 	 loss = 0.339 (2.650 sec/step)
step 22940 	 loss = 0.342 (2.894 sec/step)
step 22950 	 loss = 0.283 (2.643 sec/step)
step 22960 	 loss = 0.337 (2.879 sec/step)
step 22970 	 loss = 0.345 (2.891 sec/step)
step 22980 	 loss = 0.311 (2.792 sec/step)
step 22990 	 loss = 0.313 (2.875 sec/step)
step 23000 	 loss = 0.323 (2.657 sec/step)
step 23010 	 loss = 0.320 (2.831 sec/step)
step 23020 	 loss = 0.328 (2.750 sec/step)
step 23030 	 loss = 0.336 (2.687 sec/step)
step 23040 	 loss = 0.335 (3.072 sec/step)
step 23050 	 loss = 0.325 (3.068 sec/step)
step 23060 	 loss = 0.323 (3.226 sec/step)
step 23070 	 loss = 0.335 (2.808 sec/step)
step 23080 	 loss = 0.328 (2.827 sec/step)
step 23090 	 loss = 0.331 (3.045 sec/step)
step 23100 	 loss = 0.339 (2.541 sec/step)
step 23110 	 loss = 0.313 (2.764 sec/step)
step 23120 	 loss = 0.326 (2.770 sec/step)
step 23130 	 loss = 0.346 (2.933 sec/step)
step 23140 	 loss = 0.327 (2.871 sec/step)
step 23150 	 loss = 0.336 (2.625 sec/step)
step 23160 	 loss = 0.330 (2.804 sec/step)
step 23170 	 loss = 0.310 (2.696 sec/step)
step 23180 	 loss = 0.321 (2.979 sec/step)
step 23190 	 loss = 0.329 (3.042 sec/step)
step 23200 	 loss = 0.324 (3.010 sec/step)
step 23210 	 loss = 0.344 (2.820 sec/step)
step 23220 	 loss = 0.342 (3.045 sec/step)
step 23230 	 loss = 0.339 (2.907 sec/step)
step 23240 	 loss = 0.309 (3.032 sec/step)
step 23250 	 loss = 0.319 (2.873 sec/step)
step 23260 	 loss = 0.326 (2.875 sec/step)
step 23270 	 loss = 0.340 (2.627 sec/step)
step 23280 	 loss = 0.341 (2.634 sec/step)
step 23290 	 loss = 0.356 (2.691 sec/step)
step 23300 	 loss = 0.329 (2.855 sec/step)
step 23310 	 loss = 0.331 (2.924 sec/step)
step 23320 	 loss = 0.330 (3.151 sec/step)
step 23330 	 loss = 0.350 (2.626 sec/step)
step 23340 	 loss = 0.338 (2.843 sec/step)
step 23350 	 loss = 0.328 (2.637 sec/step)
step 23360 	 loss = 0.317 (2.612 sec/step)
step 23370 	 loss = 0.344 (3.017 sec/step)
step 23380 	 loss = 0.299 (2.763 sec/step)
step 23390 	 loss = 0.315 (2.791 sec/step)
step 23400 	 loss = 0.320 (2.663 sec/step)
step 23410 	 loss = 0.315 (2.645 sec/step)
step 23420 	 loss = 0.339 (2.669 sec/step)
step 23430 	 loss = 0.307 (2.645 sec/step)
step 23440 	 loss = 0.336 (2.764 sec/step)
step 23450 	 loss = 0.346 (2.886 sec/step)
step 23460 	 loss = 0.314 (3.334 sec/step)
step 23470 	 loss = 0.334 (2.999 sec/step)
step 23480 	 loss = 0.331 (2.930 sec/step)
step 23490 	 loss = 0.323 (2.801 sec/step)
step 23500 	 loss = 0.308 (2.778 sec/step)
step 23510 	 loss = 0.323 (2.932 sec/step)
step 23520 	 loss = 0.330 (2.859 sec/step)
step 23530 	 loss = 0.315 (2.771 sec/step)
step 23540 	 loss = 0.334 (3.090 sec/step)
step 23550 	 loss = 0.323 (2.774 sec/step)
step 23560 	 loss = 0.296 (2.906 sec/step)
step 23570 	 loss = 0.321 (2.533 sec/step)
step 23580 	 loss = 0.329 (2.627 sec/step)
step 23590 	 loss = 0.347 (2.936 sec/step)
step 23600 	 loss = 0.350 (2.973 sec/step)
step 23610 	 loss = 0.327 (2.628 sec/step)
step 23620 	 loss = 0.304 (3.341 sec/step)
step 23630 	 loss = 0.331 (2.897 sec/step)
step 23640 	 loss = 0.340 (2.624 sec/step)
step 23650 	 loss = 0.324 (2.962 sec/step)
step 23660 	 loss = 0.312 (2.615 sec/step)
step 23670 	 loss = 0.326 (2.895 sec/step)
step 23680 	 loss = 0.313 (2.871 sec/step)
step 23690 	 loss = 0.308 (2.767 sec/step)
step 23700 	 loss = 0.312 (2.901 sec/step)
step 23710 	 loss = 0.332 (2.652 sec/step)
step 23720 	 loss = 0.329 (2.881 sec/step)
step 23730 	 loss = 0.323 (2.717 sec/step)
step 23740 	 loss = 0.321 (2.850 sec/step)
step 23750 	 loss = 0.326 (2.675 sec/step)
step 23760 	 loss = 0.340 (2.511 sec/step)
step 23770 	 loss = 0.330 (2.636 sec/step)
step 23780 	 loss = 0.314 (2.599 sec/step)
step 23790 	 loss = 0.334 (2.914 sec/step)
step 23800 	 loss = 0.340 (3.026 sec/step)
step 23810 	 loss = 0.349 (2.907 sec/step)
step 23820 	 loss = 0.308 (2.679 sec/step)
step 23830 	 loss = 0.327 (2.803 sec/step)
step 23840 	 loss = 0.328 (2.871 sec/step)
step 23850 	 loss = 0.339 (2.696 sec/step)
step 23860 	 loss = 0.321 (2.915 sec/step)
step 23870 	 loss = 0.336 (2.710 sec/step)
step 23880 	 loss = 0.338 (2.659 sec/step)
step 23890 	 loss = 0.334 (2.646 sec/step)
step 23900 	 loss = 0.313 (2.592 sec/step)
step 23910 	 loss = 0.329 (2.927 sec/step)
step 23920 	 loss = 0.316 (2.824 sec/step)
step 23930 	 loss = 0.347 (2.590 sec/step)
step 23940 	 loss = 0.320 (2.694 sec/step)
step 23950 	 loss = 0.325 (2.900 sec/step)
step 23960 	 loss = 0.337 (2.573 sec/step)
step 23970 	 loss = 0.328 (2.732 sec/step)
step 23980 	 loss = 0.324 (2.892 sec/step)
step 23990 	 loss = 0.332 (2.778 sec/step)
step 24000 	 loss = 0.307 (2.547 sec/step)
step 24010 	 loss = 0.335 (2.584 sec/step)
step 24020 	 loss = 0.340 (2.485 sec/step)
step 24030 	 loss = 0.338 (2.691 sec/step)
step 24040 	 loss = 0.335 (2.841 sec/step)
step 24050 	 loss = 0.332 (3.218 sec/step)
step 24060 	 loss = 0.310 (2.942 sec/step)
step 24070 	 loss = 0.313 (2.779 sec/step)
step 24080 	 loss = 0.322 (2.649 sec/step)
step 24090 	 loss = 0.352 (2.754 sec/step)
step 24100 	 loss = 0.319 (2.689 sec/step)
step 24110 	 loss = 0.330 (2.857 sec/step)
step 24120 	 loss = 0.356 (2.797 sec/step)
step 24130 	 loss = 0.355 (2.935 sec/step)
step 24140 	 loss = 0.348 (2.833 sec/step)
step 24150 	 loss = 0.323 (2.419 sec/step)
step 24160 	 loss = 0.322 (2.598 sec/step)
step 24170 	 loss = 0.329 (2.569 sec/step)
step 24180 	 loss = 0.330 (2.900 sec/step)
step 24190 	 loss = 0.326 (2.802 sec/step)
step 24200 	 loss = 0.328 (3.038 sec/step)
step 24210 	 loss = 0.322 (2.907 sec/step)
step 24220 	 loss = 0.304 (2.745 sec/step)
step 24230 	 loss = 0.361 (2.878 sec/step)
step 24240 	 loss = 0.332 (2.984 sec/step)
step 24250 	 loss = 0.365 (2.988 sec/step)
step 24260 	 loss = 0.307 (2.715 sec/step)
step 24270 	 loss = 0.322 (2.818 sec/step)
step 24280 	 loss = 0.330 (2.945 sec/step)
step 24290 	 loss = 0.316 (2.685 sec/step)
step 24300 	 loss = 0.327 (2.660 sec/step)
step 24310 	 loss = 0.331 (2.680 sec/step)
step 24320 	 loss = 0.328 (3.409 sec/step)
step 24330 	 loss = 0.335 (2.665 sec/step)
step 24340 	 loss = 0.329 (2.909 sec/step)
step 24350 	 loss = 0.311 (3.216 sec/step)
step 24360 	 loss = 0.345 (2.719 sec/step)
step 24370 	 loss = 0.336 (2.921 sec/step)
step 24380 	 loss = 0.341 (2.960 sec/step)
step 24390 	 loss = 0.323 (2.856 sec/step)
step 24400 	 loss = 0.316 (2.623 sec/step)
step 24410 	 loss = 0.335 (2.892 sec/step)
step 24420 	 loss = 0.341 (2.855 sec/step)
step 24430 	 loss = 0.327 (2.754 sec/step)
step 24440 	 loss = 0.327 (2.771 sec/step)
step 24450 	 loss = 0.347 (3.315 sec/step)
step 24460 	 loss = 0.323 (2.638 sec/step)
step 24470 	 loss = 0.330 (2.675 sec/step)
step 24480 	 loss = 0.319 (2.827 sec/step)
step 24490 	 loss = 0.323 (2.798 sec/step)
step 24500 	 loss = 0.324 (2.771 sec/step)
step 24510 	 loss = 0.348 (2.763 sec/step)
step 24520 	 loss = 0.335 (2.779 sec/step)
step 24530 	 loss = 0.332 (2.810 sec/step)
step 24540 	 loss = 0.361 (2.831 sec/step)
step 24550 	 loss = 0.334 (2.956 sec/step)
step 24560 	 loss = 0.327 (2.822 sec/step)
step 24570 	 loss = 0.326 (2.953 sec/step)
step 24580 	 loss = 0.339 (2.652 sec/step)
step 24590 	 loss = 0.307 (2.915 sec/step)
step 24600 	 loss = 0.344 (2.945 sec/step)
step 24610 	 loss = 0.320 (3.028 sec/step)
step 24620 	 loss = 0.344 (2.926 sec/step)
step 24630 	 loss = 0.361 (2.678 sec/step)
step 24640 	 loss = 0.335 (2.910 sec/step)
step 24650 	 loss = 0.319 (2.693 sec/step)
step 24660 	 loss = 0.319 (3.249 sec/step)
step 24670 	 loss = 0.332 (2.706 sec/step)
step 24680 	 loss = 0.347 (2.702 sec/step)
step 24690 	 loss = 0.346 (2.939 sec/step)
step 24700 	 loss = 0.340 (2.844 sec/step)
step 24710 	 loss = 0.322 (2.922 sec/step)
step 24720 	 loss = 0.337 (2.850 sec/step)
step 24730 	 loss = 0.324 (2.683 sec/step)
step 24740 	 loss = 0.320 (2.833 sec/step)
step 24750 	 loss = 0.338 (2.933 sec/step)
step 24760 	 loss = 0.316 (2.422 sec/step)
step 24770 	 loss = 0.311 (2.815 sec/step)
step 24780 	 loss = 0.330 (3.000 sec/step)
step 24790 	 loss = 0.322 (2.343 sec/step)
step 24800 	 loss = 0.325 (2.797 sec/step)
step 24810 	 loss = 0.313 (2.723 sec/step)
step 24820 	 loss = 0.306 (2.873 sec/step)
step 24830 	 loss = 0.322 (2.805 sec/step)
step 24840 	 loss = 0.329 (2.807 sec/step)
step 24850 	 loss = 0.322 (2.637 sec/step)
step 24860 	 loss = 0.340 (3.118 sec/step)
step 24870 	 loss = 0.339 (2.865 sec/step)
step 24880 	 loss = 0.340 (2.822 sec/step)
step 24890 	 loss = 0.359 (2.789 sec/step)
step 24900 	 loss = 0.332 (2.873 sec/step)
step 24910 	 loss = 0.346 (2.916 sec/step)
step 24920 	 loss = 0.336 (2.829 sec/step)
step 24930 	 loss = 0.308 (2.704 sec/step)
step 24940 	 loss = 0.315 (2.811 sec/step)
step 24950 	 loss = 0.327 (2.742 sec/step)
step 24960 	 loss = 0.334 (2.588 sec/step)
step 24970 	 loss = 0.325 (2.606 sec/step)
step 24980 	 loss = 0.324 (2.741 sec/step)
step 24990 	 loss = 0.326 (3.039 sec/step)
step 25000 	 loss = 0.319 (3.183 sec/step)
step 25010 	 loss = 0.324 (2.881 sec/step)
step 25020 	 loss = 0.335 (2.779 sec/step)
step 25030 	 loss = 0.340 (3.257 sec/step)
step 25040 	 loss = 0.336 (2.911 sec/step)
step 25050 	 loss = 0.318 (3.095 sec/step)
step 25060 	 loss = 0.343 (2.675 sec/step)
step 25070 	 loss = 0.345 (2.722 sec/step)
step 25080 	 loss = 0.327 (2.819 sec/step)
step 25090 	 loss = 0.328 (2.979 sec/step)
step 25100 	 loss = 0.326 (2.838 sec/step)
step 25110 	 loss = 0.328 (2.884 sec/step)
step 25120 	 loss = 0.326 (2.924 sec/step)
step 25130 	 loss = 0.339 (3.014 sec/step)
step 25140 	 loss = 0.336 (3.174 sec/step)
step 25150 	 loss = 0.325 (2.568 sec/step)
step 25160 	 loss = 0.328 (2.942 sec/step)
step 25170 	 loss = 0.332 (2.987 sec/step)
step 25180 	 loss = 0.317 (2.787 sec/step)
step 25190 	 loss = 0.342 (2.629 sec/step)
step 25200 	 loss = 0.329 (3.108 sec/step)
step 25210 	 loss = 0.352 (3.002 sec/step)
step 25220 	 loss = 0.338 (2.967 sec/step)
step 25230 	 loss = 0.331 (2.610 sec/step)
step 25240 	 loss = 0.340 (3.120 sec/step)
step 25250 	 loss = 0.337 (2.938 sec/step)
step 25260 	 loss = 0.342 (2.743 sec/step)
step 25270 	 loss = 0.330 (2.628 sec/step)
step 25280 	 loss = 0.315 (2.910 sec/step)
step 25290 	 loss = 0.327 (2.992 sec/step)
step 25300 	 loss = 0.322 (2.889 sec/step)
step 25310 	 loss = 0.311 (2.699 sec/step)
step 25320 	 loss = 0.359 (2.886 sec/step)
step 25330 	 loss = 0.344 (2.895 sec/step)
step 25340 	 loss = 0.355 (2.772 sec/step)
step 25350 	 loss = 0.344 (2.865 sec/step)
step 25360 	 loss = 0.338 (3.121 sec/step)
step 25370 	 loss = 0.324 (2.776 sec/step)
step 25380 	 loss = 0.347 (2.834 sec/step)
step 25390 	 loss = 0.341 (2.842 sec/step)
step 25400 	 loss = 0.360 (2.781 sec/step)
step 25410 	 loss = 0.337 (2.712 sec/step)
step 25420 	 loss = 0.327 (2.533 sec/step)
step 25430 	 loss = 0.332 (2.635 sec/step)
step 25440 	 loss = 0.339 (2.750 sec/step)
step 25450 	 loss = 0.352 (2.670 sec/step)
step 25460 	 loss = 0.340 (2.944 sec/step)
step 25470 	 loss = 0.333 (2.604 sec/step)
step 25480 	 loss = 0.323 (3.109 sec/step)
step 25490 	 loss = 0.353 (2.662 sec/step)
step 25500 	 loss = 0.334 (2.832 sec/step)
step 25510 	 loss = 0.333 (2.974 sec/step)
step 25520 	 loss = 0.333 (2.929 sec/step)
step 25530 	 loss = 0.336 (2.921 sec/step)
step 25540 	 loss = 0.322 (2.903 sec/step)
step 25550 	 loss = 0.310 (2.925 sec/step)
step 25560 	 loss = 0.329 (2.861 sec/step)
step 25570 	 loss = 0.325 (2.903 sec/step)
step 25580 	 loss = 0.305 (2.597 sec/step)
step 25590 	 loss = 0.313 (2.552 sec/step)
step 25600 	 loss = 0.343 (2.906 sec/step)
step 25610 	 loss = 0.340 (2.960 sec/step)
step 25620 	 loss = 0.328 (3.274 sec/step)
step 25630 	 loss = 0.326 (2.839 sec/step)
step 25640 	 loss = 0.350 (2.904 sec/step)
step 25650 	 loss = 0.331 (2.843 sec/step)
step 25660 	 loss = 0.351 (2.909 sec/step)
step 25670 	 loss = 0.347 (2.682 sec/step)
step 25680 	 loss = 0.337 (2.935 sec/step)
step 25690 	 loss = 0.314 (2.902 sec/step)
step 25700 	 loss = 0.318 (3.023 sec/step)
step 25710 	 loss = 0.355 (2.736 sec/step)
step 25720 	 loss = 0.362 (2.907 sec/step)
step 25730 	 loss = 0.331 (2.736 sec/step)
step 25740 	 loss = 0.347 (2.897 sec/step)
step 25750 	 loss = 0.345 (2.929 sec/step)
step 25760 	 loss = 0.331 (2.802 sec/step)
step 25770 	 loss = 0.329 (2.956 sec/step)
step 25780 	 loss = 0.354 (2.715 sec/step)
step 25790 	 loss = 0.345 (2.891 sec/step)
step 25800 	 loss = 0.334 (2.803 sec/step)
step 25810 	 loss = 0.320 (3.084 sec/step)
step 25820 	 loss = 0.333 (2.859 sec/step)
step 25830 	 loss = 0.337 (2.906 sec/step)
step 25840 	 loss = 0.299 (2.735 sec/step)
step 25850 	 loss = 0.344 (3.102 sec/step)
step 25860 	 loss = 0.354 (2.706 sec/step)
step 25870 	 loss = 0.343 (2.866 sec/step)
step 25880 	 loss = 0.326 (2.627 sec/step)
step 25890 	 loss = 0.368 (2.780 sec/step)
step 25900 	 loss = 0.347 (3.057 sec/step)
step 25910 	 loss = 0.332 (3.151 sec/step)
step 25920 	 loss = 0.329 (2.914 sec/step)
step 25930 	 loss = 0.336 (2.827 sec/step)
step 25940 	 loss = 0.329 (2.667 sec/step)
step 25950 	 loss = 0.314 (2.872 sec/step)
Training Epoch 7/100
step 25960 	 loss = 0.298 (2.836 sec/step)
step 25970 	 loss = 0.299 (3.027 sec/step)
step 25980 	 loss = 0.292 (2.735 sec/step)
step 25990 	 loss = 0.298 (2.761 sec/step)
step 26000 	 loss = 0.279 (2.993 sec/step)
step 26010 	 loss = 0.320 (2.766 sec/step)
step 26020 	 loss = 0.280 (2.811 sec/step)
step 26030 	 loss = 0.299 (2.767 sec/step)
step 26040 	 loss = 0.307 (2.933 sec/step)
step 26050 	 loss = 0.309 (3.008 sec/step)
step 26060 	 loss = 0.305 (2.967 sec/step)
step 26070 	 loss = 0.284 (3.016 sec/step)
step 26080 	 loss = 0.294 (2.527 sec/step)
step 26090 	 loss = 0.299 (2.782 sec/step)
step 26100 	 loss = 0.281 (2.796 sec/step)
step 26110 	 loss = 0.306 (3.152 sec/step)
step 26120 	 loss = 0.290 (2.846 sec/step)
step 26130 	 loss = 0.312 (2.822 sec/step)
step 26140 	 loss = 0.311 (2.979 sec/step)
step 26150 	 loss = 0.306 (2.987 sec/step)
step 26160 	 loss = 0.284 (2.853 sec/step)
step 26170 	 loss = 0.276 (2.852 sec/step)
step 26180 	 loss = 0.304 (2.879 sec/step)
step 26190 	 loss = 0.312 (3.442 sec/step)
step 26200 	 loss = 0.298 (2.725 sec/step)
step 26210 	 loss = 0.309 (2.944 sec/step)
step 26220 	 loss = 0.285 (3.206 sec/step)
step 26230 	 loss = 0.303 (2.880 sec/step)
step 26240 	 loss = 0.285 (3.078 sec/step)
step 26250 	 loss = 0.291 (2.696 sec/step)
step 26260 	 loss = 0.288 (2.857 sec/step)
step 26270 	 loss = 0.310 (2.570 sec/step)
step 26280 	 loss = 0.294 (3.078 sec/step)
step 26290 	 loss = 0.282 (2.895 sec/step)
step 26300 	 loss = 0.313 (2.730 sec/step)
step 26310 	 loss = 0.314 (2.887 sec/step)
step 26320 	 loss = 0.308 (2.699 sec/step)
step 26330 	 loss = 0.296 (2.978 sec/step)
step 26340 	 loss = 0.304 (2.838 sec/step)
step 26350 	 loss = 0.289 (3.076 sec/step)
step 26360 	 loss = 0.297 (3.060 sec/step)
step 26370 	 loss = 0.325 (2.720 sec/step)
step 26380 	 loss = 0.319 (2.891 sec/step)
step 26390 	 loss = 0.300 (2.964 sec/step)
step 26400 	 loss = 0.313 (3.260 sec/step)
step 26410 	 loss = 0.302 (2.695 sec/step)
step 26420 	 loss = 0.300 (2.772 sec/step)
step 26430 	 loss = 0.279 (3.111 sec/step)
step 26440 	 loss = 0.320 (3.155 sec/step)
step 26450 	 loss = 0.292 (2.857 sec/step)
step 26460 	 loss = 0.274 (2.709 sec/step)
step 26470 	 loss = 0.297 (2.757 sec/step)
step 26480 	 loss = 0.304 (2.898 sec/step)
step 26490 	 loss = 0.306 (2.553 sec/step)
step 26500 	 loss = 0.280 (3.022 sec/step)
step 26510 	 loss = 0.295 (2.844 sec/step)
step 26520 	 loss = 0.282 (2.726 sec/step)
step 26530 	 loss = 0.293 (2.896 sec/step)
step 26540 	 loss = 0.316 (2.643 sec/step)
step 26550 	 loss = 0.302 (2.453 sec/step)
step 26560 	 loss = 0.301 (2.946 sec/step)
step 26570 	 loss = 0.320 (2.863 sec/step)
step 26580 	 loss = 0.298 (2.860 sec/step)
step 26590 	 loss = 0.296 (3.424 sec/step)
step 26600 	 loss = 0.294 (2.834 sec/step)
step 26610 	 loss = 0.286 (3.035 sec/step)
step 26620 	 loss = 0.300 (2.926 sec/step)
step 26630 	 loss = 0.299 (2.971 sec/step)
step 26640 	 loss = 0.286 (2.681 sec/step)
step 26650 	 loss = 0.315 (2.478 sec/step)
step 26660 	 loss = 0.303 (2.908 sec/step)
step 26670 	 loss = 0.291 (2.709 sec/step)
step 26680 	 loss = 0.311 (2.619 sec/step)
step 26690 	 loss = 0.306 (2.742 sec/step)
step 26700 	 loss = 0.306 (3.038 sec/step)
step 26710 	 loss = 0.288 (2.619 sec/step)
step 26720 	 loss = 0.292 (2.821 sec/step)
step 26730 	 loss = 0.300 (2.601 sec/step)
step 26740 	 loss = 0.291 (2.764 sec/step)
step 26750 	 loss = 0.308 (2.592 sec/step)
step 26760 	 loss = 0.287 (2.817 sec/step)
step 26770 	 loss = 0.313 (2.831 sec/step)
step 26780 	 loss = 0.294 (2.791 sec/step)
step 26790 	 loss = 0.292 (3.051 sec/step)
step 26800 	 loss = 0.286 (2.662 sec/step)
step 26810 	 loss = 0.304 (2.713 sec/step)
step 26820 	 loss = 0.321 (2.563 sec/step)
step 26830 	 loss = 0.274 (2.914 sec/step)
step 26840 	 loss = 0.314 (3.100 sec/step)
step 26850 	 loss = 0.320 (2.523 sec/step)
step 26860 	 loss = 0.303 (2.872 sec/step)
step 26870 	 loss = 0.291 (2.998 sec/step)
step 26880 	 loss = 0.317 (3.019 sec/step)
step 26890 	 loss = 0.324 (2.776 sec/step)
step 26900 	 loss = 0.307 (2.658 sec/step)
step 26910 	 loss = 0.295 (2.904 sec/step)
step 26920 	 loss = 0.289 (2.796 sec/step)
step 26930 	 loss = 0.302 (2.478 sec/step)
step 26940 	 loss = 0.304 (2.530 sec/step)
step 26950 	 loss = 0.298 (2.277 sec/step)
step 26960 	 loss = 0.308 (2.456 sec/step)
step 26970 	 loss = 0.298 (2.416 sec/step)
step 26980 	 loss = 0.324 (2.457 sec/step)
step 26990 	 loss = 0.292 (2.285 sec/step)
step 27000 	 loss = 0.302 (2.286 sec/step)
step 27010 	 loss = 0.287 (2.276 sec/step)
step 27020 	 loss = 0.294 (2.335 sec/step)
step 27030 	 loss = 0.304 (2.230 sec/step)
step 27040 	 loss = 0.302 (2.267 sec/step)
step 27050 	 loss = 0.291 (4.097 sec/step)
step 27060 	 loss = 0.317 (2.388 sec/step)
step 27070 	 loss = 0.314 (2.649 sec/step)
step 27080 	 loss = 0.304 (3.269 sec/step)
step 27090 	 loss = 0.312 (2.723 sec/step)
step 27100 	 loss = 0.294 (2.599 sec/step)
step 27110 	 loss = 0.327 (3.247 sec/step)
step 27120 	 loss = 0.288 (2.744 sec/step)
step 27130 	 loss = 0.299 (2.920 sec/step)
step 27140 	 loss = 0.315 (2.877 sec/step)
step 27150 	 loss = 0.299 (3.030 sec/step)
step 27160 	 loss = 0.323 (2.739 sec/step)
step 27170 	 loss = 0.288 (2.860 sec/step)
step 27180 	 loss = 0.309 (2.620 sec/step)
step 27190 	 loss = 0.295 (3.198 sec/step)
step 27200 	 loss = 0.304 (2.773 sec/step)
step 27210 	 loss = 0.298 (2.700 sec/step)
step 27220 	 loss = 0.292 (2.571 sec/step)
step 27230 	 loss = 0.287 (2.829 sec/step)
step 27240 	 loss = 0.312 (3.036 sec/step)
step 27250 	 loss = 0.324 (2.803 sec/step)
step 27260 	 loss = 0.313 (2.773 sec/step)
step 27270 	 loss = 0.306 (2.858 sec/step)
step 27280 	 loss = 0.308 (2.709 sec/step)
step 27290 	 loss = 0.326 (3.022 sec/step)
step 27300 	 loss = 0.334 (2.860 sec/step)
step 27310 	 loss = 0.296 (2.918 sec/step)
step 27320 	 loss = 0.288 (2.768 sec/step)
step 27330 	 loss = 0.305 (2.557 sec/step)
step 27340 	 loss = 0.304 (2.757 sec/step)
step 27350 	 loss = 0.307 (2.622 sec/step)
step 27360 	 loss = 0.288 (3.028 sec/step)
step 27370 	 loss = 0.294 (3.291 sec/step)
step 27380 	 loss = 0.320 (2.778 sec/step)
step 27390 	 loss = 0.305 (2.858 sec/step)
step 27400 	 loss = 0.309 (3.038 sec/step)
step 27410 	 loss = 0.298 (2.677 sec/step)
step 27420 	 loss = 0.306 (2.853 sec/step)
step 27430 	 loss = 0.305 (2.802 sec/step)
step 27440 	 loss = 0.305 (2.925 sec/step)
step 27450 	 loss = 0.319 (2.881 sec/step)
step 27460 	 loss = 0.305 (2.907 sec/step)
step 27470 	 loss = 0.315 (2.579 sec/step)
step 27480 	 loss = 0.288 (3.063 sec/step)
step 27490 	 loss = 0.324 (2.836 sec/step)
step 27500 	 loss = 0.314 (2.615 sec/step)
step 27510 	 loss = 0.324 (2.558 sec/step)
step 27520 	 loss = 0.302 (2.827 sec/step)
step 27530 	 loss = 0.296 (2.552 sec/step)
step 27540 	 loss = 0.296 (2.992 sec/step)
step 27550 	 loss = 0.302 (2.891 sec/step)
step 27560 	 loss = 0.285 (3.261 sec/step)
step 27570 	 loss = 0.317 (2.815 sec/step)
step 27580 	 loss = 0.306 (2.572 sec/step)
step 27590 	 loss = 0.306 (2.847 sec/step)
step 27600 	 loss = 0.291 (2.855 sec/step)
step 27610 	 loss = 0.325 (2.898 sec/step)
step 27620 	 loss = 0.297 (2.888 sec/step)
step 27630 	 loss = 0.302 (2.857 sec/step)
step 27640 	 loss = 0.301 (3.098 sec/step)
step 27650 	 loss = 0.325 (2.677 sec/step)
step 27660 	 loss = 0.319 (3.029 sec/step)
step 27670 	 loss = 0.301 (3.070 sec/step)
step 27680 	 loss = 0.277 (2.963 sec/step)
step 27690 	 loss = 0.320 (2.883 sec/step)
step 27700 	 loss = 0.292 (2.732 sec/step)
step 27710 	 loss = 0.319 (2.952 sec/step)
step 27720 	 loss = 0.312 (2.584 sec/step)
step 27730 	 loss = 0.295 (2.820 sec/step)
step 27740 	 loss = 0.283 (2.908 sec/step)
step 27750 	 loss = 0.305 (2.730 sec/step)
step 27760 	 loss = 0.316 (3.019 sec/step)
step 27770 	 loss = 0.302 (3.157 sec/step)
step 27780 	 loss = 0.298 (2.827 sec/step)
step 27790 	 loss = 0.320 (2.664 sec/step)
step 27800 	 loss = 0.310 (2.693 sec/step)
step 27810 	 loss = 0.299 (2.853 sec/step)
step 27820 	 loss = 0.289 (2.701 sec/step)
step 27830 	 loss = 0.293 (2.894 sec/step)
step 27840 	 loss = 0.318 (2.935 sec/step)
step 27850 	 loss = 0.285 (2.605 sec/step)
step 27860 	 loss = 0.283 (2.999 sec/step)
step 27870 	 loss = 0.306 (3.070 sec/step)
step 27880 	 loss = 0.303 (2.887 sec/step)
step 27890 	 loss = 0.289 (2.503 sec/step)
step 27900 	 loss = 0.334 (2.642 sec/step)
step 27910 	 loss = 0.308 (2.599 sec/step)
step 27920 	 loss = 0.293 (2.770 sec/step)
step 27930 	 loss = 0.308 (2.554 sec/step)
step 27940 	 loss = 0.304 (2.659 sec/step)
step 27950 	 loss = 0.300 (2.992 sec/step)
step 27960 	 loss = 0.326 (2.521 sec/step)
step 27970 	 loss = 0.308 (2.923 sec/step)
step 27980 	 loss = 0.298 (2.905 sec/step)
step 27990 	 loss = 0.299 (2.668 sec/step)
step 28000 	 loss = 0.307 (2.536 sec/step)
step 28010 	 loss = 0.287 (2.703 sec/step)
step 28020 	 loss = 0.302 (2.735 sec/step)
step 28030 	 loss = 0.301 (2.660 sec/step)
step 28040 	 loss = 0.306 (2.882 sec/step)
step 28050 	 loss = 0.315 (2.697 sec/step)
step 28060 	 loss = 0.284 (3.171 sec/step)
step 28070 	 loss = 0.306 (2.703 sec/step)
step 28080 	 loss = 0.298 (2.803 sec/step)
step 28090 	 loss = 0.330 (3.030 sec/step)
step 28100 	 loss = 0.319 (2.853 sec/step)
step 28110 	 loss = 0.310 (3.014 sec/step)
step 28120 	 loss = 0.316 (3.034 sec/step)
step 28130 	 loss = 0.313 (2.839 sec/step)
step 28140 	 loss = 0.311 (2.777 sec/step)
step 28150 	 loss = 0.327 (2.904 sec/step)
step 28160 	 loss = 0.302 (2.633 sec/step)
step 28170 	 loss = 0.301 (2.685 sec/step)
step 28180 	 loss = 0.308 (2.871 sec/step)
step 28190 	 loss = 0.310 (2.681 sec/step)
step 28200 	 loss = 0.312 (2.607 sec/step)
step 28210 	 loss = 0.328 (2.965 sec/step)
step 28220 	 loss = 0.315 (3.032 sec/step)
step 28230 	 loss = 0.315 (2.861 sec/step)
step 28240 	 loss = 0.320 (2.801 sec/step)
step 28250 	 loss = 0.314 (2.609 sec/step)
step 28260 	 loss = 0.323 (2.609 sec/step)
step 28270 	 loss = 0.283 (2.797 sec/step)
step 28280 	 loss = 0.287 (2.663 sec/step)
step 28290 	 loss = 0.278 (2.969 sec/step)
step 28300 	 loss = 0.334 (2.861 sec/step)
step 28310 	 loss = 0.320 (2.885 sec/step)
step 28320 	 loss = 0.327 (2.622 sec/step)
step 28330 	 loss = 0.326 (2.727 sec/step)
step 28340 	 loss = 0.301 (2.749 sec/step)
step 28350 	 loss = 0.291 (2.812 sec/step)
step 28360 	 loss = 0.313 (2.683 sec/step)
step 28370 	 loss = 0.311 (2.701 sec/step)
step 28380 	 loss = 0.311 (3.132 sec/step)
step 28390 	 loss = 0.304 (2.736 sec/step)
step 28400 	 loss = 0.305 (2.793 sec/step)
step 28410 	 loss = 0.308 (3.042 sec/step)
step 28420 	 loss = 0.316 (2.708 sec/step)
step 28430 	 loss = 0.288 (2.665 sec/step)
step 28440 	 loss = 0.295 (2.811 sec/step)
step 28450 	 loss = 0.290 (2.846 sec/step)
step 28460 	 loss = 0.303 (2.772 sec/step)
step 28470 	 loss = 0.302 (2.587 sec/step)
step 28480 	 loss = 0.320 (2.856 sec/step)
step 28490 	 loss = 0.326 (2.810 sec/step)
step 28500 	 loss = 0.314 (3.107 sec/step)
step 28510 	 loss = 0.298 (2.840 sec/step)
step 28520 	 loss = 0.309 (2.714 sec/step)
step 28530 	 loss = 0.310 (3.012 sec/step)
step 28540 	 loss = 0.312 (2.867 sec/step)
step 28550 	 loss = 0.302 (2.421 sec/step)
step 28560 	 loss = 0.281 (2.574 sec/step)
step 28570 	 loss = 0.325 (2.926 sec/step)
step 28580 	 loss = 0.315 (2.685 sec/step)
step 28590 	 loss = 0.329 (2.894 sec/step)
step 28600 	 loss = 0.310 (3.070 sec/step)
step 28610 	 loss = 0.321 (2.860 sec/step)
step 28620 	 loss = 0.325 (2.510 sec/step)
step 28630 	 loss = 0.319 (2.884 sec/step)
step 28640 	 loss = 0.312 (2.856 sec/step)
step 28650 	 loss = 0.295 (2.679 sec/step)
step 28660 	 loss = 0.321 (2.674 sec/step)
step 28670 	 loss = 0.293 (2.762 sec/step)
step 28680 	 loss = 0.322 (2.638 sec/step)
step 28690 	 loss = 0.315 (2.595 sec/step)
step 28700 	 loss = 0.309 (2.883 sec/step)
step 28710 	 loss = 0.317 (2.928 sec/step)
step 28720 	 loss = 0.298 (2.464 sec/step)
step 28730 	 loss = 0.324 (2.647 sec/step)
step 28740 	 loss = 0.298 (2.599 sec/step)
step 28750 	 loss = 0.314 (2.732 sec/step)
step 28760 	 loss = 0.306 (2.723 sec/step)
step 28770 	 loss = 0.294 (2.969 sec/step)
step 28780 	 loss = 0.320 (2.939 sec/step)
step 28790 	 loss = 0.332 (3.004 sec/step)
step 28800 	 loss = 0.306 (3.034 sec/step)
step 28810 	 loss = 0.343 (2.883 sec/step)
step 28820 	 loss = 0.307 (2.765 sec/step)
step 28830 	 loss = 0.313 (3.075 sec/step)
step 28840 	 loss = 0.312 (3.161 sec/step)
step 28850 	 loss = 0.301 (2.830 sec/step)
step 28860 	 loss = 0.316 (2.786 sec/step)
step 28870 	 loss = 0.287 (2.852 sec/step)
step 28880 	 loss = 0.306 (3.120 sec/step)
step 28890 	 loss = 0.320 (2.797 sec/step)
step 28900 	 loss = 0.311 (2.799 sec/step)
step 28910 	 loss = 0.319 (2.740 sec/step)
step 28920 	 loss = 0.306 (3.066 sec/step)
step 28930 	 loss = 0.305 (2.856 sec/step)
step 28940 	 loss = 0.312 (2.572 sec/step)
step 28950 	 loss = 0.322 (2.926 sec/step)
step 28960 	 loss = 0.316 (2.761 sec/step)
step 28970 	 loss = 0.309 (2.631 sec/step)
step 28980 	 loss = 0.320 (2.787 sec/step)
step 28990 	 loss = 0.302 (2.998 sec/step)
step 29000 	 loss = 0.327 (2.782 sec/step)
step 29010 	 loss = 0.327 (3.205 sec/step)
step 29020 	 loss = 0.298 (2.922 sec/step)
step 29030 	 loss = 0.297 (3.109 sec/step)
step 29040 	 loss = 0.284 (3.020 sec/step)
step 29050 	 loss = 0.331 (2.801 sec/step)
step 29060 	 loss = 0.300 (2.555 sec/step)
step 29070 	 loss = 0.305 (2.904 sec/step)
step 29080 	 loss = 0.310 (2.576 sec/step)
step 29090 	 loss = 0.310 (2.769 sec/step)
step 29100 	 loss = 0.289 (2.875 sec/step)
step 29110 	 loss = 0.301 (3.191 sec/step)
step 29120 	 loss = 0.303 (3.012 sec/step)
step 29130 	 loss = 0.327 (2.684 sec/step)
step 29140 	 loss = 0.316 (2.801 sec/step)
step 29150 	 loss = 0.281 (2.687 sec/step)
step 29160 	 loss = 0.314 (2.892 sec/step)
step 29170 	 loss = 0.321 (3.096 sec/step)
step 29180 	 loss = 0.288 (2.539 sec/step)
step 29190 	 loss = 0.299 (2.894 sec/step)
step 29200 	 loss = 0.319 (2.751 sec/step)
step 29210 	 loss = 0.322 (2.521 sec/step)
step 29220 	 loss = 0.307 (2.962 sec/step)
step 29230 	 loss = 0.312 (3.115 sec/step)
step 29240 	 loss = 0.314 (2.919 sec/step)
step 29250 	 loss = 0.301 (2.648 sec/step)
step 29260 	 loss = 0.303 (3.114 sec/step)
step 29270 	 loss = 0.289 (3.089 sec/step)
step 29280 	 loss = 0.290 (2.904 sec/step)
step 29290 	 loss = 0.311 (2.908 sec/step)
step 29300 	 loss = 0.303 (2.760 sec/step)
step 29310 	 loss = 0.322 (2.816 sec/step)
step 29320 	 loss = 0.311 (2.722 sec/step)
step 29330 	 loss = 0.325 (2.539 sec/step)
step 29340 	 loss = 0.294 (2.771 sec/step)
step 29350 	 loss = 0.314 (2.771 sec/step)
step 29360 	 loss = 0.328 (3.188 sec/step)
step 29370 	 loss = 0.315 (2.795 sec/step)
step 29380 	 loss = 0.314 (2.691 sec/step)
step 29390 	 loss = 0.327 (2.755 sec/step)
step 29400 	 loss = 0.309 (2.790 sec/step)
step 29410 	 loss = 0.323 (2.441 sec/step)
step 29420 	 loss = 0.320 (3.399 sec/step)
step 29430 	 loss = 0.290 (2.651 sec/step)
step 29440 	 loss = 0.334 (2.803 sec/step)
step 29450 	 loss = 0.327 (2.918 sec/step)
step 29460 	 loss = 0.305 (2.827 sec/step)
step 29470 	 loss = 0.305 (2.758 sec/step)
step 29480 	 loss = 0.291 (2.740 sec/step)
step 29490 	 loss = 0.308 (2.907 sec/step)
step 29500 	 loss = 0.319 (3.071 sec/step)
step 29510 	 loss = 0.309 (2.912 sec/step)
step 29520 	 loss = 0.317 (2.989 sec/step)
step 29530 	 loss = 0.303 (2.555 sec/step)
step 29540 	 loss = 0.326 (2.761 sec/step)
step 29550 	 loss = 0.296 (2.821 sec/step)
step 29560 	 loss = 0.318 (2.982 sec/step)
step 29570 	 loss = 0.315 (3.039 sec/step)
step 29580 	 loss = 0.307 (2.853 sec/step)
step 29590 	 loss = 0.322 (2.747 sec/step)
step 29600 	 loss = 0.302 (2.826 sec/step)
step 29610 	 loss = 0.305 (2.545 sec/step)
step 29620 	 loss = 0.291 (2.735 sec/step)
step 29630 	 loss = 0.332 (3.023 sec/step)
step 29640 	 loss = 0.312 (2.830 sec/step)
step 29650 	 loss = 0.320 (2.899 sec/step)
step 29660 	 loss = 0.316 (3.089 sec/step)
Training Epoch 8/100
step 29670 	 loss = 0.288 (2.945 sec/step)
step 29680 	 loss = 0.273 (2.587 sec/step)
step 29690 	 loss = 0.284 (2.881 sec/step)
step 29700 	 loss = 0.277 (2.737 sec/step)
step 29710 	 loss = 0.289 (3.000 sec/step)
step 29720 	 loss = 0.284 (3.013 sec/step)
step 29730 	 loss = 0.289 (3.063 sec/step)
step 29740 	 loss = 0.299 (2.666 sec/step)
step 29750 	 loss = 0.275 (2.548 sec/step)
step 29760 	 loss = 0.286 (2.934 sec/step)
step 29770 	 loss = 0.273 (2.640 sec/step)
step 29780 	 loss = 0.271 (2.948 sec/step)
step 29790 	 loss = 0.279 (2.900 sec/step)
step 29800 	 loss = 0.279 (2.568 sec/step)
step 29810 	 loss = 0.293 (2.689 sec/step)
step 29820 	 loss = 0.272 (2.870 sec/step)
step 29830 	 loss = 0.288 (2.928 sec/step)
step 29840 	 loss = 0.271 (2.742 sec/step)
step 29850 	 loss = 0.275 (2.928 sec/step)
step 29860 	 loss = 0.311 (2.865 sec/step)
step 29870 	 loss = 0.257 (2.822 sec/step)
step 29880 	 loss = 0.285 (2.732 sec/step)
step 29890 	 loss = 0.280 (2.777 sec/step)
step 29900 	 loss = 0.280 (2.915 sec/step)
step 29910 	 loss = 0.269 (2.640 sec/step)
step 29920 	 loss = 0.260 (2.806 sec/step)
step 29930 	 loss = 0.287 (2.635 sec/step)
step 29940 	 loss = 0.276 (2.766 sec/step)
step 29950 	 loss = 0.292 (2.547 sec/step)
step 29960 	 loss = 0.285 (3.029 sec/step)
step 29970 	 loss = 0.282 (2.694 sec/step)
step 29980 	 loss = 0.287 (2.665 sec/step)
step 29990 	 loss = 0.291 (2.827 sec/step)
step 30000 	 loss = 0.288 (2.915 sec/step)
step 30010 	 loss = 0.286 (3.060 sec/step)
step 30020 	 loss = 0.266 (2.910 sec/step)
step 30030 	 loss = 0.289 (2.734 sec/step)
step 30040 	 loss = 0.291 (2.927 sec/step)
step 30050 	 loss = 0.293 (2.892 sec/step)
step 30060 	 loss = 0.284 (2.790 sec/step)
step 30070 	 loss = 0.266 (2.583 sec/step)
step 30080 	 loss = 0.278 (2.768 sec/step)
step 30090 	 loss = 0.267 (2.820 sec/step)
step 30100 	 loss = 0.278 (2.699 sec/step)
step 30110 	 loss = 0.271 (2.537 sec/step)
step 30120 	 loss = 0.272 (2.878 sec/step)
step 30130 	 loss = 0.278 (2.999 sec/step)
step 30140 	 loss = 0.293 (2.666 sec/step)
step 30150 	 loss = 0.269 (2.624 sec/step)
step 30160 	 loss = 0.284 (3.117 sec/step)
step 30170 	 loss = 0.269 (2.687 sec/step)
step 30180 	 loss = 0.273 (2.819 sec/step)
step 30190 	 loss = 0.286 (2.817 sec/step)
step 30200 	 loss = 0.284 (2.548 sec/step)
step 30210 	 loss = 0.283 (2.951 sec/step)
step 30220 	 loss = 0.275 (3.257 sec/step)
step 30230 	 loss = 0.276 (2.739 sec/step)
step 30240 	 loss = 0.278 (2.559 sec/step)
step 30250 	 loss = 0.280 (2.767 sec/step)
step 30260 	 loss = 0.288 (2.875 sec/step)
step 30270 	 loss = 0.277 (2.927 sec/step)
step 30280 	 loss = 0.282 (2.665 sec/step)
step 30290 	 loss = 0.290 (2.941 sec/step)
step 30300 	 loss = 0.278 (2.846 sec/step)
step 30310 	 loss = 0.277 (2.885 sec/step)
step 30320 	 loss = 0.273 (2.868 sec/step)
step 30330 	 loss = 0.271 (3.116 sec/step)
step 30340 	 loss = 0.251 (2.622 sec/step)
step 30350 	 loss = 0.276 (2.869 sec/step)
step 30360 	 loss = 0.267 (3.105 sec/step)
step 30370 	 loss = 0.279 (2.763 sec/step)
step 30380 	 loss = 0.278 (2.685 sec/step)
step 30390 	 loss = 0.291 (3.082 sec/step)
step 30400 	 loss = 0.282 (2.636 sec/step)
step 30410 	 loss = 0.275 (2.587 sec/step)
step 30420 	 loss = 0.272 (2.768 sec/step)
step 30430 	 loss = 0.284 (2.895 sec/step)
step 30440 	 loss = 0.285 (2.749 sec/step)
step 30450 	 loss = 0.267 (2.802 sec/step)
step 30460 	 loss = 0.274 (2.676 sec/step)
step 30470 	 loss = 0.275 (3.026 sec/step)
step 30480 	 loss = 0.296 (3.084 sec/step)
step 30490 	 loss = 0.282 (2.683 sec/step)
step 30500 	 loss = 0.279 (2.804 sec/step)
step 30510 	 loss = 0.277 (2.815 sec/step)
step 30520 	 loss = 0.296 (2.947 sec/step)
step 30530 	 loss = 0.287 (3.035 sec/step)
step 30540 	 loss = 0.284 (2.834 sec/step)
step 30550 	 loss = 0.274 (2.884 sec/step)
step 30560 	 loss = 0.288 (2.580 sec/step)
step 30570 	 loss = 0.285 (2.686 sec/step)
step 30580 	 loss = 0.267 (2.942 sec/step)
step 30590 	 loss = 0.272 (2.801 sec/step)
step 30600 	 loss = 0.283 (2.623 sec/step)
step 30610 	 loss = 0.280 (2.515 sec/step)
step 30620 	 loss = 0.270 (2.651 sec/step)
step 30630 	 loss = 0.276 (3.290 sec/step)
step 30640 	 loss = 0.283 (2.629 sec/step)
step 30650 	 loss = 0.289 (2.766 sec/step)
step 30660 	 loss = 0.281 (2.926 sec/step)
step 30670 	 loss = 0.277 (2.619 sec/step)
step 30680 	 loss = 0.281 (2.806 sec/step)
step 30690 	 loss = 0.287 (2.568 sec/step)
step 30700 	 loss = 0.282 (2.629 sec/step)
step 30710 	 loss = 0.301 (3.095 sec/step)
step 30720 	 loss = 0.272 (2.811 sec/step)
step 30730 	 loss = 0.291 (2.845 sec/step)
step 30740 	 loss = 0.280 (2.842 sec/step)
step 30750 	 loss = 0.264 (2.775 sec/step)
step 30760 	 loss = 0.271 (2.810 sec/step)
step 30770 	 loss = 0.302 (2.916 sec/step)
step 30780 	 loss = 0.287 (2.740 sec/step)
step 30790 	 loss = 0.291 (2.634 sec/step)
step 30800 	 loss = 0.280 (2.918 sec/step)
step 30810 	 loss = 0.274 (2.837 sec/step)
step 30820 	 loss = 0.294 (2.944 sec/step)
step 30830 	 loss = 0.291 (3.023 sec/step)
step 30840 	 loss = 0.277 (2.525 sec/step)
step 30850 	 loss = 0.282 (2.977 sec/step)
step 30860 	 loss = 0.276 (2.778 sec/step)
step 30870 	 loss = 0.273 (2.863 sec/step)
step 30880 	 loss = 0.278 (2.931 sec/step)
step 30890 	 loss = 0.282 (3.210 sec/step)
step 30900 	 loss = 0.297 (3.097 sec/step)
step 30910 	 loss = 0.263 (3.253 sec/step)
step 30920 	 loss = 0.277 (2.827 sec/step)
step 30930 	 loss = 0.273 (2.687 sec/step)
step 30940 	 loss = 0.277 (3.056 sec/step)
step 30950 	 loss = 0.273 (2.954 sec/step)
step 30960 	 loss = 0.283 (2.932 sec/step)
step 30970 	 loss = 0.273 (3.007 sec/step)
step 30980 	 loss = 0.271 (2.733 sec/step)
step 30990 	 loss = 0.280 (3.003 sec/step)
step 31000 	 loss = 0.287 (2.894 sec/step)
step 31010 	 loss = 0.255 (2.790 sec/step)
step 31020 	 loss = 0.283 (2.763 sec/step)
step 31030 	 loss = 0.290 (2.455 sec/step)
step 31040 	 loss = 0.284 (2.833 sec/step)
step 31050 	 loss = 0.285 (2.760 sec/step)
step 31060 	 loss = 0.279 (2.994 sec/step)
step 31070 	 loss = 0.280 (3.062 sec/step)
step 31080 	 loss = 0.251 (2.634 sec/step)
step 31090 	 loss = 0.304 (2.882 sec/step)
step 31100 	 loss = 0.278 (2.805 sec/step)
step 31110 	 loss = 0.285 (2.781 sec/step)
step 31120 	 loss = 0.292 (2.606 sec/step)
step 31130 	 loss = 0.291 (3.194 sec/step)
step 31140 	 loss = 0.298 (3.014 sec/step)
step 31150 	 loss = 0.286 (3.104 sec/step)
step 31160 	 loss = 0.291 (3.113 sec/step)
step 31170 	 loss = 0.289 (2.849 sec/step)
step 31180 	 loss = 0.287 (3.125 sec/step)
step 31190 	 loss = 0.282 (2.591 sec/step)
step 31200 	 loss = 0.296 (2.915 sec/step)
step 31210 	 loss = 0.275 (2.653 sec/step)
step 31220 	 loss = 0.268 (2.798 sec/step)
step 31230 	 loss = 0.282 (3.149 sec/step)
step 31240 	 loss = 0.284 (3.161 sec/step)
step 31250 	 loss = 0.277 (2.956 sec/step)
step 31260 	 loss = 0.291 (2.484 sec/step)
step 31270 	 loss = 0.271 (2.630 sec/step)
step 31280 	 loss = 0.271 (2.779 sec/step)
step 31290 	 loss = 0.284 (2.929 sec/step)
step 31300 	 loss = 0.278 (2.625 sec/step)
step 31310 	 loss = 0.278 (2.764 sec/step)
step 31320 	 loss = 0.296 (2.641 sec/step)
step 31330 	 loss = 0.275 (2.976 sec/step)
step 31340 	 loss = 0.285 (2.766 sec/step)
step 31350 	 loss = 0.280 (3.065 sec/step)
step 31360 	 loss = 0.286 (2.819 sec/step)
step 31370 	 loss = 0.298 (2.585 sec/step)
step 31380 	 loss = 0.276 (2.762 sec/step)
step 31390 	 loss = 0.285 (2.988 sec/step)
step 31400 	 loss = 0.297 (2.688 sec/step)
step 31410 	 loss = 0.291 (2.837 sec/step)
step 31420 	 loss = 0.298 (2.853 sec/step)
step 31430 	 loss = 0.301 (2.701 sec/step)
step 31440 	 loss = 0.318 (2.798 sec/step)
step 31450 	 loss = 0.279 (2.936 sec/step)
step 31460 	 loss = 0.277 (2.768 sec/step)
step 31470 	 loss = 0.279 (3.023 sec/step)
step 31480 	 loss = 0.295 (2.608 sec/step)
step 31490 	 loss = 0.285 (2.664 sec/step)
step 31500 	 loss = 0.308 (2.793 sec/step)
step 31510 	 loss = 0.291 (2.658 sec/step)
step 31520 	 loss = 0.276 (2.564 sec/step)
step 31530 	 loss = 0.274 (3.079 sec/step)
step 31540 	 loss = 0.287 (2.793 sec/step)
step 31550 	 loss = 0.288 (3.217 sec/step)
step 31560 	 loss = 0.297 (2.886 sec/step)
step 31570 	 loss = 0.267 (3.114 sec/step)
step 31580 	 loss = 0.280 (2.867 sec/step)
step 31590 	 loss = 0.287 (2.973 sec/step)
step 31600 	 loss = 0.286 (2.870 sec/step)
step 31610 	 loss = 0.286 (2.831 sec/step)
step 31620 	 loss = 0.288 (2.811 sec/step)
step 31630 	 loss = 0.281 (2.635 sec/step)
step 31640 	 loss = 0.277 (2.917 sec/step)
step 31650 	 loss = 0.276 (2.452 sec/step)
step 31660 	 loss = 0.288 (3.008 sec/step)
step 31670 	 loss = 0.292 (3.383 sec/step)
step 31680 	 loss = 0.267 (3.265 sec/step)
step 31690 	 loss = 0.273 (3.011 sec/step)
step 31700 	 loss = 0.288 (2.782 sec/step)
step 31710 	 loss = 0.280 (2.509 sec/step)
step 31720 	 loss = 0.277 (2.640 sec/step)
step 31730 	 loss = 0.288 (2.845 sec/step)
step 31740 	 loss = 0.291 (2.991 sec/step)
step 31750 	 loss = 0.269 (3.192 sec/step)
step 31760 	 loss = 0.291 (2.704 sec/step)
step 31770 	 loss = 0.274 (2.746 sec/step)
step 31780 	 loss = 0.279 (2.828 sec/step)
step 31790 	 loss = 0.265 (2.851 sec/step)
step 31800 	 loss = 0.288 (2.689 sec/step)
step 31810 	 loss = 0.286 (2.875 sec/step)
step 31820 	 loss = 0.262 (2.944 sec/step)
step 31830 	 loss = 0.299 (2.910 sec/step)
step 31840 	 loss = 0.278 (3.134 sec/step)
step 31850 	 loss = 0.280 (3.015 sec/step)
step 31860 	 loss = 0.257 (2.458 sec/step)
step 31870 	 loss = 0.276 (3.233 sec/step)
step 31880 	 loss = 0.266 (3.053 sec/step)
step 31890 	 loss = 0.278 (2.903 sec/step)
step 31900 	 loss = 0.278 (3.019 sec/step)
step 31910 	 loss = 0.296 (2.822 sec/step)
step 31920 	 loss = 0.269 (2.759 sec/step)
step 31930 	 loss = 0.282 (2.963 sec/step)
step 31940 	 loss = 0.276 (2.957 sec/step)
step 31950 	 loss = 0.270 (2.943 sec/step)
step 31960 	 loss = 0.288 (2.765 sec/step)
step 31970 	 loss = 0.272 (3.138 sec/step)
step 31980 	 loss = 0.266 (2.979 sec/step)
step 31990 	 loss = 0.287 (3.054 sec/step)
step 32000 	 loss = 0.304 (2.958 sec/step)
step 32010 	 loss = 0.277 (3.037 sec/step)
step 32020 	 loss = 0.287 (2.918 sec/step)
step 32030 	 loss = 0.283 (2.753 sec/step)
step 32040 	 loss = 0.269 (2.719 sec/step)
step 32050 	 loss = 0.289 (2.787 sec/step)
step 32060 	 loss = 0.281 (2.902 sec/step)
step 32070 	 loss = 0.277 (2.651 sec/step)
step 32080 	 loss = 0.259 (2.831 sec/step)
step 32090 	 loss = 0.280 (2.510 sec/step)
step 32100 	 loss = 0.289 (3.147 sec/step)
step 32110 	 loss = 0.273 (2.938 sec/step)
step 32120 	 loss = 0.273 (2.715 sec/step)
step 32130 	 loss = 0.284 (2.932 sec/step)
step 32140 	 loss = 0.265 (2.939 sec/step)
step 32150 	 loss = 0.279 (2.998 sec/step)
step 32160 	 loss = 0.269 (2.595 sec/step)
step 32170 	 loss = 0.277 (2.836 sec/step)
step 32180 	 loss = 0.284 (2.824 sec/step)
step 32190 	 loss = 0.271 (2.763 sec/step)
step 32200 	 loss = 0.291 (2.815 sec/step)
step 32210 	 loss = 0.275 (2.916 sec/step)
step 32220 	 loss = 0.278 (2.645 sec/step)
step 32230 	 loss = 0.282 (2.820 sec/step)
step 32240 	 loss = 0.306 (2.950 sec/step)
step 32250 	 loss = 0.298 (3.005 sec/step)
step 32260 	 loss = 0.308 (2.657 sec/step)
step 32270 	 loss = 0.278 (2.647 sec/step)
step 32280 	 loss = 0.293 (2.945 sec/step)
step 32290 	 loss = 0.284 (2.700 sec/step)
step 32300 	 loss = 0.267 (2.867 sec/step)
step 32310 	 loss = 0.302 (2.841 sec/step)
step 32320 	 loss = 0.282 (2.743 sec/step)
step 32330 	 loss = 0.265 (2.689 sec/step)
step 32340 	 loss = 0.274 (2.945 sec/step)
step 32350 	 loss = 0.295 (2.794 sec/step)
step 32360 	 loss = 0.283 (3.153 sec/step)
step 32370 	 loss = 0.258 (3.300 sec/step)
step 32380 	 loss = 0.282 (2.440 sec/step)
step 32390 	 loss = 0.280 (2.932 sec/step)
step 32400 	 loss = 0.281 (2.669 sec/step)
step 32410 	 loss = 0.264 (2.953 sec/step)
step 32420 	 loss = 0.295 (2.862 sec/step)
step 32430 	 loss = 0.290 (2.774 sec/step)
step 32440 	 loss = 0.308 (2.798 sec/step)
step 32450 	 loss = 0.309 (3.240 sec/step)
step 32460 	 loss = 0.267 (2.712 sec/step)
step 32470 	 loss = 0.306 (2.775 sec/step)
step 32480 	 loss = 0.277 (2.647 sec/step)
step 32490 	 loss = 0.272 (2.647 sec/step)
step 32500 	 loss = 0.263 (2.694 sec/step)
step 32510 	 loss = 0.296 (2.824 sec/step)
step 32520 	 loss = 0.285 (2.781 sec/step)
step 32530 	 loss = 0.290 (2.827 sec/step)
step 32540 	 loss = 0.274 (2.944 sec/step)
step 32550 	 loss = 0.277 (2.866 sec/step)
step 32560 	 loss = 0.282 (2.972 sec/step)
step 32570 	 loss = 0.269 (2.797 sec/step)
step 32580 	 loss = 0.288 (2.613 sec/step)
step 32590 	 loss = 0.290 (2.665 sec/step)
step 32600 	 loss = 0.286 (2.908 sec/step)
step 32610 	 loss = 0.280 (2.871 sec/step)
step 32620 	 loss = 0.300 (2.920 sec/step)
step 32630 	 loss = 0.286 (2.910 sec/step)
step 32640 	 loss = 0.269 (2.771 sec/step)
step 32650 	 loss = 0.269 (2.632 sec/step)
step 32660 	 loss = 0.292 (3.230 sec/step)
step 32670 	 loss = 0.283 (2.752 sec/step)
step 32680 	 loss = 0.298 (3.141 sec/step)
step 32690 	 loss = 0.280 (2.412 sec/step)
step 32700 	 loss = 0.279 (2.452 sec/step)
step 32710 	 loss = 0.286 (2.374 sec/step)
step 32720 	 loss = 0.287 (2.271 sec/step)
step 32730 	 loss = 0.298 (2.430 sec/step)
step 32740 	 loss = 0.297 (2.209 sec/step)
step 32750 	 loss = 0.271 (2.383 sec/step)
step 32760 	 loss = 0.286 (2.335 sec/step)
step 32770 	 loss = 0.286 (2.495 sec/step)
step 32780 	 loss = 0.294 (2.452 sec/step)
step 32790 	 loss = 0.278 (2.359 sec/step)
step 32800 	 loss = 0.293 (2.361 sec/step)
step 32810 	 loss = 0.294 (2.216 sec/step)
step 32820 	 loss = 0.269 (2.242 sec/step)
step 32830 	 loss = 0.286 (3.902 sec/step)
step 32840 	 loss = 0.293 (2.621 sec/step)
step 32850 	 loss = 0.281 (3.027 sec/step)
step 32860 	 loss = 0.285 (2.720 sec/step)
step 32870 	 loss = 0.300 (2.471 sec/step)
step 32880 	 loss = 0.288 (2.810 sec/step)
step 32890 	 loss = 0.283 (3.106 sec/step)
step 32900 	 loss = 0.266 (3.007 sec/step)
step 32910 	 loss = 0.281 (2.941 sec/step)
step 32920 	 loss = 0.284 (2.868 sec/step)
step 32930 	 loss = 0.278 (2.690 sec/step)
step 32940 	 loss = 0.290 (2.934 sec/step)
step 32950 	 loss = 0.293 (2.796 sec/step)
step 32960 	 loss = 0.281 (2.719 sec/step)
step 32970 	 loss = 0.271 (2.744 sec/step)
step 32980 	 loss = 0.281 (2.755 sec/step)
step 32990 	 loss = 0.285 (2.679 sec/step)
step 33000 	 loss = 0.290 (2.767 sec/step)
step 33010 	 loss = 0.270 (2.998 sec/step)
step 33020 	 loss = 0.278 (2.485 sec/step)
step 33030 	 loss = 0.282 (2.705 sec/step)
step 33040 	 loss = 0.284 (2.979 sec/step)
step 33050 	 loss = 0.275 (2.554 sec/step)
step 33060 	 loss = 0.291 (2.565 sec/step)
step 33070 	 loss = 0.286 (2.922 sec/step)
step 33080 	 loss = 0.291 (2.559 sec/step)
step 33090 	 loss = 0.296 (2.793 sec/step)
step 33100 	 loss = 0.289 (2.817 sec/step)
step 33110 	 loss = 0.284 (3.033 sec/step)
step 33120 	 loss = 0.278 (2.632 sec/step)
step 33130 	 loss = 0.284 (2.603 sec/step)
step 33140 	 loss = 0.263 (3.135 sec/step)
step 33150 	 loss = 0.278 (2.374 sec/step)
step 33160 	 loss = 0.287 (2.929 sec/step)
step 33170 	 loss = 0.292 (2.919 sec/step)
step 33180 	 loss = 0.276 (2.603 sec/step)
step 33190 	 loss = 0.289 (2.801 sec/step)
step 33200 	 loss = 0.289 (2.462 sec/step)
step 33210 	 loss = 0.296 (2.806 sec/step)
step 33220 	 loss = 0.272 (2.943 sec/step)
step 33230 	 loss = 0.280 (2.708 sec/step)
step 33240 	 loss = 0.276 (2.550 sec/step)
step 33250 	 loss = 0.282 (2.653 sec/step)
step 33260 	 loss = 0.294 (2.932 sec/step)
step 33270 	 loss = 0.294 (2.439 sec/step)
step 33280 	 loss = 0.280 (2.679 sec/step)
step 33290 	 loss = 0.280 (2.899 sec/step)
step 33300 	 loss = 0.288 (2.457 sec/step)
step 33310 	 loss = 0.270 (2.894 sec/step)
step 33320 	 loss = 0.273 (3.074 sec/step)
step 33330 	 loss = 0.278 (2.848 sec/step)
step 33340 	 loss = 0.274 (3.290 sec/step)
step 33350 	 loss = 0.263 (3.167 sec/step)
step 33360 	 loss = 0.275 (2.795 sec/step)
step 33370 	 loss = 0.260 (2.592 sec/step)
Training Epoch 9/100
step 33380 	 loss = 0.263 (2.826 sec/step)
step 33390 	 loss = 0.257 (2.810 sec/step)
step 33400 	 loss = 0.244 (3.029 sec/step)
step 33410 	 loss = 0.250 (2.782 sec/step)
step 33420 	 loss = 0.265 (2.632 sec/step)
step 33430 	 loss = 0.256 (3.057 sec/step)
step 33440 	 loss = 0.260 (2.837 sec/step)
step 33450 	 loss = 0.278 (2.908 sec/step)
step 33460 	 loss = 0.271 (2.632 sec/step)
step 33470 	 loss = 0.273 (2.970 sec/step)
step 33480 	 loss = 0.265 (2.488 sec/step)
step 33490 	 loss = 0.254 (3.129 sec/step)
step 33500 	 loss = 0.275 (2.639 sec/step)
step 33510 	 loss = 0.262 (2.960 sec/step)
step 33520 	 loss = 0.255 (2.874 sec/step)
step 33530 	 loss = 0.282 (2.844 sec/step)
step 33540 	 loss = 0.284 (2.865 sec/step)
step 33550 	 loss = 0.250 (2.621 sec/step)
step 33560 	 loss = 0.268 (2.997 sec/step)
step 33570 	 loss = 0.265 (2.815 sec/step)
step 33580 	 loss = 0.259 (3.187 sec/step)
step 33590 	 loss = 0.273 (2.763 sec/step)
step 33600 	 loss = 0.261 (2.891 sec/step)
step 33610 	 loss = 0.250 (2.720 sec/step)
step 33620 	 loss = 0.276 (2.674 sec/step)
step 33630 	 loss = 0.269 (2.754 sec/step)
step 33640 	 loss = 0.270 (2.926 sec/step)
step 33650 	 loss = 0.252 (2.949 sec/step)
step 33660 	 loss = 0.256 (2.798 sec/step)
step 33670 	 loss = 0.283 (2.840 sec/step)
step 33680 	 loss = 0.267 (2.647 sec/step)
step 33690 	 loss = 0.268 (3.143 sec/step)
step 33700 	 loss = 0.267 (2.903 sec/step)
step 33710 	 loss = 0.273 (2.968 sec/step)
step 33720 	 loss = 0.254 (2.833 sec/step)
step 33730 	 loss = 0.250 (3.008 sec/step)
step 33740 	 loss = 0.263 (2.502 sec/step)
step 33750 	 loss = 0.278 (2.858 sec/step)
step 33760 	 loss = 0.274 (2.749 sec/step)
step 33770 	 loss = 0.252 (3.096 sec/step)
step 33780 	 loss = 0.254 (2.572 sec/step)
step 33790 	 loss = 0.256 (2.909 sec/step)
step 33800 	 loss = 0.260 (3.224 sec/step)
step 33810 	 loss = 0.264 (2.909 sec/step)
step 33820 	 loss = 0.266 (2.806 sec/step)
step 33830 	 loss = 0.277 (2.394 sec/step)
step 33840 	 loss = 0.254 (2.630 sec/step)
step 33850 	 loss = 0.262 (3.104 sec/step)
step 33860 	 loss = 0.260 (2.913 sec/step)
step 33870 	 loss = 0.257 (2.989 sec/step)
step 33880 	 loss = 0.269 (2.550 sec/step)
step 33890 	 loss = 0.240 (2.418 sec/step)
step 33900 	 loss = 0.266 (2.557 sec/step)
step 33910 	 loss = 0.279 (2.666 sec/step)
step 33920 	 loss = 0.283 (2.750 sec/step)
step 33930 	 loss = 0.265 (3.066 sec/step)
step 33940 	 loss = 0.253 (2.993 sec/step)
step 33950 	 loss = 0.255 (2.796 sec/step)
step 33960 	 loss = 0.271 (2.670 sec/step)
step 33970 	 loss = 0.273 (2.744 sec/step)
step 33980 	 loss = 0.271 (2.655 sec/step)
step 33990 	 loss = 0.257 (2.746 sec/step)
step 34000 	 loss = 0.273 (2.955 sec/step)
step 34010 	 loss = 0.268 (2.935 sec/step)
step 34020 	 loss = 0.265 (2.653 sec/step)
step 34030 	 loss = 0.281 (3.180 sec/step)
step 34040 	 loss = 0.275 (2.935 sec/step)
step 34050 	 loss = 0.266 (3.087 sec/step)
step 34060 	 loss = 0.269 (2.860 sec/step)
step 34070 	 loss = 0.270 (2.926 sec/step)
step 34080 	 loss = 0.243 (2.574 sec/step)
step 34090 	 loss = 0.254 (2.477 sec/step)
step 34100 	 loss = 0.274 (2.704 sec/step)
step 34110 	 loss = 0.273 (3.023 sec/step)
step 34120 	 loss = 0.286 (2.998 sec/step)
step 34130 	 loss = 0.281 (2.940 sec/step)
step 34140 	 loss = 0.269 (2.849 sec/step)
step 34150 	 loss = 0.261 (2.925 sec/step)
step 34160 	 loss = 0.271 (2.841 sec/step)
step 34170 	 loss = 0.253 (2.882 sec/step)
step 34180 	 loss = 0.278 (2.643 sec/step)
step 34190 	 loss = 0.262 (2.739 sec/step)
step 34200 	 loss = 0.259 (2.457 sec/step)
step 34210 	 loss = 0.266 (2.533 sec/step)
step 34220 	 loss = 0.271 (2.845 sec/step)
step 34230 	 loss = 0.270 (2.686 sec/step)
step 34240 	 loss = 0.289 (2.995 sec/step)
step 34250 	 loss = 0.267 (2.705 sec/step)
step 34260 	 loss = 0.259 (2.665 sec/step)
step 34270 	 loss = 0.249 (2.743 sec/step)
step 34280 	 loss = 0.279 (2.608 sec/step)
step 34290 	 loss = 0.277 (2.772 sec/step)
step 34300 	 loss = 0.259 (2.642 sec/step)
step 34310 	 loss = 0.283 (3.048 sec/step)
step 34320 	 loss = 0.256 (2.848 sec/step)
step 34330 	 loss = 0.272 (2.632 sec/step)
step 34340 	 loss = 0.256 (3.021 sec/step)
step 34350 	 loss = 0.270 (2.819 sec/step)
step 34360 	 loss = 0.266 (2.944 sec/step)
step 34370 	 loss = 0.273 (2.858 sec/step)
step 34380 	 loss = 0.258 (2.687 sec/step)
step 34390 	 loss = 0.272 (3.101 sec/step)
step 34400 	 loss = 0.269 (2.935 sec/step)
step 34410 	 loss = 0.263 (2.620 sec/step)
step 34420 	 loss = 0.255 (2.960 sec/step)
step 34430 	 loss = 0.278 (2.866 sec/step)
step 34440 	 loss = 0.264 (2.501 sec/step)
step 34450 	 loss = 0.276 (2.806 sec/step)
step 34460 	 loss = 0.259 (2.589 sec/step)
step 34470 	 loss = 0.265 (3.005 sec/step)
step 34480 	 loss = 0.269 (3.176 sec/step)
step 34490 	 loss = 0.266 (2.892 sec/step)
step 34500 	 loss = 0.273 (3.105 sec/step)
step 34510 	 loss = 0.253 (2.904 sec/step)
step 34520 	 loss = 0.272 (2.960 sec/step)
step 34530 	 loss = 0.255 (2.924 sec/step)
step 34540 	 loss = 0.258 (2.800 sec/step)
step 34550 	 loss = 0.242 (2.815 sec/step)
step 34560 	 loss = 0.268 (2.694 sec/step)
step 34570 	 loss = 0.257 (3.075 sec/step)
step 34580 	 loss = 0.269 (3.090 sec/step)
step 34590 	 loss = 0.270 (2.979 sec/step)
step 34600 	 loss = 0.286 (2.695 sec/step)
step 34610 	 loss = 0.251 (2.858 sec/step)
step 34620 	 loss = 0.268 (2.626 sec/step)
step 34630 	 loss = 0.272 (2.590 sec/step)
step 34640 	 loss = 0.271 (2.734 sec/step)
step 34650 	 loss = 0.257 (2.717 sec/step)
step 34660 	 loss = 0.253 (2.611 sec/step)
step 34670 	 loss = 0.257 (2.993 sec/step)
step 34680 	 loss = 0.273 (2.816 sec/step)
step 34690 	 loss = 0.270 (2.961 sec/step)
step 34700 	 loss = 0.269 (2.842 sec/step)
step 34710 	 loss = 0.275 (2.799 sec/step)
step 34720 	 loss = 0.262 (2.616 sec/step)
step 34730 	 loss = 0.271 (2.841 sec/step)
step 34740 	 loss = 0.296 (2.758 sec/step)
step 34750 	 loss = 0.274 (2.839 sec/step)
step 34760 	 loss = 0.279 (2.951 sec/step)
step 34770 	 loss = 0.291 (2.483 sec/step)
step 34780 	 loss = 0.283 (2.656 sec/step)
step 34790 	 loss = 0.244 (3.131 sec/step)
step 34800 	 loss = 0.249 (2.525 sec/step)
step 34810 	 loss = 0.256 (3.038 sec/step)
step 34820 	 loss = 0.263 (2.737 sec/step)
step 34830 	 loss = 0.262 (2.952 sec/step)
step 34840 	 loss = 0.275 (3.001 sec/step)
step 34850 	 loss = 0.263 (2.815 sec/step)
step 34860 	 loss = 0.254 (2.992 sec/step)
step 34870 	 loss = 0.267 (2.859 sec/step)
step 34880 	 loss = 0.262 (2.836 sec/step)
step 34890 	 loss = 0.269 (2.990 sec/step)
step 34900 	 loss = 0.281 (2.612 sec/step)
step 34910 	 loss = 0.255 (2.662 sec/step)
step 34920 	 loss = 0.266 (3.270 sec/step)
step 34930 	 loss = 0.268 (2.776 sec/step)
step 34940 	 loss = 0.274 (2.676 sec/step)
step 34950 	 loss = 0.268 (3.138 sec/step)
step 34960 	 loss = 0.277 (3.034 sec/step)
step 34970 	 loss = 0.288 (2.919 sec/step)
step 34980 	 loss = 0.271 (2.723 sec/step)
step 34990 	 loss = 0.267 (2.781 sec/step)
step 35000 	 loss = 0.267 (2.925 sec/step)
step 35010 	 loss = 0.268 (2.599 sec/step)
step 35020 	 loss = 0.265 (2.852 sec/step)
step 35030 	 loss = 0.254 (3.014 sec/step)
step 35040 	 loss = 0.263 (3.125 sec/step)
step 35050 	 loss = 0.277 (2.950 sec/step)
step 35060 	 loss = 0.279 (2.483 sec/step)
step 35070 	 loss = 0.257 (2.682 sec/step)
step 35080 	 loss = 0.265 (2.744 sec/step)
step 35090 	 loss = 0.280 (3.165 sec/step)
step 35100 	 loss = 0.279 (2.727 sec/step)
step 35110 	 loss = 0.266 (2.634 sec/step)
step 35120 	 loss = 0.266 (2.669 sec/step)
step 35130 	 loss = 0.275 (2.631 sec/step)
step 35140 	 loss = 0.281 (3.116 sec/step)
step 35150 	 loss = 0.284 (2.636 sec/step)
step 35160 	 loss = 0.254 (2.825 sec/step)
step 35170 	 loss = 0.262 (2.884 sec/step)
step 35180 	 loss = 0.255 (2.518 sec/step)
step 35190 	 loss = 0.262 (2.658 sec/step)
step 35200 	 loss = 0.280 (2.967 sec/step)
step 35210 	 loss = 0.271 (2.634 sec/step)
step 35220 	 loss = 0.253 (2.962 sec/step)
step 35230 	 loss = 0.266 (3.074 sec/step)
step 35240 	 loss = 0.265 (2.635 sec/step)
step 35250 	 loss = 0.276 (2.771 sec/step)
step 35260 	 loss = 0.274 (2.763 sec/step)
step 35270 	 loss = 0.275 (2.865 sec/step)
step 35280 	 loss = 0.287 (2.769 sec/step)
step 35290 	 loss = 0.285 (3.080 sec/step)
step 35300 	 loss = 0.254 (2.941 sec/step)
step 35310 	 loss = 0.255 (2.359 sec/step)
step 35320 	 loss = 0.275 (2.846 sec/step)
step 35330 	 loss = 0.257 (2.926 sec/step)
step 35340 	 loss = 0.270 (2.588 sec/step)
step 35350 	 loss = 0.270 (2.950 sec/step)
step 35360 	 loss = 0.260 (2.672 sec/step)
step 35370 	 loss = 0.250 (2.727 sec/step)
step 35380 	 loss = 0.273 (2.845 sec/step)
step 35390 	 loss = 0.273 (2.559 sec/step)
step 35400 	 loss = 0.273 (2.611 sec/step)
step 35410 	 loss = 0.268 (2.785 sec/step)
step 35420 	 loss = 0.257 (3.048 sec/step)
step 35430 	 loss = 0.271 (2.810 sec/step)
step 35440 	 loss = 0.270 (2.461 sec/step)
step 35450 	 loss = 0.266 (2.458 sec/step)
step 35460 	 loss = 0.262 (3.135 sec/step)
step 35470 	 loss = 0.275 (2.814 sec/step)
step 35480 	 loss = 0.274 (2.804 sec/step)
step 35490 	 loss = 0.258 (2.621 sec/step)
step 35500 	 loss = 0.256 (2.896 sec/step)
step 35510 	 loss = 0.265 (2.430 sec/step)
step 35520 	 loss = 0.257 (2.748 sec/step)
step 35530 	 loss = 0.266 (2.502 sec/step)
step 35540 	 loss = 0.275 (2.729 sec/step)
step 35550 	 loss = 0.271 (2.855 sec/step)
step 35560 	 loss = 0.290 (3.146 sec/step)
step 35570 	 loss = 0.266 (2.812 sec/step)
step 35580 	 loss = 0.263 (2.769 sec/step)
step 35590 	 loss = 0.262 (2.813 sec/step)
step 35600 	 loss = 0.263 (2.699 sec/step)
step 35610 	 loss = 0.264 (2.647 sec/step)
step 35620 	 loss = 0.262 (2.809 sec/step)
step 35630 	 loss = 0.264 (2.871 sec/step)
step 35640 	 loss = 0.284 (3.260 sec/step)
step 35650 	 loss = 0.281 (2.651 sec/step)
step 35660 	 loss = 0.282 (2.768 sec/step)
step 35670 	 loss = 0.259 (3.048 sec/step)
step 35680 	 loss = 0.277 (2.889 sec/step)
step 35690 	 loss = 0.265 (2.758 sec/step)
step 35700 	 loss = 0.268 (2.654 sec/step)
step 35710 	 loss = 0.287 (3.071 sec/step)
step 35720 	 loss = 0.268 (2.679 sec/step)
step 35730 	 loss = 0.276 (2.730 sec/step)
step 35740 	 loss = 0.282 (3.018 sec/step)
step 35750 	 loss = 0.272 (2.886 sec/step)
step 35760 	 loss = 0.269 (2.462 sec/step)
step 35770 	 loss = 0.279 (3.195 sec/step)
step 35780 	 loss = 0.273 (2.699 sec/step)
step 35790 	 loss = 0.262 (2.766 sec/step)
step 35800 	 loss = 0.269 (2.604 sec/step)
step 35810 	 loss = 0.287 (2.720 sec/step)
step 35820 	 loss = 0.277 (2.593 sec/step)
step 35830 	 loss = 0.275 (2.850 sec/step)
step 35840 	 loss = 0.275 (2.913 sec/step)
step 35850 	 loss = 0.265 (3.139 sec/step)
step 35860 	 loss = 0.282 (3.250 sec/step)
step 35870 	 loss = 0.274 (3.054 sec/step)
step 35880 	 loss = 0.267 (2.964 sec/step)
step 35890 	 loss = 0.285 (2.746 sec/step)
step 35900 	 loss = 0.282 (3.033 sec/step)
step 35910 	 loss = 0.274 (2.603 sec/step)
step 35920 	 loss = 0.282 (2.815 sec/step)
step 35930 	 loss = 0.263 (2.639 sec/step)
step 35940 	 loss = 0.261 (3.014 sec/step)
step 35950 	 loss = 0.274 (2.928 sec/step)
step 35960 	 loss = 0.277 (2.839 sec/step)
step 35970 	 loss = 0.269 (2.828 sec/step)
step 35980 	 loss = 0.277 (2.693 sec/step)
step 35990 	 loss = 0.271 (2.647 sec/step)
step 36000 	 loss = 0.273 (2.848 sec/step)
step 36010 	 loss = 0.283 (3.012 sec/step)
step 36020 	 loss = 0.275 (2.839 sec/step)
step 36030 	 loss = 0.262 (3.008 sec/step)
step 36040 	 loss = 0.264 (2.585 sec/step)
step 36050 	 loss = 0.255 (2.732 sec/step)
step 36060 	 loss = 0.264 (2.982 sec/step)
step 36070 	 loss = 0.277 (2.633 sec/step)
step 36080 	 loss = 0.257 (2.774 sec/step)
step 36090 	 loss = 0.283 (2.649 sec/step)
step 36100 	 loss = 0.294 (2.979 sec/step)
step 36110 	 loss = 0.267 (3.026 sec/step)
step 36120 	 loss = 0.272 (3.021 sec/step)
step 36130 	 loss = 0.262 (3.223 sec/step)
step 36140 	 loss = 0.266 (2.866 sec/step)
step 36150 	 loss = 0.277 (2.838 sec/step)
step 36160 	 loss = 0.281 (3.083 sec/step)
step 36170 	 loss = 0.259 (2.925 sec/step)
step 36180 	 loss = 0.272 (2.591 sec/step)
step 36190 	 loss = 0.277 (3.012 sec/step)
step 36200 	 loss = 0.267 (2.783 sec/step)
step 36210 	 loss = 0.268 (2.844 sec/step)
step 36220 	 loss = 0.269 (2.959 sec/step)
step 36230 	 loss = 0.275 (2.596 sec/step)
step 36240 	 loss = 0.256 (2.703 sec/step)
step 36250 	 loss = 0.264 (2.724 sec/step)
step 36260 	 loss = 0.285 (3.074 sec/step)
step 36270 	 loss = 0.258 (2.685 sec/step)
step 36280 	 loss = 0.275 (2.870 sec/step)
step 36290 	 loss = 0.274 (2.659 sec/step)
step 36300 	 loss = 0.274 (2.923 sec/step)
step 36310 	 loss = 0.284 (2.775 sec/step)
step 36320 	 loss = 0.264 (3.038 sec/step)
step 36330 	 loss = 0.276 (2.688 sec/step)
step 36340 	 loss = 0.279 (2.934 sec/step)
step 36350 	 loss = 0.262 (2.885 sec/step)
step 36360 	 loss = 0.270 (2.872 sec/step)
step 36370 	 loss = 0.276 (2.930 sec/step)
step 36380 	 loss = 0.254 (2.971 sec/step)
step 36390 	 loss = 0.262 (2.721 sec/step)
step 36400 	 loss = 0.300 (2.950 sec/step)
step 36410 	 loss = 0.264 (2.532 sec/step)
step 36420 	 loss = 0.261 (2.607 sec/step)
step 36430 	 loss = 0.261 (2.588 sec/step)
step 36440 	 loss = 0.258 (2.738 sec/step)
step 36450 	 loss = 0.273 (2.700 sec/step)
step 36460 	 loss = 0.288 (2.913 sec/step)
step 36470 	 loss = 0.270 (2.493 sec/step)
step 36480 	 loss = 0.277 (2.401 sec/step)
step 36490 	 loss = 0.260 (2.806 sec/step)
step 36500 	 loss = 0.262 (3.123 sec/step)
step 36510 	 loss = 0.275 (3.100 sec/step)
step 36520 	 loss = 0.258 (2.823 sec/step)
step 36530 	 loss = 0.268 (3.017 sec/step)
step 36540 	 loss = 0.264 (2.971 sec/step)
step 36550 	 loss = 0.267 (2.778 sec/step)
step 36560 	 loss = 0.269 (2.820 sec/step)
step 36570 	 loss = 0.279 (2.773 sec/step)
step 36580 	 loss = 0.267 (2.636 sec/step)
step 36590 	 loss = 0.276 (2.999 sec/step)
step 36600 	 loss = 0.277 (2.813 sec/step)
step 36610 	 loss = 0.291 (3.243 sec/step)
step 36620 	 loss = 0.282 (3.002 sec/step)
step 36630 	 loss = 0.272 (2.747 sec/step)
step 36640 	 loss = 0.267 (2.842 sec/step)
step 36650 	 loss = 0.265 (2.548 sec/step)
step 36660 	 loss = 0.281 (2.846 sec/step)
step 36670 	 loss = 0.263 (2.728 sec/step)
step 36680 	 loss = 0.259 (2.846 sec/step)
step 36690 	 loss = 0.263 (2.796 sec/step)
step 36700 	 loss = 0.265 (2.600 sec/step)
step 36710 	 loss = 0.280 (2.675 sec/step)
step 36720 	 loss = 0.265 (2.647 sec/step)
step 36730 	 loss = 0.274 (2.760 sec/step)
step 36740 	 loss = 0.258 (3.122 sec/step)
step 36750 	 loss = 0.267 (2.957 sec/step)
step 36760 	 loss = 0.262 (2.894 sec/step)
step 36770 	 loss = 0.272 (2.763 sec/step)
step 36780 	 loss = 0.273 (2.830 sec/step)
step 36790 	 loss = 0.260 (2.799 sec/step)
step 36800 	 loss = 0.265 (3.221 sec/step)
step 36810 	 loss = 0.256 (2.664 sec/step)
step 36820 	 loss = 0.267 (2.764 sec/step)
step 36830 	 loss = 0.290 (2.816 sec/step)
step 36840 	 loss = 0.267 (2.919 sec/step)
step 36850 	 loss = 0.271 (2.665 sec/step)
step 36860 	 loss = 0.291 (3.187 sec/step)
step 36870 	 loss = 0.265 (2.786 sec/step)
step 36880 	 loss = 0.262 (2.692 sec/step)
step 36890 	 loss = 0.264 (2.829 sec/step)
step 36900 	 loss = 0.283 (2.832 sec/step)
step 36910 	 loss = 0.257 (2.805 sec/step)
step 36920 	 loss = 0.261 (2.912 sec/step)
step 36930 	 loss = 0.284 (2.768 sec/step)
step 36940 	 loss = 0.264 (2.606 sec/step)
step 36950 	 loss = 0.285 (2.750 sec/step)
step 36960 	 loss = 0.268 (2.823 sec/step)
step 36970 	 loss = 0.276 (3.403 sec/step)
step 36980 	 loss = 0.268 (2.903 sec/step)
step 36990 	 loss = 0.283 (2.675 sec/step)
step 37000 	 loss = 0.268 (2.803 sec/step)
step 37010 	 loss = 0.270 (2.809 sec/step)
step 37020 	 loss = 0.268 (3.080 sec/step)
step 37030 	 loss = 0.278 (2.824 sec/step)
step 37040 	 loss = 0.268 (2.752 sec/step)
step 37050 	 loss = 0.255 (2.481 sec/step)
step 37060 	 loss = 0.277 (2.672 sec/step)
step 37070 	 loss = 0.264 (3.100 sec/step)
Training Epoch 10/100
step 37080 	 loss = 0.247 (2.886 sec/step)
step 37090 	 loss = 0.247 (2.892 sec/step)
step 37100 	 loss = 0.250 (2.872 sec/step)
step 37110 	 loss = 0.255 (2.866 sec/step)
step 37120 	 loss = 0.245 (2.682 sec/step)
step 37130 	 loss = 0.249 (3.000 sec/step)
step 37140 	 loss = 0.259 (2.790 sec/step)
step 37150 	 loss = 0.241 (2.787 sec/step)
step 37160 	 loss = 0.245 (2.955 sec/step)
step 37170 	 loss = 0.258 (2.626 sec/step)
step 37180 	 loss = 0.259 (2.983 sec/step)
step 37190 	 loss = 0.244 (2.633 sec/step)
step 37200 	 loss = 0.259 (2.681 sec/step)
step 37210 	 loss = 0.257 (3.103 sec/step)
step 37220 	 loss = 0.244 (2.880 sec/step)
step 37230 	 loss = 0.253 (2.839 sec/step)
step 37240 	 loss = 0.262 (3.088 sec/step)
step 37250 	 loss = 0.240 (2.974 sec/step)
step 37260 	 loss = 0.247 (2.887 sec/step)
step 37270 	 loss = 0.250 (2.756 sec/step)
step 37280 	 loss = 0.259 (2.766 sec/step)
step 37290 	 loss = 0.256 (3.065 sec/step)
step 37300 	 loss = 0.262 (3.192 sec/step)
step 37310 	 loss = 0.240 (3.000 sec/step)
step 37320 	 loss = 0.265 (2.791 sec/step)
step 37330 	 loss = 0.251 (3.068 sec/step)
step 37340 	 loss = 0.258 (3.094 sec/step)
step 37350 	 loss = 0.245 (2.708 sec/step)
step 37360 	 loss = 0.244 (2.810 sec/step)
step 37370 	 loss = 0.243 (2.632 sec/step)
step 37380 	 loss = 0.234 (2.697 sec/step)
step 37390 	 loss = 0.238 (2.733 sec/step)
step 37400 	 loss = 0.248 (2.797 sec/step)
step 37410 	 loss = 0.237 (2.722 sec/step)
step 37420 	 loss = 0.239 (2.761 sec/step)
step 37430 	 loss = 0.264 (2.811 sec/step)
step 37440 	 loss = 0.259 (2.716 sec/step)
step 37450 	 loss = 0.252 (2.977 sec/step)
step 37460 	 loss = 0.258 (2.794 sec/step)

In [ ]: