In [1]:
import tensorflow as tf
import time

gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=1)

In [2]:
import tarfile
import re
import urllib.request
import os
import random

class ImdbMovieReviews:
    DEFAULT_URL = \
        'http://ai.stanford.edu/~amaas/data/sentiment/aclImdb_v1.tar.gz'
    TOKEN_REGEX = re.compile(r'[A-Za-z]+|[!?.:,()]')
    
    def __init__(self):
        self._cache_dir = './imdb'
        self._url = 'http://ai.stanford.edu/~amaas/data/sentiment/aclImdb_v1.tar.gz'
        
        if not os.path.isfile(self._cache_dir):
            urllib.request.urlretrieve(self._url, self._cache_dir)
        self.filepath = self._cache_dir

    def __iter__(self):
        with tarfile.open(self.filepath) as archive:
            items = archive.getnames()
            for filename in archive.getnames():
                if filename.startswith('aclImdb/train/pos/'):
                    yield self._read(archive, filename), True
                elif filename.startswith('aclImdb/train/neg/'):
                    yield self._read(archive, filename), False
                    
    def _read(self, archive, filename):
        with archive.extractfile(filename) as file_:
            data = file_.read().decode('utf-8')
            data = type(self).TOKEN_REGEX.findall(data)
            data = [x.lower() for x in data]
            return data

In [3]:
import numpy as np
# Spacy is my favourite nlp framework, which havu builtin word embeddings trains on wikipesia
from spacy.en import English

class Embedding:
    
    def __init__(self):
#          spaCy makes using word vectors very easy. 
#             The Lexeme , Token , Span  and Doc  classes all have a .vector property,
#             which is a 1-dimensional numpy array of 32-bit floats:
        self.parser = English()
#         self._length = length
        self.dimensions = 300
        
    def __call__(self, sequence, length):
        # DO I really need them to be equal length?
        # Let's assume I'm not
        data = np.zeros((length, self.dimensions))
        # you can access known words from the parser's vocabulary
        embedded = [self.parser.vocab[w].vector for w in sequence]
        data[:len(sequence)] = embedded
        return data

In [4]:
import itertools

def preprocess_batched_split(iterator, embedding, batch_size):
    iterator = iter(iterator)
    while True:
        batch = []
        labelss = []
        sentence_sizes_batch = []
        for index in range(batch_size):
            text, label = next(iterator)
            sents = [list(y) for x, y in itertools.groupby(text, lambda z: z == '.') if not x]
            sentence_sizes = [len(s) for s in sents]
            text_embed = [embedding(sent) for sent in sents]
            
            batch.append(text_embed)
            labelss.append(label)
            sentence_sizes_batch.append(sentence_sizes)
            
        labels_batch = np.array(labelss, dtype=np.int32)
        sent_per_doc = np.array([len(x) for x in sentence_sizes_batch])
        words_per_sent_per_doc = np.array(sentence_sizes_batch)
        yield np.array(batch), labels_batch, words_per_sent_per_doc, sent_per_doc

In [5]:
import itertools

def preprocess_batched_split2(iterator, embedding, batch_size):
    iterator = iter(iterator)
    while True:
        batch, labels_b = zip(*itertools.islice(iterator, batch_size))
        
        sents_b = [[list(y) for x, y in itertools.groupby(doc, lambda z: z == '.') if not x] for doc in batch]

        sentence_sizes_b = [[len(sent) for sent in doc] for doc in sents_b]
        sentence_size = max(map(max, sentence_sizes_b))
        
        document_sizes = np.array([len(doc) for doc in sentence_sizes_b], dtype=np.int32)
        document_size = document_sizes.max()

        sentence_sizes_np = np.zeros(shape=[batch_size, document_size], dtype=np.int32)
        for bi, ds, ss in zip(range(sentence_sizes_np.shape[0]), document_sizes, sentence_sizes_b):
            sentence_sizes_np[bi][:ds] = ss
        
        text_embed_b = np.zeros((batch_size, document_size, sentence_size, 300))
        for i, ds, doc_sents in zip(range(text_embed_b.shape[0]), document_sizes, sents_b):
            doc_sents_embed = np.array([embedding(sent, sentence_size) for sent in doc_sents])
            text_embed_b[i][:ds] = doc_sents_embed
        
        yield text_embed_b, np.array(labels_b, dtype=np.int32), np.array(document_sizes), sentence_sizes_np

In [6]:
reviews = list(ImdbMovieReviews())

In [7]:
random.shuffle(reviews)

In [8]:
#################################################

In [9]:
%load_ext autoreload
%autoreload 1
%aimport HanSequenceLabellingModel, model_components
%aimport


Modules to reload:
HanSequenceLabellingModel model_components

Modules to skip:


In [10]:
batches_split = preprocess_batched_split2(reviews, Embedding(), batch_size=10)

In [11]:
from HanSequenceLabellingModel import HanSequenceLabellingModel

In [14]:
def HAN_model_1(session, restore_only=False):
    """Hierarhical Attention Network"""
    import tensorflow as tf
    try:
        from tensorflow.contrib.rnn import GRUCell, MultiRNNCell, DropoutWrapper
    except ImportError:
        MultiRNNCell = tf.nn.rnn_cell.MultiRNNCell
        GRUCell = tf.nn.rnn_cell.GRUCell
    from bn_lstm import BNLSTMCell
    from HanSequenceLabellingModel import HanSequenceLabellingModel

    is_training = tf.placeholder(dtype=tf.bool, name='is_training')

    cell = BNLSTMCell(80, is_training) # h-h batchnorm LSTMCell
    cell = MultiRNNCell([cell]*5)

    model = HanSequenceLabellingModel(
            embedding_size=300,
            classes=2,
            word_cell=cell,
            sentence_cell=cell,
            word_output_size=300,
            sentence_output_size=300,
            learning_rate=0.001,
            max_grad_norm=5.0,
            dropout_keep_proba=0.5,
            is_training=is_training,
    )

    saver = tf.train.Saver(tf.global_variables())
    checkpoint_dir = 'checkpoints'
    checkpoint = tf.train.get_checkpoint_state(checkpoint_dir)
    if checkpoint:
        print("Reading model parameters from %s" % checkpoint.model_checkpoint_path)
        saver.restore(session, checkpoint.model_checkpoint_path)
    elif restore_only:
        raise FileNotFoundError("Cannot restore model")
    else:
        print("Created model with fresh parameters")
        session.run(tf.global_variables_initializer())
        
    return model, saver

In [ ]:
tf.reset_default_graph()

config = tf.ConfigProto(allow_soft_placement=True)

with tf.Session(config=config) as s:
    model, saver = HAN_model_1(s)
    tflog_dir = 'tf_logs'
    summary_writer = tf.summary.FileWriter(tflog_dir, graph=tf.get_default_graph())

    for i, (data, labels_batch, sent_per_doc, words_per_sent_per_doc,) in enumerate(batches_split):

        fd = {
            model.is_training: True,
            model.inputs_embedded: data,
            model.word_lengths: words_per_sent_per_doc,
            model.sentence_lengths: sent_per_doc,
            model.labels: labels_batch,
            model.sample_weights: np.ones(shape=(10))
        }

        t0 = time.clock()
        step, summaries, loss, accuracy, _ = s.run([
                model.global_step,
                model.summary,
                model.loss,
                model.accuracy,
                model.train_op,
        ], feed_dict=fd)
        td = time.clock() - t0

        summary_writer.add_summary(summaries, global_step=step)

        checkpoint_frequency = 100
        eval_frequency = 1
        
        if step % 1 == 0:
            print('step %s, loss=%s, accuracy=%s, t=%s, inputs=%s' % (step, loss, accuracy, round(td, 2), fd[model.inputs_embedded].shape))
        if step != 0 and step % checkpoint_frequency == 0:
            print('checkpoint & graph meta')
            checkpoint_path = 'checkpoints/checkpoint'
            saver.save(s, checkpoint_path, global_step=step)
            print('checkpoint done')


Created model with fresh parameters
step 1, loss=0.738725, accuracy=0.2, t=10.37, inputs=(10, 14, 73, 300)
step 2, loss=0.680109, accuracy=0.8, t=8.25, inputs=(10, 29, 68, 300)
step 3, loss=0.714105, accuracy=0.6, t=10.36, inputs=(10, 23, 99, 300)
step 4, loss=0.698463, accuracy=0.5, t=6.24, inputs=(10, 16, 60, 300)
step 5, loss=0.68224, accuracy=0.6, t=11.46, inputs=(10, 24, 109, 300)
step 6, loss=0.700114, accuracy=0.2, t=9.41, inputs=(10, 35, 71, 300)
step 7, loss=0.674425, accuracy=0.5, t=13.91, inputs=(10, 53, 106, 300)
step 8, loss=0.661985, accuracy=0.8, t=8.66, inputs=(10, 38, 63, 300)
step 9, loss=0.725445, accuracy=0.4, t=7.17, inputs=(10, 38, 45, 300)
step 10, loss=0.70498, accuracy=0.5, t=12.06, inputs=(10, 35, 107, 300)
step 11, loss=0.736788, accuracy=0.1, t=21.29, inputs=(10, 14, 234, 300)
step 12, loss=0.692447, accuracy=0.6, t=13.15, inputs=(10, 32, 124, 300)
step 13, loss=0.718758, accuracy=0.4, t=8.04, inputs=(10, 14, 81, 300)
step 14, loss=0.697673, accuracy=0.5, t=14.39, inputs=(10, 54, 109, 300)
step 15, loss=0.685207, accuracy=0.4, t=6.97, inputs=(10, 26, 58, 300)
step 16, loss=0.681173, accuracy=0.5, t=8.97, inputs=(10, 20, 85, 300)
step 17, loss=0.664052, accuracy=0.5, t=19.29, inputs=(10, 40, 178, 300)
step 18, loss=0.67466, accuracy=0.6, t=7.53, inputs=(10, 17, 74, 300)
step 19, loss=0.649936, accuracy=0.7, t=10.75, inputs=(10, 30, 94, 300)
step 20, loss=0.708297, accuracy=0.3, t=11.39, inputs=(10, 35, 98, 300)
step 21, loss=0.682932, accuracy=0.6, t=12.3, inputs=(10, 46, 95, 300)
step 22, loss=0.676214, accuracy=0.6, t=9.67, inputs=(10, 48, 62, 300)
step 23, loss=0.696116, accuracy=0.4, t=7.44, inputs=(10, 30, 56, 300)
step 24, loss=0.717598, accuracy=0.4, t=15.88, inputs=(10, 22, 163, 300)
step 25, loss=0.711038, accuracy=0.4, t=7.68, inputs=(10, 15, 76, 300)
step 26, loss=0.770762, accuracy=0.4, t=12.69, inputs=(10, 34, 113, 300)
step 27, loss=0.732668, accuracy=0.5, t=12.54, inputs=(10, 35, 110, 300)
step 28, loss=0.74201, accuracy=0.4, t=10.92, inputs=(10, 28, 102, 300)
step 29, loss=0.735633, accuracy=0.3, t=15.72, inputs=(10, 32, 149, 300)
step 30, loss=0.695998, accuracy=0.4, t=11.38, inputs=(10, 35, 96, 300)
step 31, loss=0.673268, accuracy=0.5, t=8.01, inputs=(10, 24, 73, 300)
step 32, loss=0.710594, accuracy=0.4, t=9.48, inputs=(10, 40, 67, 300)
step 33, loss=0.730523, accuracy=0.6, t=6.26, inputs=(10, 24, 51, 300)
step 34, loss=0.68752, accuracy=0.6, t=10.6, inputs=(10, 43, 78, 300)
step 35, loss=0.671403, accuracy=0.9, t=8.12, inputs=(10, 32, 62, 300)
step 36, loss=0.700901, accuracy=0.5, t=20.48, inputs=(10, 59, 166, 300)
step 37, loss=0.702703, accuracy=0.6, t=6.83, inputs=(10, 20, 60, 300)
step 38, loss=0.672189, accuracy=0.7, t=21.35, inputs=(10, 54, 189, 300)
step 39, loss=0.68797, accuracy=0.4, t=9.8, inputs=(10, 29, 87, 300)
step 40, loss=0.669536, accuracy=0.6, t=7.48, inputs=(10, 15, 74, 300)
step 41, loss=0.642831, accuracy=0.8, t=7.48, inputs=(10, 40, 48, 300)
step 42, loss=0.692359, accuracy=0.8, t=16.4, inputs=(10, 45, 145, 300)
step 43, loss=0.639724, accuracy=0.8, t=11.56, inputs=(10, 37, 95, 300)
step 44, loss=0.774762, accuracy=0.4, t=11.33, inputs=(10, 43, 90, 300)
step 45, loss=0.734046, accuracy=0.5, t=10.83, inputs=(10, 35, 91, 300)
step 46, loss=0.683978, accuracy=0.6, t=9.93, inputs=(10, 17, 103, 300)
step 47, loss=0.668237, accuracy=0.6, t=13.44, inputs=(10, 46, 110, 300)
step 48, loss=0.726773, accuracy=0.4, t=10.27, inputs=(10, 19, 103, 300)
step 49, loss=0.636052, accuracy=0.7, t=16.33, inputs=(10, 27, 161, 300)
step 50, loss=0.7441, accuracy=0.3, t=7.62, inputs=(10, 19, 71, 300)
step 51, loss=0.728666, accuracy=0.4, t=14.42, inputs=(10, 24, 143, 300)
step 52, loss=0.581205, accuracy=0.8, t=10.1, inputs=(10, 12, 105, 300)
step 53, loss=0.74154, accuracy=0.3, t=8.77, inputs=(10, 16, 89, 300)
step 54, loss=0.768248, accuracy=0.4, t=8.33, inputs=(10, 17, 81, 300)
step 55, loss=0.728258, accuracy=0.3, t=24.55, inputs=(10, 25, 261, 300)
step 56, loss=0.64362, accuracy=0.7, t=6.46, inputs=(10, 13, 66, 300)
step 57, loss=0.687542, accuracy=0.6, t=14.86, inputs=(10, 30, 144, 300)
step 58, loss=0.715265, accuracy=0.4, t=8.93, inputs=(10, 33, 72, 300)
step 59, loss=0.696482, accuracy=0.6, t=7.73, inputs=(10, 23, 68, 300)
step 60, loss=0.774411, accuracy=0.5, t=12.09, inputs=(10, 38, 106, 300)
step 61, loss=0.734261, accuracy=0.5, t=12.53, inputs=(10, 24, 127, 300)
step 62, loss=0.646685, accuracy=0.9, t=9.39, inputs=(10, 15, 96, 300)
step 63, loss=0.670013, accuracy=0.7, t=12.81, inputs=(10, 38, 108, 300)
step 64, loss=0.642633, accuracy=0.7, t=11.91, inputs=(10, 24, 116, 300)
step 65, loss=0.676454, accuracy=0.4, t=10.84, inputs=(10, 25, 103, 300)
step 66, loss=0.692024, accuracy=0.5, t=7.4, inputs=(10, 18, 65, 300)
step 67, loss=0.673464, accuracy=0.7, t=11.21, inputs=(10, 28, 98, 300)
step 68, loss=0.72135, accuracy=0.5, t=6.8, inputs=(10, 25, 55, 300)
step 69, loss=0.651826, accuracy=0.7, t=12.8, inputs=(10, 26, 125, 300)
step 70, loss=0.615962, accuracy=0.8, t=9.85, inputs=(10, 19, 92, 300)
step 71, loss=0.781494, accuracy=0.4, t=17.18, inputs=(10, 16, 182, 300)
step 72, loss=0.816168, accuracy=0.4, t=8.88, inputs=(10, 16, 89, 300)
step 73, loss=0.579751, accuracy=0.7, t=7.7, inputs=(10, 13, 80, 300)
step 74, loss=0.728947, accuracy=0.5, t=9.46, inputs=(10, 38, 74, 300)
step 75, loss=0.644704, accuracy=0.9, t=8.97, inputs=(10, 36, 70, 300)
step 76, loss=0.688664, accuracy=0.3, t=8.87, inputs=(10, 22, 83, 300)
step 77, loss=0.706557, accuracy=0.4, t=11.11, inputs=(10, 26, 104, 300)
step 78, loss=0.666332, accuracy=0.5, t=12.72, inputs=(10, 36, 114, 300)
step 79, loss=0.69793, accuracy=0.7, t=13.47, inputs=(10, 52, 105, 300)
step 80, loss=0.645942, accuracy=0.6, t=8.26, inputs=(10, 28, 68, 300)
step 81, loss=0.726684, accuracy=0.5, t=8.54, inputs=(10, 30, 70, 300)
step 82, loss=0.654467, accuracy=0.8, t=10.95, inputs=(10, 31, 100, 300)
step 83, loss=0.70281, accuracy=0.7, t=9.95, inputs=(10, 46, 64, 300)
step 84, loss=0.707806, accuracy=0.5, t=9.27, inputs=(10, 19, 84, 300)
step 85, loss=0.658835, accuracy=0.7, t=9.44, inputs=(10, 22, 89, 300)
step 86, loss=0.775999, accuracy=0.4, t=7.88, inputs=(10, 28, 66, 300)
step 87, loss=0.649509, accuracy=0.7, t=18.23, inputs=(10, 26, 188, 300)
step 88, loss=0.671548, accuracy=0.6, t=7.27, inputs=(10, 21, 67, 300)
step 89, loss=0.683613, accuracy=0.5, t=10.23, inputs=(10, 31, 90, 300)
step 90, loss=0.769585, accuracy=0.5, t=8.12, inputs=(10, 16, 80, 300)
step 91, loss=0.691055, accuracy=0.5, t=10.74, inputs=(10, 49, 75, 300)
step 92, loss=0.667763, accuracy=0.5, t=9.77, inputs=(10, 52, 60, 300)
step 93, loss=0.658354, accuracy=0.8, t=10.19, inputs=(10, 31, 89, 300)
step 94, loss=0.682494, accuracy=0.7, t=9.0, inputs=(10, 42, 63, 300)
step 95, loss=0.667649, accuracy=0.5, t=8.3, inputs=(10, 39, 57, 300)
step 96, loss=0.749456, accuracy=0.4, t=11.36, inputs=(10, 22, 111, 300)
step 97, loss=0.671649, accuracy=0.5, t=5.08, inputs=(10, 21, 41, 300)
step 98, loss=0.716974, accuracy=0.5, t=7.7, inputs=(10, 22, 68, 300)
step 99, loss=0.673576, accuracy=0.8, t=19.08, inputs=(10, 18, 204, 300)
step 100, loss=0.563101, accuracy=0.8, t=7.56, inputs=(10, 10, 80, 300)
checkpoint & graph meta
checkpoint done
step 101, loss=0.665385, accuracy=0.2, t=16.0, inputs=(10, 23, 160, 300)
step 102, loss=0.69084, accuracy=0.8, t=8.31, inputs=(10, 17, 79, 300)
step 103, loss=0.613912, accuracy=0.7, t=8.99, inputs=(10, 17, 87, 300)
step 104, loss=0.576232, accuracy=0.9, t=10.29, inputs=(10, 37, 83, 300)
step 105, loss=0.687317, accuracy=0.7, t=18.56, inputs=(10, 11, 202, 300)
step 106, loss=0.64717, accuracy=0.5, t=7.71, inputs=(10, 27, 63, 300)
step 107, loss=0.666744, accuracy=0.6, t=8.91, inputs=(10, 28, 76, 300)
step 108, loss=0.61652, accuracy=0.5, t=6.26, inputs=(10, 18, 55, 300)
step 109, loss=0.6916, accuracy=0.5, t=9.76, inputs=(10, 45, 69, 300)
step 110, loss=0.702644, accuracy=0.6, t=7.73, inputs=(10, 19, 71, 300)
step 111, loss=0.660846, accuracy=0.8, t=12.29, inputs=(10, 39, 102, 300)
step 112, loss=0.673234, accuracy=0.4, t=10.68, inputs=(10, 36, 89, 300)
step 113, loss=0.613395, accuracy=0.9, t=8.65, inputs=(10, 47, 55, 300)
step 114, loss=0.622281, accuracy=0.6, t=9.25, inputs=(10, 37, 69, 300)
step 115, loss=0.585489, accuracy=0.8, t=7.19, inputs=(10, 24, 60, 300)
step 116, loss=0.706951, accuracy=0.5, t=13.94, inputs=(10, 45, 116, 300)
step 117, loss=0.544128, accuracy=0.6, t=18.22, inputs=(10, 18, 200, 300)
step 118, loss=0.578999, accuracy=0.7, t=11.21, inputs=(10, 38, 92, 300)
step 119, loss=0.645401, accuracy=0.6, t=9.79, inputs=(10, 39, 74, 300)
step 120, loss=0.707691, accuracy=0.4, t=7.6, inputs=(10, 31, 60, 300)
step 121, loss=0.736745, accuracy=0.6, t=6.78, inputs=(10, 20, 63, 300)
step 122, loss=0.631888, accuracy=0.7, t=8.76, inputs=(10, 22, 74, 300)
step 123, loss=0.658126, accuracy=0.6, t=9.22, inputs=(10, 20, 85, 300)
step 124, loss=0.64567, accuracy=0.6, t=12.89, inputs=(10, 20, 126, 300)
step 125, loss=0.598127, accuracy=0.8, t=7.64, inputs=(10, 34, 57, 300)
step 126, loss=0.740697, accuracy=0.7, t=6.69, inputs=(10, 17, 63, 300)
step 127, loss=1.05066, accuracy=0.3, t=7.38, inputs=(10, 17, 72, 300)
step 128, loss=0.667948, accuracy=0.7, t=9.8, inputs=(10, 44, 71, 300)
step 129, loss=0.829716, accuracy=0.2, t=19.66, inputs=(10, 26, 204, 300)
step 130, loss=0.581855, accuracy=0.8, t=13.48, inputs=(10, 28, 130, 300)
step 131, loss=0.68112, accuracy=0.6, t=8.51, inputs=(10, 26, 74, 300)
step 132, loss=0.643864, accuracy=0.6, t=8.34, inputs=(10, 17, 81, 300)
step 133, loss=0.734921, accuracy=0.5, t=12.91, inputs=(10, 25, 127, 300)
step 134, loss=0.630809, accuracy=0.5, t=7.22, inputs=(10, 24, 63, 300)
step 135, loss=0.75705, accuracy=0.7, t=10.39, inputs=(10, 32, 95, 300)
step 136, loss=0.612458, accuracy=0.7, t=8.19, inputs=(10, 24, 73, 300)
step 137, loss=0.648815, accuracy=0.7, t=11.33, inputs=(10, 26, 109, 300)
step 138, loss=0.557695, accuracy=0.6, t=6.37, inputs=(10, 19, 57, 300)
step 139, loss=0.654729, accuracy=0.6, t=9.22, inputs=(10, 33, 78, 300)
step 140, loss=0.639275, accuracy=0.6, t=10.26, inputs=(10, 16, 107, 300)
step 141, loss=0.630132, accuracy=0.6, t=6.13, inputs=(10, 15, 57, 300)
step 142, loss=0.637619, accuracy=0.7, t=11.82, inputs=(10, 25, 113, 300)
step 143, loss=0.756242, accuracy=0.3, t=11.28, inputs=(10, 38, 93, 300)
step 144, loss=0.529168, accuracy=0.9, t=15.06, inputs=(10, 31, 149, 300)
step 145, loss=0.935343, accuracy=0.1, t=59.48, inputs=(10, 17, 665, 300)
step 146, loss=0.729801, accuracy=0.7, t=7.78, inputs=(10, 16, 76, 300)
step 147, loss=0.66527, accuracy=0.6, t=8.76, inputs=(10, 36, 65, 300)
step 148, loss=0.899861, accuracy=0.3, t=7.61, inputs=(10, 34, 56, 300)
step 149, loss=0.989327, accuracy=0.6, t=5.91, inputs=(10, 14, 59, 300)
step 150, loss=0.781879, accuracy=0.6, t=11.7, inputs=(10, 25, 113, 300)
step 151, loss=0.545673, accuracy=0.9, t=10.77, inputs=(10, 34, 90, 300)
step 152, loss=0.709059, accuracy=0.6, t=9.7, inputs=(10, 33, 81, 300)
step 153, loss=0.638576, accuracy=0.8, t=10.73, inputs=(10, 35, 93, 300)
step 154, loss=0.665108, accuracy=0.7, t=9.07, inputs=(10, 46, 59, 300)
step 155, loss=0.674595, accuracy=0.5, t=10.74, inputs=(10, 39, 87, 300)
step 156, loss=0.682924, accuracy=0.4, t=8.85, inputs=(10, 18, 87, 300)
step 157, loss=0.659437, accuracy=0.6, t=12.3, inputs=(10, 45, 95, 300)
step 158, loss=0.628727, accuracy=0.5, t=10.35, inputs=(10, 20, 102, 300)
step 159, loss=0.655028, accuracy=0.5, t=11.4, inputs=(10, 36, 96, 300)
step 160, loss=0.808824, accuracy=0.3, t=7.39, inputs=(10, 15, 72, 300)
step 161, loss=0.682511, accuracy=0.4, t=6.44, inputs=(10, 16, 63, 300)
step 162, loss=0.630504, accuracy=0.5, t=10.17, inputs=(10, 31, 88, 300)
step 163, loss=0.678662, accuracy=0.7, t=9.36, inputs=(10, 37, 70, 300)
step 164, loss=0.639462, accuracy=0.7, t=6.26, inputs=(10, 18, 56, 300)
step 165, loss=0.669843, accuracy=0.4, t=11.08, inputs=(10, 39, 90, 300)
step 166, loss=0.609987, accuracy=0.8, t=6.83, inputs=(10, 24, 55, 300)
step 167, loss=0.665087, accuracy=0.5, t=10.3, inputs=(10, 17, 105, 300)
step 168, loss=0.628322, accuracy=0.7, t=8.19, inputs=(10, 32, 65, 300)
step 169, loss=0.827469, accuracy=0.3, t=10.09, inputs=(10, 17, 103, 300)
step 170, loss=0.611986, accuracy=0.8, t=8.39, inputs=(10, 15, 84, 300)
step 171, loss=0.540461, accuracy=0.8, t=6.33, inputs=(10, 19, 57, 300)
step 172, loss=0.614326, accuracy=0.8, t=9.46, inputs=(10, 35, 75, 300)
step 173, loss=0.661037, accuracy=0.7, t=11.2, inputs=(10, 29, 99, 300)
step 174, loss=0.668969, accuracy=0.5, t=11.81, inputs=(10, 25, 116, 300)
step 175, loss=0.606066, accuracy=0.9, t=8.39, inputs=(10, 35, 62, 300)
step 176, loss=0.74067, accuracy=0.4, t=9.81, inputs=(10, 45, 72, 300)
step 177, loss=0.478438, accuracy=1.0, t=6.38, inputs=(10, 20, 58, 300)
step 178, loss=0.758397, accuracy=0.4, t=8.7, inputs=(10, 14, 91, 300)
step 179, loss=0.693941, accuracy=0.5, t=9.08, inputs=(10, 33, 73, 300)
step 180, loss=0.535546, accuracy=0.8, t=7.03, inputs=(10, 23, 61, 300)
step 181, loss=0.653591, accuracy=0.6, t=11.17, inputs=(10, 37, 98, 300)
step 182, loss=0.718024, accuracy=0.5, t=10.91, inputs=(10, 53, 71, 300)
step 183, loss=0.71918, accuracy=0.3, t=12.61, inputs=(10, 71, 70, 300)
step 184, loss=0.491059, accuracy=0.9, t=8.5, inputs=(10, 19, 85, 300)
step 185, loss=0.631113, accuracy=0.7, t=12.18, inputs=(10, 32, 113, 300)
step 186, loss=0.744308, accuracy=0.6, t=8.0, inputs=(10, 13, 83, 300)
step 187, loss=0.558192, accuracy=0.7, t=11.03, inputs=(10, 19, 113, 300)
step 188, loss=0.683127, accuracy=0.5, t=7.88, inputs=(10, 27, 65, 300)
step 189, loss=0.670783, accuracy=0.8, t=9.92, inputs=(10, 35, 80, 300)
step 190, loss=0.51042, accuracy=0.6, t=7.44, inputs=(10, 12, 81, 300)
step 191, loss=0.580116, accuracy=0.8, t=10.65, inputs=(10, 49, 76, 300)
step 192, loss=0.639372, accuracy=0.7, t=9.95, inputs=(10, 18, 101, 300)
step 193, loss=0.61775, accuracy=0.8, t=31.72, inputs=(10, 49, 272, 300)
step 194, loss=0.594192, accuracy=0.7, t=6.28, inputs=(10, 20, 54, 300)
step 195, loss=0.727521, accuracy=0.7, t=9.59, inputs=(10, 22, 88, 300)
step 196, loss=0.581144, accuracy=0.7, t=7.47, inputs=(10, 26, 65, 300)
step 197, loss=0.586572, accuracy=0.6, t=20.17, inputs=(10, 22, 220, 300)
step 198, loss=0.811934, accuracy=0.4, t=8.63, inputs=(10, 33, 68, 300)
step 199, loss=0.604231, accuracy=0.6, t=10.76, inputs=(10, 38, 90, 300)
step 200, loss=0.68698, accuracy=0.7, t=9.54, inputs=(10, 16, 101, 300)
checkpoint & graph meta
checkpoint done
step 201, loss=0.687531, accuracy=0.6, t=6.98, inputs=(10, 20, 65, 300)
step 202, loss=0.524262, accuracy=0.7, t=8.08, inputs=(10, 28, 68, 300)
step 203, loss=0.584315, accuracy=0.7, t=22.32, inputs=(10, 19, 252, 300)
step 204, loss=0.511879, accuracy=0.9, t=9.99, inputs=(10, 27, 94, 300)
step 205, loss=0.590231, accuracy=0.8, t=7.1, inputs=(10, 33, 49, 300)
step 206, loss=0.641761, accuracy=0.8, t=8.43, inputs=(10, 17, 85, 300)
step 207, loss=0.654154, accuracy=0.7, t=9.56, inputs=(10, 30, 81, 300)
step 208, loss=0.639981, accuracy=0.6, t=18.37, inputs=(10, 28, 186, 300)
step 209, loss=0.545908, accuracy=0.7, t=7.89, inputs=(10, 26, 69, 300)
step 210, loss=0.549533, accuracy=0.8, t=8.83, inputs=(10, 41, 64, 300)
step 211, loss=0.357069, accuracy=0.9, t=6.13, inputs=(10, 22, 53, 300)
step 212, loss=0.519565, accuracy=0.8, t=8.32, inputs=(10, 32, 67, 300)
step 213, loss=0.530484, accuracy=0.7, t=10.26, inputs=(10, 56, 64, 300)
step 214, loss=0.479529, accuracy=0.8, t=6.0, inputs=(10, 17, 55, 300)
step 215, loss=0.5467, accuracy=0.7, t=7.9, inputs=(10, 26, 67, 300)
step 216, loss=0.530919, accuracy=0.7, t=7.18, inputs=(10, 31, 54, 300)
step 217, loss=0.510751, accuracy=0.8, t=13.38, inputs=(10, 45, 109, 300)
step 218, loss=0.881384, accuracy=0.6, t=9.47, inputs=(10, 19, 94, 300)
step 219, loss=0.369272, accuracy=0.8, t=8.13, inputs=(10, 19, 76, 300)
step 220, loss=0.510117, accuracy=0.7, t=9.03, inputs=(10, 19, 88, 300)
step 221, loss=0.670428, accuracy=0.6, t=9.11, inputs=(10, 33, 75, 300)
step 222, loss=0.435367, accuracy=0.9, t=8.84, inputs=(10, 22, 82, 300)
step 223, loss=0.607875, accuracy=0.7, t=8.96, inputs=(10, 41, 64, 300)
step 224, loss=0.842519, accuracy=0.4, t=5.96, inputs=(10, 15, 57, 300)
step 225, loss=0.606833, accuracy=0.6, t=14.72, inputs=(10, 25, 148, 300)
step 226, loss=0.496403, accuracy=0.7, t=11.04, inputs=(10, 43, 86, 300)
step 227, loss=0.499155, accuracy=0.7, t=9.84, inputs=(10, 13, 104, 300)
step 228, loss=0.500318, accuracy=0.8, t=12.08, inputs=(10, 48, 89, 300)
step 229, loss=0.507281, accuracy=0.6, t=7.44, inputs=(10, 26, 64, 300)
step 230, loss=0.693437, accuracy=0.7, t=25.97, inputs=(10, 19, 291, 300)
step 231, loss=0.726843, accuracy=0.5, t=7.31, inputs=(10, 19, 68, 300)
step 232, loss=0.776327, accuracy=0.6, t=18.51, inputs=(10, 36, 184, 300)
step 233, loss=0.560051, accuracy=0.7, t=19.72, inputs=(10, 35, 197, 300)
step 234, loss=0.433054, accuracy=0.9, t=14.48, inputs=(10, 31, 143, 300)
step 235, loss=0.483411, accuracy=0.8, t=9.37, inputs=(10, 31, 79, 300)
step 236, loss=0.590708, accuracy=0.6, t=8.38, inputs=(10, 20, 81, 300)
step 237, loss=0.287642, accuracy=1.0, t=6.71, inputs=(10, 21, 63, 300)
step 238, loss=0.487943, accuracy=0.7, t=18.14, inputs=(10, 20, 198, 300)
step 239, loss=0.692253, accuracy=0.6, t=11.7, inputs=(10, 17, 127, 300)
step 240, loss=0.857229, accuracy=0.4, t=14.01, inputs=(10, 28, 139, 300)
step 241, loss=0.762325, accuracy=0.7, t=6.47, inputs=(10, 24, 55, 300)
step 242, loss=0.641401, accuracy=0.6, t=7.01, inputs=(10, 19, 65, 300)
step 243, loss=0.758394, accuracy=0.7, t=9.2, inputs=(10, 33, 77, 300)
step 244, loss=0.620559, accuracy=0.5, t=17.02, inputs=(10, 17, 187, 300)
step 245, loss=0.636226, accuracy=0.8, t=11.42, inputs=(10, 39, 98, 300)
step 246, loss=0.623167, accuracy=0.6, t=7.54, inputs=(10, 21, 70, 300)
step 247, loss=0.610626, accuracy=0.7, t=17.41, inputs=(10, 23, 187, 300)
step 248, loss=0.480976, accuracy=0.8, t=11.01, inputs=(10, 35, 95, 300)
step 249, loss=0.459194, accuracy=0.9, t=7.8, inputs=(10, 32, 56, 300)
step 250, loss=0.652524, accuracy=0.5, t=6.69, inputs=(10, 14, 70, 300)
step 251, loss=0.568622, accuracy=0.9, t=11.44, inputs=(10, 58, 75, 300)
step 252, loss=0.594596, accuracy=0.5, t=10.95, inputs=(10, 22, 106, 300)
step 253, loss=0.570898, accuracy=0.6, t=12.04, inputs=(10, 30, 113, 300)
step 254, loss=0.684085, accuracy=0.6, t=6.34, inputs=(10, 29, 52, 300)
step 255, loss=0.550342, accuracy=0.7, t=12.16, inputs=(10, 59, 85, 300)
step 256, loss=0.568795, accuracy=0.8, t=9.74, inputs=(10, 24, 93, 300)
step 257, loss=0.628796, accuracy=0.8, t=9.03, inputs=(10, 39, 70, 300)
step 258, loss=0.39755, accuracy=1.0, t=7.97, inputs=(10, 21, 77, 300)
step 259, loss=0.588041, accuracy=0.6, t=5.46, inputs=(10, 21, 46, 300)
step 260, loss=0.507841, accuracy=0.6, t=6.15, inputs=(10, 16, 60, 300)
step 261, loss=0.566705, accuracy=0.7, t=10.72, inputs=(10, 29, 92, 300)
step 262, loss=0.392351, accuracy=0.8, t=6.63, inputs=(10, 10, 73, 300)
step 263, loss=0.601443, accuracy=0.6, t=8.31, inputs=(10, 41, 60, 300)
step 264, loss=0.816107, accuracy=0.6, t=14.46, inputs=(10, 23, 152, 300)
step 265, loss=0.398324, accuracy=0.9, t=7.76, inputs=(10, 18, 78, 300)
step 266, loss=0.339299, accuracy=0.8, t=8.28, inputs=(10, 18, 84, 300)
step 267, loss=0.634396, accuracy=0.9, t=8.53, inputs=(10, 22, 80, 300)
step 268, loss=0.840002, accuracy=0.6, t=8.98, inputs=(10, 30, 78, 300)
step 269, loss=0.742073, accuracy=0.6, t=12.86, inputs=(10, 26, 128, 300)
step 270, loss=0.670785, accuracy=0.5, t=10.75, inputs=(10, 49, 77, 300)
step 271, loss=0.754606, accuracy=0.5, t=16.17, inputs=(10, 19, 178, 300)
step 272, loss=0.378247, accuracy=0.9, t=10.11, inputs=(10, 29, 89, 300)
step 273, loss=0.612265, accuracy=0.7, t=7.63, inputs=(10, 23, 62, 300)
step 274, loss=0.463964, accuracy=0.7, t=8.45, inputs=(10, 33, 61, 300)
step 275, loss=0.702386, accuracy=0.5, t=9.45, inputs=(10, 49, 52, 300)
step 276, loss=0.525012, accuracy=0.6, t=12.89, inputs=(10, 23, 120, 300)
step 277, loss=0.534126, accuracy=0.6, t=7.86, inputs=(10, 15, 73, 300)
step 278, loss=0.445746, accuracy=0.9, t=11.13, inputs=(10, 22, 100, 300)
step 279, loss=0.765065, accuracy=0.5, t=6.24, inputs=(10, 21, 50, 300)
step 280, loss=0.703522, accuracy=0.6, t=8.02, inputs=(10, 15, 75, 300)
step 281, loss=0.55372, accuracy=0.7, t=11.8, inputs=(10, 15, 113, 300)
step 282, loss=0.627125, accuracy=0.7, t=9.96, inputs=(10, 41, 66, 300)
step 283, loss=0.637919, accuracy=0.6, t=10.2, inputs=(10, 27, 87, 300)
step 284, loss=0.564966, accuracy=0.7, t=10.16, inputs=(10, 45, 64, 300)
step 285, loss=0.266049, accuracy=1.0, t=7.47, inputs=(10, 15, 68, 300)
step 286, loss=0.354213, accuracy=1.0, t=18.38, inputs=(10, 20, 178, 300)
step 287, loss=0.468724, accuracy=0.9, t=9.87, inputs=(10, 37, 70, 300)
step 288, loss=0.524656, accuracy=0.6, t=11.5, inputs=(10, 17, 110, 300)
step 289, loss=0.549026, accuracy=0.8, t=13.33, inputs=(10, 38, 100, 300)
step 290, loss=0.401078, accuracy=0.9, t=9.2, inputs=(10, 18, 82, 300)
step 291, loss=0.540209, accuracy=0.8, t=7.76, inputs=(10, 18, 64, 300)
step 292, loss=0.619639, accuracy=0.7, t=12.17, inputs=(10, 31, 100, 300)
step 293, loss=0.520541, accuracy=0.8, t=7.88, inputs=(10, 15, 70, 300)
step 294, loss=0.327903, accuracy=1.0, t=9.96, inputs=(10, 34, 73, 300)
step 295, loss=0.414736, accuracy=0.9, t=9.4, inputs=(10, 54, 47, 300)
step 296, loss=0.826841, accuracy=0.6, t=13.54, inputs=(10, 28, 121, 300)
step 297, loss=0.61074, accuracy=0.5, t=12.04, inputs=(10, 16, 116, 300)
step 298, loss=0.744939, accuracy=0.7, t=21.1, inputs=(10, 29, 202, 300)
step 299, loss=0.568617, accuracy=0.6, t=10.58, inputs=(10, 47, 69, 300)
step 300, loss=0.479264, accuracy=0.8, t=7.77, inputs=(10, 32, 56, 300)
checkpoint & graph meta
checkpoint done
step 301, loss=1.08357, accuracy=0.4, t=15.52, inputs=(10, 22, 149, 300)
step 302, loss=0.576727, accuracy=0.6, t=7.74, inputs=(10, 29, 58, 300)
step 303, loss=0.562974, accuracy=0.7, t=9.75, inputs=(10, 34, 75, 300)
step 304, loss=0.559329, accuracy=0.6, t=7.45, inputs=(10, 11, 73, 300)
step 305, loss=0.441959, accuracy=0.6, t=6.15, inputs=(10, 17, 53, 300)
step 306, loss=0.808961, accuracy=0.6, t=10.82, inputs=(10, 37, 80, 300)
step 307, loss=0.652616, accuracy=0.6, t=10.65, inputs=(10, 38, 79, 300)
step 308, loss=0.987053, accuracy=0.5, t=13.95, inputs=(10, 38, 112, 300)
step 309, loss=0.59426, accuracy=0.7, t=12.59, inputs=(10, 20, 119, 300)
step 310, loss=0.381819, accuracy=0.8, t=7.8, inputs=(10, 14, 69, 300)
step 311, loss=0.427465, accuracy=0.7, t=8.09, inputs=(10, 26, 61, 300)
step 312, loss=0.483985, accuracy=0.7, t=10.02, inputs=(10, 22, 87, 300)
step 313, loss=0.27051, accuracy=0.9, t=7.72, inputs=(10, 16, 81, 300)
step 314, loss=0.626253, accuracy=0.7, t=10.3, inputs=(10, 22, 105, 300)
step 315, loss=0.710055, accuracy=0.5, t=6.73, inputs=(10, 24, 57, 300)
step 316, loss=0.620315, accuracy=0.6, t=11.51, inputs=(10, 40, 94, 300)
step 317, loss=0.364145, accuracy=0.9, t=6.95, inputs=(10, 15, 70, 300)
step 318, loss=0.516906, accuracy=0.8, t=10.32, inputs=(10, 27, 91, 300)
step 319, loss=1.03278, accuracy=0.6, t=8.08, inputs=(10, 15, 82, 300)
step 320, loss=0.413545, accuracy=0.8, t=7.47, inputs=(10, 26, 62, 300)
step 321, loss=0.564812, accuracy=0.8, t=5.65, inputs=(10, 24, 47, 300)
step 322, loss=0.475586, accuracy=0.7, t=7.87, inputs=(10, 20, 77, 300)
step 323, loss=0.594272, accuracy=0.8, t=20.79, inputs=(10, 22, 228, 300)
step 324, loss=0.531874, accuracy=0.7, t=8.24, inputs=(10, 22, 81, 300)
step 325, loss=0.386063, accuracy=0.9, t=6.57, inputs=(10, 21, 60, 300)
step 326, loss=0.489395, accuracy=0.7, t=7.61, inputs=(10, 19, 73, 300)
step 327, loss=0.502361, accuracy=0.8, t=6.77, inputs=(10, 28, 54, 300)
step 328, loss=0.25819, accuracy=1.0, t=6.64, inputs=(10, 24, 59, 300)
step 329, loss=0.554695, accuracy=0.6, t=9.28, inputs=(10, 34, 75, 300)
step 330, loss=0.213289, accuracy=1.0, t=12.33, inputs=(10, 16, 133, 300)
step 331, loss=0.628648, accuracy=0.6, t=7.6, inputs=(10, 22, 70, 300)
step 332, loss=0.594088, accuracy=0.7, t=6.6, inputs=(10, 17, 64, 300)
step 333, loss=0.427513, accuracy=0.8, t=8.81, inputs=(10, 22, 82, 300)
step 334, loss=0.659315, accuracy=0.6, t=13.94, inputs=(10, 36, 128, 300)
step 335, loss=0.441601, accuracy=0.8, t=11.09, inputs=(10, 27, 108, 300)
step 336, loss=0.283737, accuracy=1.0, t=10.1, inputs=(10, 30, 88, 300)
step 337, loss=0.407965, accuracy=0.8, t=9.61, inputs=(10, 29, 87, 300)
step 338, loss=0.537662, accuracy=0.6, t=7.69, inputs=(10, 22, 70, 300)
step 339, loss=0.44729, accuracy=0.9, t=10.29, inputs=(10, 42, 79, 300)
step 340, loss=0.604082, accuracy=0.6, t=9.09, inputs=(10, 36, 76, 300)
step 341, loss=0.528729, accuracy=0.7, t=7.89, inputs=(10, 12, 84, 300)
step 342, loss=0.399829, accuracy=0.8, t=10.03, inputs=(10, 34, 85, 300)
step 343, loss=0.601686, accuracy=0.7, t=6.31, inputs=(10, 20, 54, 300)
step 344, loss=0.785623, accuracy=0.5, t=8.61, inputs=(10, 43, 64, 300)
step 345, loss=0.647831, accuracy=0.5, t=7.92, inputs=(10, 34, 61, 300)
step 346, loss=0.612553, accuracy=0.6, t=14.84, inputs=(10, 37, 125, 300)
step 347, loss=0.261351, accuracy=1.0, t=7.59, inputs=(10, 20, 63, 300)
step 348, loss=0.346268, accuracy=0.8, t=6.48, inputs=(10, 23, 51, 300)
step 349, loss=0.473015, accuracy=0.7, t=7.33, inputs=(10, 30, 52, 300)
step 350, loss=0.663025, accuracy=0.5, t=9.95, inputs=(10, 30, 77, 300)
step 351, loss=0.305707, accuracy=0.9, t=8.79, inputs=(10, 35, 60, 300)
step 352, loss=0.382447, accuracy=0.8, t=12.3, inputs=(10, 50, 87, 300)
step 353, loss=0.408819, accuracy=0.8, t=10.35, inputs=(10, 17, 96, 300)
step 354, loss=0.379801, accuracy=0.9, t=10.3, inputs=(10, 28, 87, 300)
step 355, loss=0.556584, accuracy=0.5, t=10.43, inputs=(10, 39, 77, 300)
step 356, loss=0.399932, accuracy=0.9, t=9.78, inputs=(10, 17, 101, 300)
step 357, loss=0.466726, accuracy=0.6, t=9.62, inputs=(10, 40, 77, 300)
step 358, loss=0.668821, accuracy=0.7, t=8.36, inputs=(10, 47, 54, 300)
step 359, loss=0.295508, accuracy=1.0, t=5.86, inputs=(10, 20, 53, 300)
step 360, loss=0.560799, accuracy=0.6, t=7.09, inputs=(10, 23, 67, 300)
step 361, loss=0.411627, accuracy=0.8, t=11.28, inputs=(10, 21, 115, 300)
step 362, loss=0.42212, accuracy=0.9, t=9.32, inputs=(10, 12, 105, 300)
step 363, loss=0.260987, accuracy=1.0, t=10.94, inputs=(10, 34, 96, 300)
step 364, loss=0.220672, accuracy=0.9, t=7.95, inputs=(10, 43, 51, 300)
step 365, loss=0.141139, accuracy=1.0, t=10.93, inputs=(10, 18, 113, 300)
step 366, loss=0.363141, accuracy=0.8, t=15.21, inputs=(10, 44, 138, 300)
step 367, loss=0.491944, accuracy=0.9, t=13.43, inputs=(10, 51, 107, 300)
step 368, loss=0.38721, accuracy=0.8, t=12.57, inputs=(10, 34, 116, 300)
step 369, loss=0.761613, accuracy=0.6, t=10.52, inputs=(10, 48, 76, 300)
step 370, loss=0.373265, accuracy=0.9, t=9.26, inputs=(10, 21, 93, 300)
step 371, loss=0.248838, accuracy=1.0, t=14.12, inputs=(10, 45, 125, 300)
step 372, loss=0.185861, accuracy=1.0, t=10.5, inputs=(10, 14, 115, 300)
step 373, loss=0.404639, accuracy=0.9, t=9.37, inputs=(10, 28, 83, 300)
step 374, loss=0.387863, accuracy=0.9, t=7.46, inputs=(10, 16, 79, 300)
step 375, loss=0.283084, accuracy=0.9, t=8.21, inputs=(10, 29, 68, 300)
step 376, loss=0.327868, accuracy=0.9, t=14.1, inputs=(10, 22, 148, 300)
step 377, loss=0.875754, accuracy=0.6, t=8.04, inputs=(10, 12, 87, 300)
step 378, loss=0.379182, accuracy=0.7, t=14.27, inputs=(10, 35, 136, 300)
step 379, loss=0.507504, accuracy=0.7, t=8.82, inputs=(10, 35, 66, 300)
step 380, loss=0.659434, accuracy=0.7, t=6.48, inputs=(10, 20, 62, 300)
step 381, loss=0.474578, accuracy=0.8, t=8.48, inputs=(10, 36, 63, 300)
step 382, loss=0.423361, accuracy=0.7, t=9.62, inputs=(10, 26, 91, 300)
step 383, loss=0.251328, accuracy=0.9, t=10.26, inputs=(10, 29, 96, 300)
step 384, loss=0.744984, accuracy=0.5, t=11.38, inputs=(10, 50, 84, 300)
step 385, loss=0.526832, accuracy=0.6, t=9.96, inputs=(10, 29, 91, 300)
step 386, loss=0.319183, accuracy=0.9, t=7.37, inputs=(10, 20, 70, 300)
step 387, loss=0.249797, accuracy=1.0, t=8.06, inputs=(10, 43, 53, 300)
step 388, loss=0.47134, accuracy=0.7, t=11.69, inputs=(10, 14, 129, 300)
step 389, loss=0.677498, accuracy=0.5, t=10.76, inputs=(10, 19, 110, 300)
step 390, loss=0.421168, accuracy=0.8, t=8.51, inputs=(10, 16, 81, 300)
step 391, loss=0.269468, accuracy=1.0, t=6.53, inputs=(10, 19, 54, 300)
step 392, loss=0.27306, accuracy=0.9, t=8.09, inputs=(10, 17, 72, 300)
step 393, loss=0.453661, accuracy=0.8, t=17.93, inputs=(10, 23, 171, 300)
step 394, loss=0.340588, accuracy=0.9, t=11.3, inputs=(10, 36, 88, 300)
step 395, loss=0.424151, accuracy=0.8, t=8.91, inputs=(10, 18, 82, 300)
step 396, loss=0.446903, accuracy=0.8, t=5.56, inputs=(10, 23, 38, 300)
step 397, loss=0.606333, accuracy=0.6, t=9.11, inputs=(10, 34, 67, 300)
step 398, loss=0.832099, accuracy=0.6, t=12.53, inputs=(10, 30, 110, 300)
step 399, loss=0.618247, accuracy=0.9, t=9.75, inputs=(10, 37, 69, 300)
step 400, loss=0.237122, accuracy=0.9, t=6.82, inputs=(10, 16, 59, 300)
checkpoint & graph meta
checkpoint done
step 401, loss=0.528999, accuracy=0.7, t=11.38, inputs=(10, 39, 85, 300)
step 402, loss=0.731366, accuracy=0.6, t=8.23, inputs=(10, 39, 53, 300)
step 403, loss=0.248134, accuracy=1.0, t=6.48, inputs=(10, 15, 56, 300)
step 404, loss=0.555235, accuracy=0.7, t=11.42, inputs=(10, 26, 101, 300)
step 405, loss=0.472575, accuracy=0.7, t=15.3, inputs=(10, 35, 132, 300)
step 406, loss=0.319596, accuracy=1.0, t=9.18, inputs=(10, 31, 70, 300)
step 407, loss=0.728268, accuracy=0.6, t=8.36, inputs=(10, 25, 67, 300)
step 408, loss=0.185958, accuracy=1.0, t=10.45, inputs=(10, 31, 88, 300)
step 409, loss=0.299942, accuracy=0.8, t=12.0, inputs=(10, 34, 98, 300)
step 410, loss=0.579225, accuracy=0.7, t=8.96, inputs=(10, 21, 75, 300)
step 411, loss=0.321744, accuracy=0.9, t=11.6, inputs=(10, 23, 102, 300)
step 412, loss=0.563228, accuracy=0.7, t=6.83, inputs=(10, 20, 57, 300)
step 413, loss=0.286033, accuracy=0.9, t=11.0, inputs=(10, 26, 98, 300)
step 414, loss=0.436728, accuracy=0.8, t=8.13, inputs=(10, 23, 68, 300)
step 415, loss=0.609407, accuracy=0.6, t=17.57, inputs=(10, 34, 154, 300)
step 416, loss=0.430572, accuracy=0.9, t=11.34, inputs=(10, 37, 88, 300)
step 417, loss=0.425988, accuracy=0.8, t=11.2, inputs=(10, 44, 77, 300)
step 418, loss=0.482938, accuracy=0.8, t=9.94, inputs=(10, 39, 66, 300)
step 419, loss=0.473595, accuracy=0.7, t=8.63, inputs=(10, 15, 82, 300)
step 420, loss=0.558693, accuracy=0.6, t=9.82, inputs=(10, 42, 63, 300)
step 421, loss=0.519422, accuracy=0.7, t=16.12, inputs=(10, 30, 141, 300)
step 422, loss=0.500861, accuracy=0.8, t=17.94, inputs=(10, 15, 181, 300)
step 423, loss=0.409021, accuracy=0.9, t=11.05, inputs=(10, 27, 97, 300)
step 424, loss=0.287397, accuracy=1.0, t=29.22, inputs=(10, 31, 295, 300)
step 425, loss=0.304689, accuracy=0.9, t=7.91, inputs=(10, 19, 71, 300)
step 426, loss=0.672151, accuracy=0.7, t=8.98, inputs=(10, 19, 81, 300)
step 427, loss=0.495164, accuracy=0.7, t=10.8, inputs=(10, 38, 79, 300)
step 428, loss=0.711931, accuracy=0.8, t=7.38, inputs=(10, 17, 67, 300)
step 429, loss=0.533859, accuracy=0.6, t=7.51, inputs=(10, 24, 65, 300)
step 430, loss=0.627397, accuracy=0.6, t=14.43, inputs=(10, 16, 163, 300)
step 431, loss=0.548883, accuracy=0.8, t=13.53, inputs=(10, 46, 117, 300)
step 432, loss=0.292363, accuracy=0.9, t=8.76, inputs=(10, 19, 86, 300)
step 433, loss=0.547398, accuracy=0.6, t=12.46, inputs=(10, 20, 124, 300)
step 434, loss=0.568823, accuracy=0.7, t=7.76, inputs=(10, 17, 78, 300)
step 435, loss=0.488203, accuracy=0.7, t=13.18, inputs=(10, 17, 137, 300)
step 436, loss=0.55241, accuracy=0.8, t=8.69, inputs=(10, 22, 82, 300)
step 437, loss=0.320155, accuracy=0.9, t=18.04, inputs=(10, 34, 176, 300)
step 438, loss=0.368815, accuracy=0.9, t=11.62, inputs=(10, 37, 100, 300)
step 439, loss=0.646003, accuracy=0.8, t=8.43, inputs=(10, 21, 81, 300)
step 440, loss=0.470662, accuracy=0.7, t=7.32, inputs=(10, 21, 68, 300)
step 441, loss=0.300319, accuracy=0.9, t=8.43, inputs=(10, 20, 79, 300)
step 442, loss=0.250965, accuracy=0.9, t=7.28, inputs=(10, 18, 71, 300)
step 443, loss=0.203419, accuracy=0.9, t=10.24, inputs=(10, 23, 102, 300)
step 444, loss=0.729278, accuracy=0.7, t=11.64, inputs=(10, 28, 108, 300)
step 445, loss=0.384619, accuracy=0.9, t=9.78, inputs=(10, 35, 77, 300)
step 446, loss=0.348398, accuracy=0.8, t=9.71, inputs=(10, 23, 94, 300)
step 447, loss=0.3617, accuracy=0.7, t=10.02, inputs=(10, 23, 97, 300)
step 448, loss=0.346674, accuracy=0.8, t=11.95, inputs=(10, 21, 121, 300)
step 449, loss=0.307962, accuracy=0.8, t=7.15, inputs=(10, 29, 55, 300)
step 450, loss=0.676668, accuracy=0.8, t=8.77, inputs=(10, 38, 63, 300)
step 451, loss=0.475336, accuracy=0.7, t=6.74, inputs=(10, 21, 61, 300)
step 452, loss=0.497333, accuracy=0.7, t=6.82, inputs=(10, 27, 55, 300)
step 453, loss=0.348082, accuracy=0.8, t=9.78, inputs=(10, 23, 93, 300)
step 454, loss=0.48076, accuracy=0.8, t=9.53, inputs=(10, 27, 84, 300)
step 455, loss=0.206608, accuracy=1.0, t=10.75, inputs=(10, 36, 90, 300)
step 456, loss=0.527469, accuracy=0.8, t=11.46, inputs=(10, 30, 103, 300)
step 457, loss=0.303274, accuracy=0.9, t=6.56, inputs=(10, 19, 60, 300)
step 458, loss=0.74917, accuracy=0.8, t=6.89, inputs=(10, 13, 68, 300)
step 459, loss=0.606303, accuracy=0.8, t=7.02, inputs=(10, 30, 53, 300)
step 460, loss=0.524711, accuracy=0.8, t=9.38, inputs=(10, 23, 84, 300)
step 461, loss=0.954479, accuracy=0.7, t=9.28, inputs=(10, 35, 77, 300)
step 462, loss=0.196013, accuracy=1.0, t=7.41, inputs=(10, 20, 66, 300)
step 463, loss=0.325068, accuracy=0.8, t=5.33, inputs=(10, 18, 46, 300)
step 464, loss=0.241829, accuracy=1.0, t=10.72, inputs=(10, 16, 112, 300)
step 465, loss=0.496429, accuracy=0.8, t=12.65, inputs=(10, 42, 103, 300)
step 466, loss=0.301117, accuracy=0.8, t=9.04, inputs=(10, 28, 77, 300)
step 467, loss=0.211657, accuracy=0.9, t=6.63, inputs=(10, 14, 66, 300)
step 468, loss=0.195319, accuracy=1.0, t=9.69, inputs=(10, 46, 65, 300)
step 469, loss=0.158168, accuracy=1.0, t=7.32, inputs=(10, 23, 66, 300)
step 470, loss=0.277619, accuracy=0.9, t=20.33, inputs=(10, 23, 220, 300)
step 471, loss=0.18411, accuracy=0.9, t=9.32, inputs=(10, 21, 92, 300)
step 472, loss=0.264376, accuracy=0.9, t=8.08, inputs=(10, 26, 69, 300)
step 473, loss=0.322866, accuracy=0.8, t=8.35, inputs=(10, 22, 79, 300)
step 474, loss=0.393354, accuracy=0.9, t=9.9, inputs=(10, 33, 88, 300)
step 475, loss=0.193278, accuracy=0.9, t=8.42, inputs=(10, 36, 68, 300)
step 476, loss=0.470172, accuracy=0.7, t=13.9, inputs=(10, 28, 141, 300)
step 477, loss=1.25138, accuracy=0.5, t=10.95, inputs=(10, 15, 120, 300)
step 478, loss=0.604474, accuracy=0.7, t=10.02, inputs=(10, 20, 101, 300)
step 479, loss=0.511643, accuracy=0.8, t=11.26, inputs=(10, 26, 109, 300)
step 480, loss=0.350916, accuracy=0.9, t=7.92, inputs=(10, 45, 51, 300)
step 481, loss=0.217352, accuracy=0.9, t=9.09, inputs=(10, 34, 80, 300)
step 482, loss=0.453681, accuracy=0.7, t=11.89, inputs=(10, 20, 124, 300)
step 483, loss=0.187804, accuracy=1.0, t=9.37, inputs=(10, 22, 91, 300)
step 484, loss=0.459596, accuracy=0.6, t=9.9, inputs=(10, 28, 90, 300)
step 485, loss=0.868849, accuracy=0.4, t=6.84, inputs=(10, 28, 54, 300)
step 486, loss=0.530523, accuracy=0.7, t=9.54, inputs=(10, 17, 99, 300)
step 487, loss=0.221401, accuracy=0.9, t=10.89, inputs=(10, 36, 91, 300)
step 488, loss=0.734326, accuracy=0.6, t=8.19, inputs=(10, 25, 75, 300)
step 489, loss=0.288507, accuracy=1.0, t=10.59, inputs=(10, 42, 84, 300)
step 490, loss=0.323895, accuracy=0.9, t=14.49, inputs=(10, 28, 148, 300)
step 491, loss=0.585075, accuracy=0.7, t=7.35, inputs=(10, 23, 63, 300)
step 492, loss=0.313459, accuracy=0.8, t=6.87, inputs=(10, 14, 68, 300)
step 493, loss=0.82529, accuracy=0.7, t=10.68, inputs=(10, 23, 105, 300)
step 494, loss=0.158277, accuracy=1.0, t=9.89, inputs=(10, 36, 78, 300)
step 495, loss=0.481211, accuracy=0.8, t=17.08, inputs=(10, 51, 152, 300)
step 496, loss=0.579913, accuracy=0.6, t=9.31, inputs=(10, 18, 98, 300)
step 497, loss=0.572451, accuracy=0.7, t=6.9, inputs=(10, 21, 59, 300)
step 498, loss=0.209936, accuracy=1.0, t=6.34, inputs=(10, 17, 60, 300)
step 499, loss=0.35347, accuracy=0.9, t=9.0, inputs=(10, 22, 87, 300)
step 500, loss=0.69256, accuracy=0.6, t=11.87, inputs=(10, 15, 116, 300)
checkpoint & graph meta
checkpoint done
step 501, loss=0.521025, accuracy=0.7, t=10.6, inputs=(10, 55, 58, 300)
step 502, loss=0.389964, accuracy=0.7, t=15.77, inputs=(10, 23, 159, 300)
step 503, loss=0.67553, accuracy=0.8, t=6.96, inputs=(10, 19, 67, 300)
step 504, loss=0.426787, accuracy=0.8, t=9.23, inputs=(10, 20, 90, 300)
step 505, loss=0.374951, accuracy=0.8, t=7.7, inputs=(10, 18, 74, 300)
step 506, loss=0.534921, accuracy=0.8, t=13.97, inputs=(10, 63, 86, 300)
step 507, loss=0.307725, accuracy=0.9, t=7.71, inputs=(10, 23, 62, 300)
step 508, loss=0.361012, accuracy=0.9, t=14.15, inputs=(10, 20, 135, 300)
step 509, loss=0.493288, accuracy=0.8, t=8.88, inputs=(10, 21, 76, 300)
step 510, loss=0.647665, accuracy=0.5, t=10.35, inputs=(10, 36, 77, 300)
step 511, loss=0.501176, accuracy=0.8, t=9.22, inputs=(10, 47, 61, 300)
step 512, loss=0.357067, accuracy=0.8, t=10.78, inputs=(10, 42, 83, 300)
step 513, loss=0.461059, accuracy=0.8, t=10.7, inputs=(10, 41, 88, 300)
step 514, loss=0.751129, accuracy=0.6, t=7.71, inputs=(10, 18, 77, 300)
step 515, loss=0.653092, accuracy=0.8, t=16.83, inputs=(10, 21, 176, 300)
step 516, loss=0.450213, accuracy=0.8, t=8.16, inputs=(10, 21, 77, 300)
step 517, loss=0.388786, accuracy=0.9, t=7.83, inputs=(10, 25, 69, 300)
step 518, loss=0.291248, accuracy=1.0, t=9.07, inputs=(10, 26, 87, 300)
step 519, loss=0.608129, accuracy=0.8, t=5.34, inputs=(10, 16, 52, 300)
step 520, loss=0.722314, accuracy=0.9, t=16.43, inputs=(10, 25, 170, 300)
step 521, loss=0.686192, accuracy=0.6, t=9.08, inputs=(10, 28, 83, 300)
step 522, loss=0.344738, accuracy=0.8, t=12.99, inputs=(10, 21, 134, 300)
step 523, loss=0.275543, accuracy=0.9, t=6.32, inputs=(10, 27, 50, 300)
step 524, loss=0.350718, accuracy=0.8, t=9.49, inputs=(10, 57, 52, 300)
step 525, loss=0.32866, accuracy=0.9, t=5.47, inputs=(10, 23, 46, 300)
step 526, loss=0.523439, accuracy=0.7, t=10.86, inputs=(10, 42, 90, 300)
step 527, loss=0.919764, accuracy=0.4, t=7.49, inputs=(10, 15, 78, 300)
step 528, loss=0.246434, accuracy=1.0, t=14.78, inputs=(10, 29, 148, 300)
step 529, loss=0.55939, accuracy=0.7, t=7.7, inputs=(10, 32, 61, 300)
step 530, loss=0.427387, accuracy=0.8, t=15.85, inputs=(10, 27, 164, 300)
step 531, loss=0.536552, accuracy=0.6, t=8.3, inputs=(10, 25, 76, 300)
step 532, loss=0.362049, accuracy=0.9, t=11.07, inputs=(10, 18, 115, 300)
step 533, loss=0.534529, accuracy=0.8, t=11.36, inputs=(10, 53, 82, 300)
step 534, loss=0.654329, accuracy=0.7, t=7.91, inputs=(10, 16, 78, 300)
step 535, loss=0.610249, accuracy=0.7, t=11.04, inputs=(10, 21, 116, 300)
step 536, loss=0.293405, accuracy=0.8, t=8.26, inputs=(10, 30, 70, 300)
step 537, loss=0.416998, accuracy=0.8, t=9.57, inputs=(10, 23, 91, 300)
step 538, loss=0.213685, accuracy=1.0, t=8.76, inputs=(10, 24, 79, 300)
step 539, loss=0.248766, accuracy=0.9, t=9.86, inputs=(10, 27, 91, 300)
step 540, loss=0.163513, accuracy=1.0, t=6.49, inputs=(10, 17, 63, 300)
step 541, loss=0.405217, accuracy=0.9, t=5.99, inputs=(10, 24, 48, 300)
step 542, loss=0.515769, accuracy=0.7, t=9.03, inputs=(10, 17, 92, 300)
step 543, loss=0.38412, accuracy=0.8, t=12.72, inputs=(10, 44, 107, 300)
step 544, loss=0.242808, accuracy=0.9, t=7.18, inputs=(10, 14, 76, 300)
step 545, loss=0.71789, accuracy=0.6, t=15.73, inputs=(10, 27, 165, 300)
step 546, loss=0.441797, accuracy=0.8, t=13.52, inputs=(10, 21, 142, 300)
step 547, loss=0.222916, accuracy=1.0, t=9.18, inputs=(10, 30, 83, 300)
step 548, loss=0.206137, accuracy=1.0, t=7.83, inputs=(10, 19, 77, 300)
step 549, loss=0.553444, accuracy=0.7, t=7.36, inputs=(10, 26, 65, 300)
step 550, loss=0.388173, accuracy=0.8, t=9.55, inputs=(10, 12, 101, 300)
step 551, loss=0.268798, accuracy=1.0, t=9.27, inputs=(10, 18, 95, 300)
step 552, loss=0.394916, accuracy=0.8, t=8.17, inputs=(10, 31, 66, 300)
step 553, loss=0.170873, accuracy=1.0, t=12.47, inputs=(10, 29, 121, 300)
step 554, loss=0.555694, accuracy=0.7, t=8.0, inputs=(10, 15, 84, 300)
step 555, loss=0.414774, accuracy=0.9, t=6.76, inputs=(10, 21, 63, 300)
step 556, loss=0.519558, accuracy=0.8, t=10.54, inputs=(10, 37, 92, 300)
step 557, loss=0.244669, accuracy=0.9, t=22.85, inputs=(10, 28, 244, 300)
step 558, loss=0.409292, accuracy=0.8, t=7.88, inputs=(10, 24, 71, 300)
step 559, loss=0.103032, accuracy=1.0, t=10.88, inputs=(10, 25, 108, 300)
step 560, loss=0.233671, accuracy=0.9, t=10.32, inputs=(10, 15, 105, 300)
step 561, loss=0.875954, accuracy=0.5, t=8.68, inputs=(10, 45, 51, 300)
step 562, loss=0.162219, accuracy=1.0, t=10.65, inputs=(10, 31, 85, 300)
step 563, loss=0.24269, accuracy=0.9, t=12.75, inputs=(10, 19, 121, 300)
step 564, loss=0.315553, accuracy=0.8, t=9.6, inputs=(10, 32, 76, 300)
step 565, loss=0.490348, accuracy=0.9, t=12.46, inputs=(10, 35, 102, 300)
step 566, loss=0.337634, accuracy=0.8, t=7.93, inputs=(10, 19, 76, 300)
step 567, loss=0.501815, accuracy=0.8, t=30.25, inputs=(10, 15, 319, 300)
step 568, loss=0.42863, accuracy=0.7, t=13.59, inputs=(10, 27, 121, 300)
step 569, loss=0.392818, accuracy=0.8, t=12.87, inputs=(10, 30, 111, 300)
step 570, loss=0.309755, accuracy=0.8, t=14.51, inputs=(10, 48, 109, 300)
step 571, loss=0.495342, accuracy=0.7, t=11.26, inputs=(10, 54, 68, 300)
step 572, loss=1.15003, accuracy=0.5, t=15.02, inputs=(10, 41, 121, 300)
step 573, loss=0.371476, accuracy=0.8, t=6.16, inputs=(10, 25, 45, 300)
step 574, loss=0.298936, accuracy=0.9, t=12.09, inputs=(10, 12, 123, 300)
step 575, loss=0.66593, accuracy=0.8, t=10.47, inputs=(10, 40, 74, 300)
step 576, loss=0.889814, accuracy=0.6, t=10.8, inputs=(10, 18, 103, 300)
step 577, loss=0.325034, accuracy=0.9, t=11.28, inputs=(10, 23, 100, 300)
step 578, loss=0.670487, accuracy=0.6, t=9.72, inputs=(10, 19, 90, 300)
step 579, loss=0.276962, accuracy=0.8, t=8.79, inputs=(10, 25, 71, 300)
step 580, loss=0.379525, accuracy=0.7, t=8.02, inputs=(10, 18, 70, 300)
step 581, loss=0.991319, accuracy=0.6, t=7.59, inputs=(10, 26, 60, 300)
step 582, loss=0.728251, accuracy=0.6, t=10.12, inputs=(10, 25, 87, 300)
step 583, loss=0.421056, accuracy=0.7, t=8.94, inputs=(10, 36, 62, 300)
step 584, loss=0.345778, accuracy=0.8, t=11.69, inputs=(10, 22, 109, 300)
step 585, loss=0.210902, accuracy=1.0, t=6.74, inputs=(10, 22, 52, 300)
step 586, loss=0.316373, accuracy=0.9, t=10.34, inputs=(10, 36, 76, 300)
step 587, loss=0.295643, accuracy=0.9, t=6.62, inputs=(10, 22, 53, 300)
step 588, loss=0.350715, accuracy=0.9, t=11.19, inputs=(10, 28, 95, 300)
step 589, loss=0.445068, accuracy=0.6, t=9.74, inputs=(10, 32, 77, 300)
step 590, loss=0.279527, accuracy=0.9, t=10.42, inputs=(10, 24, 95, 300)
step 591, loss=0.397286, accuracy=0.9, t=15.23, inputs=(10, 26, 157, 300)
step 592, loss=0.431125, accuracy=0.8, t=15.18, inputs=(10, 45, 130, 300)
step 593, loss=0.250777, accuracy=1.0, t=7.1, inputs=(10, 22, 61, 300)
step 594, loss=0.536795, accuracy=0.8, t=13.69, inputs=(10, 23, 138, 300)
step 595, loss=0.43051, accuracy=0.7, t=8.16, inputs=(10, 23, 74, 300)
step 596, loss=0.250427, accuracy=0.9, t=6.79, inputs=(10, 19, 64, 300)
step 597, loss=0.609517, accuracy=0.7, t=7.5, inputs=(10, 22, 71, 300)
step 598, loss=0.620034, accuracy=0.8, t=8.19, inputs=(10, 26, 72, 300)
step 599, loss=0.401525, accuracy=0.7, t=9.11, inputs=(10, 23, 86, 300)
step 600, loss=0.289843, accuracy=0.9, t=12.49, inputs=(10, 59, 82, 300)
checkpoint & graph meta
checkpoint done
step 601, loss=0.148557, accuracy=1.0, t=6.75, inputs=(10, 22, 59, 300)
step 602, loss=0.628159, accuracy=0.6, t=14.34, inputs=(10, 66, 97, 300)
step 603, loss=0.218136, accuracy=0.9, t=12.87, inputs=(10, 50, 100, 300)
step 604, loss=0.400557, accuracy=0.9, t=16.52, inputs=(10, 34, 159, 300)
step 605, loss=0.19776, accuracy=1.0, t=11.58, inputs=(10, 53, 82, 300)
step 606, loss=0.190266, accuracy=0.9, t=10.4, inputs=(10, 13, 111, 300)
step 607, loss=0.282649, accuracy=0.8, t=9.93, inputs=(10, 33, 81, 300)
step 608, loss=0.77483, accuracy=0.6, t=10.3, inputs=(10, 30, 91, 300)
step 609, loss=1.09428, accuracy=0.6, t=11.68, inputs=(10, 35, 101, 300)
step 610, loss=0.102942, accuracy=1.0, t=6.51, inputs=(10, 20, 57, 300)
step 611, loss=0.221786, accuracy=0.9, t=7.87, inputs=(10, 17, 81, 300)
step 612, loss=0.897079, accuracy=0.7, t=9.44, inputs=(10, 28, 83, 300)
step 613, loss=0.826492, accuracy=0.6, t=11.33, inputs=(10, 20, 111, 300)
step 614, loss=0.227111, accuracy=1.0, t=11.11, inputs=(10, 38, 93, 300)
step 615, loss=0.747121, accuracy=0.4, t=8.54, inputs=(10, 27, 73, 300)
step 616, loss=0.442159, accuracy=0.8, t=6.64, inputs=(10, 15, 65, 300)
step 617, loss=0.411829, accuracy=0.8, t=9.04, inputs=(10, 24, 86, 300)
step 618, loss=0.642612, accuracy=0.8, t=7.65, inputs=(10, 23, 70, 300)
step 619, loss=0.433836, accuracy=0.8, t=12.75, inputs=(10, 21, 131, 300)
step 620, loss=0.598043, accuracy=0.8, t=7.2, inputs=(10, 20, 67, 300)
step 621, loss=0.278195, accuracy=1.0, t=12.47, inputs=(10, 42, 104, 300)
step 622, loss=0.393506, accuracy=0.8, t=9.34, inputs=(10, 18, 94, 300)
step 623, loss=0.419103, accuracy=0.9, t=9.84, inputs=(10, 37, 81, 300)
step 624, loss=0.318539, accuracy=0.9, t=13.56, inputs=(10, 47, 112, 300)
step 625, loss=0.361875, accuracy=0.9, t=10.75, inputs=(10, 13, 116, 300)
step 626, loss=0.366125, accuracy=0.7, t=9.95, inputs=(10, 21, 95, 300)
step 627, loss=0.218086, accuracy=1.0, t=8.7, inputs=(10, 29, 70, 300)
step 628, loss=0.337526, accuracy=0.8, t=8.54, inputs=(10, 43, 52, 300)
step 629, loss=0.791128, accuracy=0.5, t=7.02, inputs=(10, 20, 62, 300)
step 630, loss=0.566668, accuracy=0.7, t=11.2, inputs=(10, 24, 102, 300)
step 631, loss=0.397704, accuracy=0.8, t=23.97, inputs=(10, 27, 241, 300)
step 632, loss=0.257828, accuracy=0.9, t=12.57, inputs=(10, 27, 113, 300)
step 633, loss=0.237613, accuracy=1.0, t=9.05, inputs=(10, 43, 57, 300)
step 634, loss=0.415581, accuracy=0.8, t=9.14, inputs=(10, 28, 76, 300)
step 635, loss=0.168258, accuracy=0.9, t=6.55, inputs=(10, 19, 52, 300)
step 636, loss=0.391383, accuracy=0.9, t=10.27, inputs=(10, 46, 64, 300)
step 637, loss=0.272576, accuracy=1.0, t=16.39, inputs=(10, 93, 75, 300)
step 638, loss=0.358712, accuracy=0.8, t=8.23, inputs=(10, 19, 72, 300)
step 639, loss=0.679574, accuracy=0.7, t=18.45, inputs=(10, 31, 174, 300)
step 640, loss=0.312374, accuracy=0.9, t=13.0, inputs=(10, 20, 125, 300)
step 641, loss=0.214399, accuracy=0.9, t=17.43, inputs=(10, 28, 163, 300)
step 642, loss=0.384018, accuracy=0.8, t=12.3, inputs=(10, 36, 101, 300)
step 643, loss=0.160233, accuracy=0.9, t=9.55, inputs=(10, 15, 93, 300)
step 644, loss=0.301574, accuracy=0.8, t=7.48, inputs=(10, 20, 67, 300)
step 645, loss=0.290551, accuracy=0.9, t=9.78, inputs=(10, 51, 54, 300)
step 646, loss=0.182511, accuracy=1.0, t=16.86, inputs=(10, 24, 162, 300)
step 647, loss=0.244764, accuracy=0.9, t=6.5, inputs=(10, 22, 53, 300)
step 648, loss=0.439146, accuracy=0.9, t=12.37, inputs=(10, 45, 92, 300)
step 649, loss=0.435455, accuracy=0.7, t=7.21, inputs=(10, 21, 62, 300)
step 650, loss=0.246736, accuracy=0.9, t=12.47, inputs=(10, 19, 118, 300)
step 651, loss=0.301028, accuracy=0.9, t=8.49, inputs=(10, 33, 61, 300)
step 652, loss=0.44727, accuracy=0.7, t=7.38, inputs=(10, 19, 67, 300)
step 653, loss=0.176554, accuracy=1.0, t=15.04, inputs=(10, 24, 141, 300)
step 654, loss=0.210636, accuracy=0.9, t=9.49, inputs=(10, 22, 84, 300)
step 655, loss=1.05014, accuracy=0.7, t=11.32, inputs=(10, 33, 94, 300)
step 656, loss=0.909305, accuracy=0.7, t=5.81, inputs=(10, 15, 49, 300)
step 657, loss=0.367202, accuracy=0.9, t=12.46, inputs=(10, 34, 113, 300)
step 658, loss=0.453498, accuracy=0.8, t=10.35, inputs=(10, 20, 102, 300)
step 659, loss=0.806458, accuracy=0.6, t=10.07, inputs=(10, 30, 91, 300)
step 660, loss=0.846727, accuracy=0.7, t=9.94, inputs=(10, 14, 107, 300)
step 661, loss=0.324345, accuracy=0.8, t=13.35, inputs=(10, 19, 139, 300)
step 662, loss=0.459751, accuracy=0.7, t=9.43, inputs=(10, 32, 78, 300)
step 663, loss=0.144367, accuracy=1.0, t=11.28, inputs=(10, 36, 95, 300)
step 664, loss=0.453186, accuracy=0.8, t=13.03, inputs=(10, 24, 129, 300)
step 665, loss=0.507895, accuracy=0.8, t=8.19, inputs=(10, 41, 54, 300)
step 666, loss=0.172002, accuracy=1.0, t=7.05, inputs=(10, 12, 74, 300)
step 667, loss=0.298166, accuracy=0.9, t=14.06, inputs=(10, 52, 108, 300)
step 668, loss=0.387317, accuracy=0.9, t=8.33, inputs=(10, 14, 85, 300)
step 669, loss=0.338036, accuracy=0.7, t=22.4, inputs=(10, 19, 246, 300)
step 670, loss=0.378967, accuracy=0.7, t=10.17, inputs=(10, 42, 77, 300)
step 671, loss=0.833172, accuracy=0.6, t=8.58, inputs=(10, 18, 85, 300)
step 672, loss=0.268011, accuracy=0.9, t=7.34, inputs=(10, 14, 73, 300)
step 673, loss=0.244822, accuracy=0.9, t=31.79, inputs=(10, 48, 298, 300)
step 674, loss=0.242986, accuracy=0.9, t=6.95, inputs=(10, 16, 69, 300)
step 675, loss=0.203717, accuracy=1.0, t=12.68, inputs=(10, 25, 123, 300)
step 676, loss=0.413698, accuracy=0.8, t=6.08, inputs=(10, 23, 49, 300)
step 677, loss=0.506995, accuracy=0.8, t=26.44, inputs=(10, 16, 298, 300)
step 678, loss=0.267208, accuracy=0.9, t=8.66, inputs=(10, 23, 80, 300)
step 679, loss=0.139845, accuracy=1.0, t=9.71, inputs=(10, 41, 75, 300)
step 680, loss=0.423711, accuracy=0.7, t=7.65, inputs=(10, 20, 71, 300)
step 681, loss=0.632318, accuracy=0.6, t=12.46, inputs=(10, 18, 133, 300)
step 682, loss=0.421554, accuracy=0.9, t=8.68, inputs=(10, 34, 67, 300)
step 683, loss=0.760221, accuracy=0.7, t=9.65, inputs=(10, 38, 75, 300)
step 684, loss=0.334077, accuracy=0.7, t=8.28, inputs=(10, 33, 67, 300)
step 685, loss=0.302886, accuracy=0.8, t=7.51, inputs=(10, 18, 77, 300)
step 686, loss=0.349868, accuracy=0.8, t=12.44, inputs=(10, 35, 113, 300)
step 687, loss=0.188861, accuracy=1.0, t=22.71, inputs=(10, 15, 264, 300)
step 688, loss=0.705703, accuracy=0.5, t=9.45, inputs=(10, 32, 83, 300)
step 689, loss=0.358427, accuracy=0.8, t=7.54, inputs=(10, 11, 82, 300)
step 690, loss=0.289806, accuracy=0.9, t=10.28, inputs=(10, 41, 81, 300)
step 691, loss=0.36669, accuracy=0.7, t=13.89, inputs=(10, 26, 141, 300)
step 692, loss=0.411612, accuracy=0.9, t=9.19, inputs=(10, 22, 92, 300)
step 693, loss=0.632655, accuracy=0.8, t=11.44, inputs=(10, 35, 100, 300)
step 694, loss=0.287737, accuracy=0.8, t=6.89, inputs=(10, 19, 62, 300)
step 695, loss=0.623078, accuracy=0.7, t=11.67, inputs=(10, 24, 119, 300)
step 696, loss=0.362078, accuracy=0.9, t=8.52, inputs=(10, 25, 77, 300)
step 697, loss=1.01956, accuracy=0.5, t=14.11, inputs=(10, 36, 132, 300)
step 698, loss=0.136833, accuracy=1.0, t=10.02, inputs=(10, 32, 89, 300)
step 699, loss=0.0788151, accuracy=1.0, t=5.68, inputs=(10, 14, 56, 300)
step 700, loss=0.300441, accuracy=0.9, t=8.39, inputs=(10, 31, 71, 300)
checkpoint & graph meta
checkpoint done
step 701, loss=0.30398, accuracy=0.8, t=14.48, inputs=(10, 22, 152, 300)
step 702, loss=0.436695, accuracy=0.8, t=12.03, inputs=(10, 26, 121, 300)
step 703, loss=0.213976, accuracy=0.9, t=8.65, inputs=(10, 43, 59, 300)
step 704, loss=0.439444, accuracy=0.8, t=8.89, inputs=(10, 26, 81, 300)
step 705, loss=0.811801, accuracy=0.6, t=9.5, inputs=(10, 49, 64, 300)
step 706, loss=0.203115, accuracy=1.0, t=6.44, inputs=(10, 17, 58, 300)
step 707, loss=0.234407, accuracy=0.9, t=7.35, inputs=(10, 24, 66, 300)
step 708, loss=0.462713, accuracy=0.7, t=14.98, inputs=(10, 33, 147, 300)
step 709, loss=0.463658, accuracy=0.8, t=10.99, inputs=(10, 32, 101, 300)
step 710, loss=0.45032, accuracy=0.7, t=8.69, inputs=(10, 28, 76, 300)
step 711, loss=0.467641, accuracy=0.7, t=14.44, inputs=(10, 35, 140, 300)
step 712, loss=0.898365, accuracy=0.6, t=7.09, inputs=(10, 23, 66, 300)
step 713, loss=0.21389, accuracy=0.9, t=9.92, inputs=(10, 18, 103, 300)
step 714, loss=0.485666, accuracy=0.9, t=10.82, inputs=(10, 48, 80, 300)
step 715, loss=0.329274, accuracy=0.9, t=10.58, inputs=(10, 12, 117, 300)
step 716, loss=0.496774, accuracy=0.7, t=7.73, inputs=(10, 18, 77, 300)
step 717, loss=0.579961, accuracy=0.8, t=12.65, inputs=(10, 31, 118, 300)
step 718, loss=0.427153, accuracy=0.9, t=7.14, inputs=(10, 24, 64, 300)
step 719, loss=0.842991, accuracy=0.5, t=14.12, inputs=(10, 22, 145, 300)
step 720, loss=0.312531, accuracy=0.9, t=13.68, inputs=(10, 32, 128, 300)
step 721, loss=0.832975, accuracy=0.5, t=8.36, inputs=(10, 17, 82, 300)
step 722, loss=0.434488, accuracy=0.8, t=5.88, inputs=(10, 17, 57, 300)
step 723, loss=0.650496, accuracy=0.8, t=6.18, inputs=(10, 21, 55, 300)
step 724, loss=0.40619, accuracy=0.8, t=8.6, inputs=(10, 25, 80, 300)
step 725, loss=0.18199, accuracy=1.0, t=7.82, inputs=(10, 20, 76, 300)
step 726, loss=0.429419, accuracy=0.7, t=10.21, inputs=(10, 45, 75, 300)
step 727, loss=0.486289, accuracy=0.7, t=9.32, inputs=(10, 27, 84, 300)
step 728, loss=0.706835, accuracy=0.7, t=6.81, inputs=(10, 24, 57, 300)
step 729, loss=0.476903, accuracy=0.6, t=7.74, inputs=(10, 15, 80, 300)
step 730, loss=0.474474, accuracy=0.8, t=10.22, inputs=(10, 18, 107, 300)
step 731, loss=0.240862, accuracy=1.0, t=7.64, inputs=(10, 15, 77, 300)
step 732, loss=0.330595, accuracy=0.8, t=11.79, inputs=(10, 32, 111, 300)
step 733, loss=0.434725, accuracy=0.7, t=12.28, inputs=(10, 25, 126, 300)
step 734, loss=0.434219, accuracy=0.8, t=14.43, inputs=(10, 32, 142, 300)
step 735, loss=0.402711, accuracy=0.8, t=16.54, inputs=(10, 53, 140, 300)
step 736, loss=0.354133, accuracy=0.8, t=8.29, inputs=(10, 29, 72, 300)
step 737, loss=0.61957, accuracy=0.7, t=11.33, inputs=(10, 18, 120, 300)
step 738, loss=0.305475, accuracy=0.9, t=18.63, inputs=(10, 39, 182, 300)
step 739, loss=0.593284, accuracy=0.7, t=8.02, inputs=(10, 37, 61, 300)
step 740, loss=0.299574, accuracy=0.9, t=10.38, inputs=(10, 50, 75, 300)
step 741, loss=0.374883, accuracy=0.9, t=8.56, inputs=(10, 33, 71, 300)
step 742, loss=0.502318, accuracy=0.8, t=9.53, inputs=(10, 29, 85, 300)
step 743, loss=0.316477, accuracy=1.0, t=9.63, inputs=(10, 51, 65, 300)
step 744, loss=0.529523, accuracy=0.8, t=9.55, inputs=(10, 39, 78, 300)
step 745, loss=0.163766, accuracy=1.0, t=8.04, inputs=(10, 18, 81, 300)
step 746, loss=0.2978, accuracy=0.8, t=6.36, inputs=(10, 27, 51, 300)
step 747, loss=0.530568, accuracy=0.8, t=13.7, inputs=(10, 31, 133, 300)
step 748, loss=0.391008, accuracy=0.8, t=27.49, inputs=(10, 11, 322, 300)
step 749, loss=0.691339, accuracy=0.8, t=20.57, inputs=(10, 17, 226, 300)
step 750, loss=0.544833, accuracy=0.7, t=9.38, inputs=(10, 33, 79, 300)
step 751, loss=0.877647, accuracy=0.7, t=8.96, inputs=(10, 36, 70, 300)
step 752, loss=0.319018, accuracy=0.9, t=12.13, inputs=(10, 78, 65, 300)
step 753, loss=0.436702, accuracy=0.9, t=11.26, inputs=(10, 49, 80, 300)
step 754, loss=0.408743, accuracy=0.9, t=8.33, inputs=(10, 28, 72, 300)
step 755, loss=0.374025, accuracy=0.8, t=9.52, inputs=(10, 33, 79, 300)
step 756, loss=0.307053, accuracy=0.9, t=5.01, inputs=(10, 19, 44, 300)
step 757, loss=0.193859, accuracy=0.9, t=8.29, inputs=(10, 18, 81, 300)
step 758, loss=0.457819, accuracy=0.8, t=6.22, inputs=(10, 25, 51, 300)
step 759, loss=0.353126, accuracy=0.9, t=7.11, inputs=(10, 20, 67, 300)
step 760, loss=0.621251, accuracy=0.8, t=8.66, inputs=(10, 13, 92, 300)
step 761, loss=0.37234, accuracy=0.7, t=7.96, inputs=(10, 22, 71, 300)
step 762, loss=0.464587, accuracy=0.8, t=8.72, inputs=(10, 26, 78, 300)
step 763, loss=0.596528, accuracy=0.7, t=8.95, inputs=(10, 26, 84, 300)
step 764, loss=0.342468, accuracy=0.9, t=10.84, inputs=(10, 26, 103, 300)
step 765, loss=0.824911, accuracy=0.7, t=5.85, inputs=(10, 21, 49, 300)
step 766, loss=0.413954, accuracy=0.7, t=6.34, inputs=(10, 18, 60, 300)
step 767, loss=0.423941, accuracy=0.8, t=14.11, inputs=(10, 25, 144, 300)
step 768, loss=0.52834, accuracy=0.7, t=7.54, inputs=(10, 15, 76, 300)
step 769, loss=0.505606, accuracy=0.7, t=11.61, inputs=(10, 40, 101, 300)
step 770, loss=0.506405, accuracy=0.8, t=9.03, inputs=(10, 28, 84, 300)
step 771, loss=0.244802, accuracy=0.9, t=9.14, inputs=(10, 18, 93, 300)
step 772, loss=0.729164, accuracy=0.8, t=7.83, inputs=(10, 24, 69, 300)
step 773, loss=0.476228, accuracy=0.8, t=7.78, inputs=(10, 21, 73, 300)
step 774, loss=0.311705, accuracy=1.0, t=19.82, inputs=(10, 54, 180, 300)
step 775, loss=0.437794, accuracy=0.8, t=11.11, inputs=(10, 32, 102, 300)
step 776, loss=0.256262, accuracy=0.9, t=21.42, inputs=(10, 15, 239, 300)
step 777, loss=0.419412, accuracy=0.8, t=7.3, inputs=(10, 16, 70, 300)
step 778, loss=0.264974, accuracy=0.9, t=7.28, inputs=(10, 37, 50, 300)
step 779, loss=0.221872, accuracy=1.0, t=8.8, inputs=(10, 25, 81, 300)
step 780, loss=0.311132, accuracy=0.9, t=9.2, inputs=(10, 17, 96, 300)
step 781, loss=0.289826, accuracy=1.0, t=14.08, inputs=(10, 43, 124, 300)
step 782, loss=0.203255, accuracy=1.0, t=7.45, inputs=(10, 15, 74, 300)
step 783, loss=0.646191, accuracy=0.6, t=17.62, inputs=(10, 43, 166, 300)
step 784, loss=0.467975, accuracy=0.7, t=10.22, inputs=(10, 27, 96, 300)
step 785, loss=0.24222, accuracy=0.9, t=8.74, inputs=(10, 15, 87, 300)
step 786, loss=0.353029, accuracy=0.8, t=9.66, inputs=(10, 17, 99, 300)
step 787, loss=0.278397, accuracy=0.9, t=11.51, inputs=(10, 23, 116, 300)
step 788, loss=0.32838, accuracy=0.9, t=7.1, inputs=(10, 34, 53, 300)
step 789, loss=0.246863, accuracy=0.9, t=9.61, inputs=(10, 31, 86, 300)
step 790, loss=0.975459, accuracy=0.8, t=12.01, inputs=(10, 24, 121, 300)
step 791, loss=0.116939, accuracy=1.0, t=11.47, inputs=(10, 26, 113, 300)
step 792, loss=0.473397, accuracy=0.9, t=13.38, inputs=(10, 26, 133, 300)
step 793, loss=0.637363, accuracy=0.7, t=6.61, inputs=(10, 16, 65, 300)
step 794, loss=0.197624, accuracy=0.9, t=10.11, inputs=(10, 10, 107, 300)
step 795, loss=0.669643, accuracy=0.7, t=10.02, inputs=(10, 25, 98, 300)
step 796, loss=0.253424, accuracy=0.9, t=8.59, inputs=(10, 29, 78, 300)
step 797, loss=0.172169, accuracy=0.9, t=8.22, inputs=(10, 41, 59, 300)
step 798, loss=0.326026, accuracy=0.9, t=7.48, inputs=(10, 34, 58, 300)
step 799, loss=0.402025, accuracy=0.7, t=7.28, inputs=(10, 41, 48, 300)
step 800, loss=0.728362, accuracy=0.6, t=7.81, inputs=(10, 20, 76, 300)
checkpoint & graph meta
checkpoint done
step 801, loss=0.181754, accuracy=0.9, t=15.37, inputs=(10, 32, 151, 300)
step 802, loss=0.233407, accuracy=0.9, t=7.95, inputs=(10, 21, 78, 300)
step 803, loss=0.474612, accuracy=0.8, t=6.13, inputs=(10, 26, 50, 300)
step 804, loss=0.250413, accuracy=0.9, t=9.3, inputs=(10, 36, 74, 300)
step 805, loss=0.23952, accuracy=1.0, t=9.42, inputs=(10, 34, 76, 300)
step 806, loss=0.278592, accuracy=0.8, t=8.19, inputs=(10, 24, 78, 300)
step 807, loss=0.155395, accuracy=1.0, t=6.99, inputs=(10, 21, 62, 300)
step 808, loss=0.344628, accuracy=0.8, t=6.41, inputs=(10, 20, 59, 300)
step 809, loss=0.264222, accuracy=0.8, t=9.67, inputs=(10, 34, 81, 300)
step 810, loss=0.395292, accuracy=0.8, t=10.66, inputs=(10, 34, 96, 300)
step 811, loss=0.187876, accuracy=1.0, t=6.33, inputs=(10, 12, 69, 300)
step 812, loss=0.272119, accuracy=0.9, t=8.67, inputs=(10, 26, 79, 300)
step 813, loss=0.233775, accuracy=0.9, t=8.07, inputs=(10, 22, 78, 300)
step 814, loss=0.100267, accuracy=1.0, t=7.9, inputs=(10, 23, 72, 300)
step 815, loss=0.190342, accuracy=0.9, t=7.03, inputs=(10, 17, 70, 300)
step 816, loss=0.56477, accuracy=0.9, t=6.69, inputs=(10, 21, 61, 300)
step 817, loss=0.843193, accuracy=0.6, t=10.19, inputs=(10, 38, 85, 300)
step 818, loss=0.357181, accuracy=0.9, t=8.52, inputs=(10, 40, 61, 300)
step 819, loss=0.532538, accuracy=0.9, t=10.01, inputs=(10, 22, 100, 300)
step 820, loss=1.41771, accuracy=0.5, t=11.05, inputs=(10, 22, 111, 300)
step 821, loss=0.742202, accuracy=0.6, t=7.61, inputs=(10, 25, 69, 300)
step 822, loss=0.271816, accuracy=0.9, t=4.85, inputs=(10, 12, 48, 300)
step 823, loss=0.548424, accuracy=0.8, t=14.5, inputs=(10, 42, 130, 300)
step 824, loss=0.144201, accuracy=1.0, t=5.76, inputs=(10, 16, 56, 300)
step 825, loss=0.491664, accuracy=0.7, t=9.1, inputs=(10, 37, 72, 300)
step 826, loss=0.510098, accuracy=0.8, t=6.09, inputs=(10, 19, 60, 300)
step 827, loss=0.289185, accuracy=0.9, t=10.07, inputs=(10, 35, 86, 300)
step 828, loss=0.417117, accuracy=0.9, t=12.54, inputs=(10, 48, 102, 300)
step 829, loss=0.195933, accuracy=1.0, t=7.14, inputs=(10, 28, 59, 300)
step 830, loss=0.313318, accuracy=0.9, t=10.96, inputs=(10, 22, 112, 300)
step 831, loss=0.493662, accuracy=0.7, t=11.45, inputs=(10, 23, 116, 300)
step 832, loss=0.49316, accuracy=0.8, t=7.26, inputs=(10, 22, 67, 300)
step 833, loss=1.1289, accuracy=0.5, t=8.64, inputs=(10, 28, 76, 300)
step 834, loss=0.437238, accuracy=0.8, t=11.78, inputs=(10, 54, 83, 300)
step 835, loss=0.25767, accuracy=0.9, t=7.28, inputs=(10, 22, 65, 300)
step 836, loss=0.120489, accuracy=1.0, t=7.35, inputs=(10, 18, 73, 300)
step 837, loss=0.348426, accuracy=0.9, t=6.52, inputs=(10, 26, 55, 300)
step 838, loss=0.297805, accuracy=0.9, t=9.16, inputs=(10, 41, 60, 300)
step 839, loss=0.234721, accuracy=0.9, t=9.65, inputs=(10, 31, 75, 300)
step 840, loss=0.248384, accuracy=1.0, t=17.38, inputs=(10, 35, 154, 300)
step 841, loss=0.306147, accuracy=0.9, t=6.95, inputs=(10, 22, 56, 300)
step 842, loss=0.265077, accuracy=0.9, t=18.65, inputs=(10, 44, 169, 300)
step 843, loss=0.432599, accuracy=0.8, t=9.91, inputs=(10, 24, 98, 300)
step 844, loss=0.414098, accuracy=0.9, t=10.1, inputs=(10, 24, 100, 300)
step 845, loss=0.461522, accuracy=0.8, t=31.62, inputs=(10, 19, 357, 300)
step 846, loss=0.281699, accuracy=0.8, t=9.79, inputs=(10, 30, 84, 300)
step 847, loss=0.446867, accuracy=0.9, t=7.36, inputs=(10, 15, 74, 300)
step 848, loss=0.226848, accuracy=0.8, t=9.68, inputs=(10, 24, 89, 300)
step 849, loss=0.307628, accuracy=0.9, t=17.5, inputs=(10, 27, 181, 300)
step 850, loss=0.514795, accuracy=0.6, t=7.8, inputs=(10, 27, 67, 300)
step 851, loss=0.529833, accuracy=0.7, t=16.96, inputs=(10, 23, 183, 300)
step 852, loss=0.607409, accuracy=0.8, t=8.94, inputs=(10, 33, 74, 300)
step 853, loss=0.202167, accuracy=1.0, t=9.32, inputs=(10, 19, 96, 300)
step 854, loss=0.52098, accuracy=0.7, t=18.29, inputs=(10, 36, 182, 300)
step 855, loss=0.641108, accuracy=0.7, t=8.59, inputs=(10, 21, 83, 300)
step 856, loss=0.328012, accuracy=0.8, t=11.72, inputs=(10, 37, 100, 300)
step 857, loss=0.342644, accuracy=0.9, t=10.55, inputs=(10, 14, 115, 300)
step 858, loss=0.23124, accuracy=0.9, t=7.08, inputs=(10, 26, 58, 300)
step 859, loss=0.468839, accuracy=0.8, t=6.08, inputs=(10, 15, 61, 300)
step 860, loss=0.582112, accuracy=0.7, t=14.1, inputs=(10, 33, 137, 300)
step 861, loss=0.149692, accuracy=1.0, t=10.3, inputs=(10, 38, 89, 300)
step 862, loss=0.321423, accuracy=0.8, t=10.1, inputs=(10, 35, 87, 300)
step 863, loss=0.44217, accuracy=0.7, t=9.62, inputs=(10, 52, 60, 300)
step 864, loss=1.12723, accuracy=0.3, t=8.6, inputs=(10, 27, 78, 300)
step 865, loss=0.279637, accuracy=0.9, t=23.97, inputs=(10, 30, 258, 300)
step 866, loss=0.877046, accuracy=0.6, t=9.2, inputs=(10, 16, 98, 300)
step 867, loss=0.715959, accuracy=0.7, t=11.41, inputs=(10, 20, 120, 300)
step 868, loss=0.625128, accuracy=0.6, t=11.29, inputs=(10, 60, 72, 300)
step 869, loss=0.178315, accuracy=0.9, t=8.3, inputs=(10, 24, 78, 300)
step 870, loss=0.311023, accuracy=0.8, t=11.96, inputs=(10, 19, 124, 300)
step 871, loss=0.472787, accuracy=0.6, t=6.21, inputs=(10, 22, 54, 300)
step 872, loss=0.495252, accuracy=0.7, t=7.58, inputs=(10, 18, 72, 300)
step 873, loss=0.325836, accuracy=0.8, t=9.8, inputs=(10, 34, 87, 300)
step 874, loss=0.472687, accuracy=0.9, t=11.02, inputs=(10, 26, 108, 300)
step 875, loss=0.407787, accuracy=0.9, t=9.99, inputs=(10, 69, 47, 300)
step 876, loss=0.435236, accuracy=0.8, t=8.14, inputs=(10, 15, 84, 300)
step 877, loss=0.288635, accuracy=0.9, t=9.7, inputs=(10, 22, 98, 300)
step 878, loss=0.337229, accuracy=0.8, t=11.01, inputs=(10, 29, 101, 300)
step 879, loss=0.316527, accuracy=0.9, t=17.19, inputs=(10, 41, 161, 300)
step 880, loss=0.524047, accuracy=0.8, t=13.32, inputs=(10, 67, 86, 300)
step 881, loss=0.211658, accuracy=0.9, t=7.46, inputs=(10, 20, 68, 300)
step 882, loss=0.58709, accuracy=0.7, t=14.71, inputs=(10, 31, 146, 300)
step 883, loss=0.665583, accuracy=0.7, t=9.43, inputs=(10, 26, 82, 300)
step 884, loss=0.239052, accuracy=0.9, t=14.47, inputs=(10, 50, 115, 300)
step 885, loss=0.669124, accuracy=0.7, t=6.78, inputs=(10, 17, 67, 300)
step 886, loss=0.389121, accuracy=0.8, t=9.12, inputs=(10, 27, 83, 300)
step 887, loss=0.216845, accuracy=0.9, t=7.18, inputs=(10, 18, 66, 300)
step 888, loss=0.260352, accuracy=0.9, t=7.51, inputs=(10, 30, 60, 300)
step 889, loss=0.510933, accuracy=0.8, t=11.45, inputs=(10, 31, 108, 300)
step 890, loss=0.380198, accuracy=0.9, t=5.86, inputs=(10, 13, 56, 300)
step 891, loss=0.392789, accuracy=0.9, t=9.39, inputs=(10, 20, 94, 300)
step 892, loss=0.30779, accuracy=0.9, t=8.69, inputs=(10, 19, 86, 300)
step 893, loss=0.510667, accuracy=0.7, t=12.58, inputs=(10, 38, 114, 300)
step 894, loss=0.199203, accuracy=1.0, t=8.19, inputs=(10, 29, 69, 300)
step 895, loss=0.586469, accuracy=0.8, t=9.28, inputs=(10, 32, 78, 300)
step 896, loss=0.667744, accuracy=0.8, t=7.38, inputs=(10, 31, 56, 300)
step 897, loss=0.229403, accuracy=0.9, t=9.33, inputs=(10, 23, 87, 300)
step 898, loss=0.286616, accuracy=0.8, t=11.89, inputs=(10, 36, 106, 300)
step 899, loss=0.632999, accuracy=0.7, t=10.24, inputs=(10, 54, 66, 300)
step 900, loss=0.396228, accuracy=0.8, t=8.46, inputs=(10, 21, 83, 300)
checkpoint & graph meta
checkpoint done
step 901, loss=0.158211, accuracy=1.0, t=9.97, inputs=(10, 34, 83, 300)
step 902, loss=0.413395, accuracy=0.8, t=13.66, inputs=(10, 38, 128, 300)
step 903, loss=0.39951, accuracy=0.9, t=12.21, inputs=(10, 36, 106, 300)
step 904, loss=0.478427, accuracy=0.7, t=26.64, inputs=(10, 24, 284, 300)
step 905, loss=0.412506, accuracy=0.8, t=8.01, inputs=(10, 40, 54, 300)
step 906, loss=0.715759, accuracy=0.5, t=13.48, inputs=(10, 39, 120, 300)
step 907, loss=1.47006, accuracy=0.3, t=6.77, inputs=(10, 17, 67, 300)
step 908, loss=0.189893, accuracy=1.0, t=8.28, inputs=(10, 19, 82, 300)
step 909, loss=0.452692, accuracy=0.8, t=19.46, inputs=(10, 16, 218, 300)
step 910, loss=0.459181, accuracy=0.8, t=11.77, inputs=(10, 23, 115, 300)
step 911, loss=0.276422, accuracy=0.9, t=10.71, inputs=(10, 46, 84, 300)
step 912, loss=0.663066, accuracy=0.7, t=8.77, inputs=(10, 33, 71, 300)
step 913, loss=0.290237, accuracy=0.9, t=8.94, inputs=(10, 28, 78, 300)
step 914, loss=0.309446, accuracy=0.8, t=7.3, inputs=(10, 19, 66, 300)
step 915, loss=0.7042, accuracy=0.5, t=7.42, inputs=(10, 16, 74, 300)
step 916, loss=0.559466, accuracy=0.7, t=5.27, inputs=(10, 13, 52, 300)
step 917, loss=0.360195, accuracy=0.8, t=11.18, inputs=(10, 20, 116, 300)
step 918, loss=0.612429, accuracy=0.6, t=9.21, inputs=(10, 27, 83, 300)
step 919, loss=0.35477, accuracy=0.8, t=9.41, inputs=(10, 37, 75, 300)
step 920, loss=0.550652, accuracy=0.7, t=10.79, inputs=(10, 31, 100, 300)
step 921, loss=0.185833, accuracy=1.0, t=7.45, inputs=(10, 17, 72, 300)
step 922, loss=0.721228, accuracy=0.5, t=9.36, inputs=(10, 49, 62, 300)
step 923, loss=0.501659, accuracy=0.7, t=9.65, inputs=(10, 32, 84, 300)
step 924, loss=0.379269, accuracy=0.8, t=13.3, inputs=(10, 41, 119, 300)
step 925, loss=0.419077, accuracy=0.9, t=9.97, inputs=(10, 46, 72, 300)
step 926, loss=0.608303, accuracy=0.6, t=8.6, inputs=(10, 30, 77, 300)
step 927, loss=0.523973, accuracy=0.7, t=7.62, inputs=(10, 27, 64, 300)
step 928, loss=0.14424, accuracy=1.0, t=11.72, inputs=(10, 34, 107, 300)
step 929, loss=0.196032, accuracy=1.0, t=7.41, inputs=(10, 26, 63, 300)
step 930, loss=0.240801, accuracy=1.0, t=6.94, inputs=(10, 15, 71, 300)
step 931, loss=0.305048, accuracy=1.0, t=11.54, inputs=(10, 27, 109, 300)
step 932, loss=0.237476, accuracy=1.0, t=7.52, inputs=(10, 29, 65, 300)
step 933, loss=0.360531, accuracy=0.8, t=12.28, inputs=(10, 36, 110, 300)
step 934, loss=0.342967, accuracy=0.8, t=12.27, inputs=(10, 30, 121, 300)
step 935, loss=0.217178, accuracy=1.0, t=5.19, inputs=(10, 15, 50, 300)
step 936, loss=0.251184, accuracy=0.9, t=12.37, inputs=(10, 16, 131, 300)
step 937, loss=0.202812, accuracy=1.0, t=7.92, inputs=(10, 19, 77, 300)
step 938, loss=0.295244, accuracy=0.9, t=7.3, inputs=(10, 17, 74, 300)
step 939, loss=0.448106, accuracy=0.8, t=14.0, inputs=(10, 54, 111, 300)
step 940, loss=0.287287, accuracy=0.9, t=7.22, inputs=(10, 12, 78, 300)
step 941, loss=0.446952, accuracy=0.8, t=15.02, inputs=(10, 52, 127, 300)
step 942, loss=0.411352, accuracy=0.8, t=7.81, inputs=(10, 27, 69, 300)
step 943, loss=0.457238, accuracy=0.8, t=8.41, inputs=(10, 35, 69, 300)
step 944, loss=0.271686, accuracy=0.9, t=6.58, inputs=(10, 15, 67, 300)
step 945, loss=0.486041, accuracy=0.8, t=11.83, inputs=(10, 31, 114, 300)
step 946, loss=0.567859, accuracy=0.7, t=6.17, inputs=(10, 28, 48, 300)
step 947, loss=0.583396, accuracy=0.7, t=9.43, inputs=(10, 37, 78, 300)
step 948, loss=0.408231, accuracy=0.8, t=11.84, inputs=(10, 30, 111, 300)
step 949, loss=0.571681, accuracy=0.5, t=10.41, inputs=(10, 22, 106, 300)
step 950, loss=0.319352, accuracy=0.8, t=6.72, inputs=(10, 26, 53, 300)
step 951, loss=0.551768, accuracy=0.7, t=6.27, inputs=(10, 23, 55, 300)
step 952, loss=0.14722, accuracy=0.9, t=10.12, inputs=(10, 37, 85, 300)
step 953, loss=0.377813, accuracy=0.8, t=6.89, inputs=(10, 29, 54, 300)
step 954, loss=0.632801, accuracy=0.8, t=12.77, inputs=(10, 12, 141, 300)
step 955, loss=0.445229, accuracy=0.8, t=7.18, inputs=(10, 22, 69, 300)
step 956, loss=0.639145, accuracy=0.6, t=7.31, inputs=(10, 30, 61, 300)
step 957, loss=0.405504, accuracy=0.8, t=18.77, inputs=(10, 22, 206, 300)
step 958, loss=0.578206, accuracy=0.6, t=8.4, inputs=(10, 14, 91, 300)
step 959, loss=0.408165, accuracy=0.8, t=14.09, inputs=(10, 56, 112, 300)
step 960, loss=0.174659, accuracy=1.0, t=7.24, inputs=(10, 20, 70, 300)
step 961, loss=0.499282, accuracy=0.8, t=8.87, inputs=(10, 28, 77, 300)
step 962, loss=0.224247, accuracy=1.0, t=12.45, inputs=(10, 54, 93, 300)
step 963, loss=0.177165, accuracy=1.0, t=3.93, inputs=(10, 7, 44, 300)
step 964, loss=0.413152, accuracy=0.8, t=8.34, inputs=(10, 25, 77, 300)
step 965, loss=0.710924, accuracy=0.5, t=9.44, inputs=(10, 32, 81, 300)
step 966, loss=0.908766, accuracy=0.4, t=9.41, inputs=(10, 35, 77, 300)
step 967, loss=0.328535, accuracy=0.7, t=10.82, inputs=(10, 50, 78, 300)
step 968, loss=0.700616, accuracy=0.7, t=10.4, inputs=(10, 20, 107, 300)
step 969, loss=0.36464, accuracy=0.8, t=7.71, inputs=(10, 15, 80, 300)
step 970, loss=0.330949, accuracy=0.8, t=6.73, inputs=(10, 16, 65, 300)
step 971, loss=0.624032, accuracy=0.7, t=8.32, inputs=(10, 18, 81, 300)
step 972, loss=0.376703, accuracy=0.9, t=16.96, inputs=(10, 49, 152, 300)
step 973, loss=0.412523, accuracy=0.8, t=10.19, inputs=(10, 20, 105, 300)
step 974, loss=0.274856, accuracy=0.9, t=9.38, inputs=(10, 41, 73, 300)
step 975, loss=0.342582, accuracy=0.8, t=8.41, inputs=(10, 24, 77, 300)
step 976, loss=0.317866, accuracy=0.9, t=16.99, inputs=(10, 39, 161, 300)
step 977, loss=0.509938, accuracy=0.7, t=9.25, inputs=(10, 18, 97, 300)
step 978, loss=0.392112, accuracy=0.7, t=15.47, inputs=(10, 22, 160, 300)
step 979, loss=0.536484, accuracy=0.8, t=6.94, inputs=(10, 14, 71, 300)
step 980, loss=0.485633, accuracy=0.8, t=7.77, inputs=(10, 30, 65, 300)
step 981, loss=0.276696, accuracy=0.8, t=7.95, inputs=(10, 21, 76, 300)
step 982, loss=0.626522, accuracy=0.7, t=12.21, inputs=(10, 27, 121, 300)
step 983, loss=0.352623, accuracy=0.8, t=9.79, inputs=(10, 54, 62, 300)
step 984, loss=0.301125, accuracy=1.0, t=5.81, inputs=(10, 25, 47, 300)
step 985, loss=0.417253, accuracy=0.7, t=8.91, inputs=(10, 17, 89, 300)
step 986, loss=0.823988, accuracy=0.5, t=5.55, inputs=(10, 21, 47, 300)
step 987, loss=0.324104, accuracy=0.9, t=4.82, inputs=(10, 16, 45, 300)
step 988, loss=0.287384, accuracy=0.9, t=9.59, inputs=(10, 19, 97, 300)
step 989, loss=0.274808, accuracy=0.9, t=12.64, inputs=(10, 32, 120, 300)
step 990, loss=0.286091, accuracy=0.9, t=9.5, inputs=(10, 27, 87, 300)
step 991, loss=0.552778, accuracy=0.8, t=9.09, inputs=(10, 24, 86, 300)
step 992, loss=0.421152, accuracy=0.8, t=10.4, inputs=(10, 39, 84, 300)
step 993, loss=1.06303, accuracy=0.5, t=7.45, inputs=(10, 11, 81, 300)
step 994, loss=0.434816, accuracy=0.9, t=8.02, inputs=(10, 18, 77, 300)
step 995, loss=0.726032, accuracy=0.5, t=8.36, inputs=(10, 30, 71, 300)
step 996, loss=0.914392, accuracy=0.6, t=10.07, inputs=(10, 35, 83, 300)
step 997, loss=0.511091, accuracy=0.8, t=12.46, inputs=(10, 14, 136, 300)
step 998, loss=0.527092, accuracy=0.8, t=9.49, inputs=(10, 24, 88, 300)
step 999, loss=0.425733, accuracy=0.7, t=9.35, inputs=(10, 21, 91, 300)
step 1000, loss=0.191224, accuracy=1.0, t=7.36, inputs=(10, 21, 69, 300)
checkpoint & graph meta
checkpoint done
step 1001, loss=0.346256, accuracy=0.8, t=16.2, inputs=(10, 54, 134, 300)
step 1002, loss=0.352749, accuracy=0.9, t=8.0, inputs=(10, 34, 59, 300)
step 1003, loss=0.545653, accuracy=0.8, t=8.37, inputs=(10, 22, 79, 300)
step 1004, loss=0.30869, accuracy=0.9, t=9.29, inputs=(10, 42, 67, 300)
step 1005, loss=0.317596, accuracy=1.0, t=9.32, inputs=(10, 31, 84, 300)
step 1006, loss=0.341185, accuracy=0.8, t=10.19, inputs=(10, 29, 92, 300)
step 1007, loss=0.218153, accuracy=1.0, t=9.68, inputs=(10, 28, 90, 300)
step 1008, loss=0.842551, accuracy=0.6, t=11.18, inputs=(10, 24, 114, 300)
step 1009, loss=0.292479, accuracy=0.8, t=8.05, inputs=(10, 24, 73, 300)
step 1010, loss=0.41386, accuracy=0.9, t=13.51, inputs=(10, 40, 119, 300)
step 1011, loss=0.321672, accuracy=0.8, t=8.68, inputs=(10, 30, 73, 300)
step 1012, loss=0.356414, accuracy=0.9, t=8.99, inputs=(10, 31, 79, 300)
step 1013, loss=0.197433, accuracy=1.0, t=16.06, inputs=(10, 35, 158, 300)
step 1014, loss=0.301781, accuracy=1.0, t=7.86, inputs=(10, 19, 78, 300)
step 1015, loss=0.290965, accuracy=0.9, t=11.51, inputs=(10, 44, 92, 300)
step 1016, loss=0.19561, accuracy=1.0, t=7.82, inputs=(10, 36, 60, 300)
step 1017, loss=0.194785, accuracy=1.0, t=7.48, inputs=(10, 26, 65, 300)
step 1018, loss=0.663859, accuracy=0.7, t=10.14, inputs=(10, 30, 91, 300)
step 1019, loss=0.384719, accuracy=0.8, t=9.24, inputs=(10, 43, 66, 300)
step 1020, loss=0.302044, accuracy=0.9, t=7.51, inputs=(10, 27, 64, 300)
step 1021, loss=0.568095, accuracy=0.7, t=8.8, inputs=(10, 34, 73, 300)
step 1022, loss=0.301845, accuracy=0.8, t=10.11, inputs=(10, 30, 92, 300)
step 1023, loss=0.518177, accuracy=0.6, t=11.11, inputs=(10, 25, 102, 300)
step 1024, loss=0.280149, accuracy=0.8, t=7.19, inputs=(10, 20, 66, 300)
step 1025, loss=0.419453, accuracy=0.8, t=12.32, inputs=(10, 19, 132, 300)
step 1026, loss=0.237286, accuracy=0.9, t=10.03, inputs=(10, 41, 76, 300)
step 1027, loss=0.612793, accuracy=0.7, t=5.47, inputs=(10, 17, 48, 300)
step 1028, loss=0.329522, accuracy=0.9, t=23.71, inputs=(10, 16, 270, 300)
step 1029, loss=0.442658, accuracy=0.8, t=6.89, inputs=(10, 25, 56, 300)
step 1030, loss=0.2159, accuracy=0.9, t=12.72, inputs=(10, 21, 132, 300)
step 1031, loss=0.484399, accuracy=0.7, t=12.1, inputs=(10, 41, 99, 300)
step 1032, loss=0.537609, accuracy=0.8, t=7.99, inputs=(10, 31, 63, 300)
step 1033, loss=0.360288, accuracy=0.8, t=7.85, inputs=(10, 18, 80, 300)
step 1034, loss=0.434718, accuracy=0.9, t=8.94, inputs=(10, 42, 64, 300)
step 1035, loss=0.161922, accuracy=1.0, t=6.37, inputs=(10, 22, 53, 300)
step 1036, loss=0.577267, accuracy=0.8, t=18.79, inputs=(10, 58, 157, 300)
step 1037, loss=0.280378, accuracy=0.9, t=7.33, inputs=(10, 22, 64, 300)
step 1038, loss=0.287725, accuracy=1.0, t=8.38, inputs=(10, 33, 69, 300)
step 1039, loss=0.767259, accuracy=0.6, t=8.83, inputs=(10, 36, 72, 300)
step 1040, loss=0.448771, accuracy=0.8, t=10.03, inputs=(10, 32, 91, 300)
step 1041, loss=0.307365, accuracy=0.8, t=6.81, inputs=(10, 18, 65, 300)
step 1042, loss=0.684101, accuracy=0.9, t=6.21, inputs=(10, 16, 61, 300)
step 1043, loss=0.652537, accuracy=0.6, t=11.17, inputs=(10, 32, 103, 300)
step 1044, loss=0.290622, accuracy=0.9, t=16.54, inputs=(10, 25, 177, 300)
step 1045, loss=0.372736, accuracy=0.9, t=13.45, inputs=(10, 43, 116, 300)
step 1046, loss=0.487292, accuracy=0.7, t=9.56, inputs=(10, 30, 84, 300)
step 1047, loss=0.184469, accuracy=1.0, t=13.02, inputs=(10, 41, 116, 300)
step 1048, loss=0.358991, accuracy=0.7, t=11.83, inputs=(10, 20, 126, 300)
step 1049, loss=0.697094, accuracy=0.7, t=5.68, inputs=(10, 16, 56, 300)
step 1050, loss=0.512918, accuracy=0.7, t=11.08, inputs=(10, 49, 84, 300)
step 1051, loss=0.505132, accuracy=0.8, t=5.71, inputs=(10, 16, 56, 300)
step 1052, loss=0.268834, accuracy=0.9, t=8.82, inputs=(10, 39, 68, 300)
step 1053, loss=0.264104, accuracy=0.9, t=10.63, inputs=(10, 27, 101, 300)
step 1054, loss=0.480929, accuracy=0.9, t=12.19, inputs=(10, 29, 120, 300)
step 1055, loss=0.206479, accuracy=0.9, t=17.25, inputs=(10, 47, 161, 300)
step 1056, loss=0.450458, accuracy=0.8, t=10.27, inputs=(10, 19, 108, 300)
step 1057, loss=0.490451, accuracy=0.9, t=8.44, inputs=(10, 33, 69, 300)
step 1058, loss=0.585895, accuracy=0.6, t=8.64, inputs=(10, 19, 87, 300)
step 1059, loss=0.378504, accuracy=0.8, t=10.56, inputs=(10, 19, 107, 300)
step 1060, loss=0.485774, accuracy=0.7, t=13.19, inputs=(10, 55, 103, 300)
step 1061, loss=0.609435, accuracy=0.7, t=12.52, inputs=(10, 14, 136, 300)
step 1062, loss=0.235002, accuracy=0.9, t=7.89, inputs=(10, 18, 79, 300)
step 1063, loss=0.343218, accuracy=0.8, t=9.48, inputs=(10, 19, 97, 300)
step 1064, loss=0.820548, accuracy=0.6, t=10.19, inputs=(10, 32, 90, 300)
step 1065, loss=0.698562, accuracy=0.8, t=7.45, inputs=(10, 19, 72, 300)
step 1066, loss=0.522477, accuracy=0.7, t=9.93, inputs=(10, 26, 94, 300)
step 1067, loss=0.296897, accuracy=0.9, t=8.68, inputs=(10, 44, 60, 300)
step 1068, loss=0.612612, accuracy=0.7, t=10.68, inputs=(10, 31, 100, 300)
step 1069, loss=0.189879, accuracy=0.9, t=5.55, inputs=(10, 14, 56, 300)
step 1070, loss=0.328427, accuracy=0.8, t=7.2, inputs=(10, 29, 56, 300)
step 1071, loss=0.466428, accuracy=0.8, t=9.92, inputs=(10, 42, 80, 300)
step 1072, loss=0.141767, accuracy=1.0, t=10.77, inputs=(10, 22, 107, 300)
step 1073, loss=0.75373, accuracy=0.7, t=9.45, inputs=(10, 29, 83, 300)
step 1074, loss=0.293113, accuracy=0.9, t=10.23, inputs=(10, 41, 83, 300)
step 1075, loss=0.712985, accuracy=0.6, t=7.17, inputs=(10, 25, 61, 300)
step 1076, loss=0.319515, accuracy=0.9, t=7.33, inputs=(10, 26, 66, 300)
step 1077, loss=0.594371, accuracy=0.8, t=10.13, inputs=(10, 31, 92, 300)
step 1078, loss=0.454632, accuracy=0.8, t=10.55, inputs=(10, 22, 105, 300)
step 1079, loss=0.774027, accuracy=0.8, t=8.46, inputs=(10, 45, 60, 300)
step 1080, loss=0.383976, accuracy=0.9, t=19.06, inputs=(10, 26, 205, 300)
step 1081, loss=0.386882, accuracy=0.8, t=8.01, inputs=(10, 28, 68, 300)
step 1082, loss=0.281301, accuracy=0.9, t=6.54, inputs=(10, 21, 57, 300)
step 1083, loss=0.844883, accuracy=0.6, t=19.94, inputs=(10, 47, 190, 300)
step 1084, loss=0.409067, accuracy=0.7, t=12.41, inputs=(10, 19, 129, 300)
step 1085, loss=0.329149, accuracy=0.8, t=9.71, inputs=(10, 41, 72, 300)
step 1086, loss=0.474178, accuracy=0.9, t=8.49, inputs=(10, 43, 59, 300)
step 1087, loss=0.688914, accuracy=0.4, t=11.14, inputs=(10, 14, 119, 300)
step 1088, loss=0.636612, accuracy=0.6, t=10.04, inputs=(10, 17, 105, 300)
step 1089, loss=0.491633, accuracy=0.8, t=10.05, inputs=(10, 19, 102, 300)
step 1090, loss=0.273207, accuracy=0.9, t=8.54, inputs=(10, 21, 85, 300)
step 1091, loss=0.591109, accuracy=0.6, t=10.01, inputs=(10, 37, 84, 300)
step 1092, loss=0.26727, accuracy=1.0, t=7.83, inputs=(10, 41, 52, 300)
step 1093, loss=0.747955, accuracy=0.7, t=8.21, inputs=(10, 16, 86, 300)
step 1094, loss=0.356672, accuracy=0.9, t=12.73, inputs=(10, 36, 117, 300)
step 1095, loss=0.336374, accuracy=0.9, t=7.38, inputs=(10, 26, 65, 300)
step 1096, loss=0.386715, accuracy=0.9, t=6.77, inputs=(10, 21, 63, 300)
step 1097, loss=0.391466, accuracy=0.9, t=7.42, inputs=(10, 13, 83, 300)
step 1098, loss=0.233315, accuracy=1.0, t=11.94, inputs=(10, 33, 109, 300)
step 1099, loss=0.577483, accuracy=0.6, t=9.46, inputs=(10, 38, 77, 300)
step 1100, loss=0.370054, accuracy=0.8, t=8.77, inputs=(10, 38, 69, 300)
checkpoint & graph meta
checkpoint done
step 1101, loss=0.391532, accuracy=0.8, t=9.82, inputs=(10, 27, 87, 300)
step 1102, loss=0.41757, accuracy=0.8, t=9.05, inputs=(10, 41, 65, 300)
step 1103, loss=0.407879, accuracy=0.8, t=9.65, inputs=(10, 43, 72, 300)
step 1104, loss=0.535599, accuracy=0.8, t=9.69, inputs=(10, 22, 96, 300)
step 1105, loss=0.483359, accuracy=0.8, t=9.14, inputs=(10, 42, 67, 300)
step 1106, loss=0.652644, accuracy=0.6, t=9.67, inputs=(10, 44, 72, 300)
step 1107, loss=0.256669, accuracy=0.9, t=12.52, inputs=(10, 23, 128, 300)
step 1108, loss=0.407381, accuracy=0.8, t=7.06, inputs=(10, 28, 58, 300)
step 1109, loss=0.519291, accuracy=0.8, t=8.35, inputs=(10, 24, 76, 300)
step 1110, loss=0.537154, accuracy=0.7, t=9.14, inputs=(10, 15, 97, 300)
step 1111, loss=1.03258, accuracy=0.6, t=6.6, inputs=(10, 13, 68, 300)
step 1112, loss=0.769742, accuracy=0.7, t=9.09, inputs=(10, 36, 73, 300)
step 1113, loss=0.22799, accuracy=1.0, t=11.02, inputs=(10, 32, 102, 300)
step 1114, loss=0.311521, accuracy=0.9, t=9.06, inputs=(10, 32, 75, 300)
step 1115, loss=0.39051, accuracy=0.8, t=17.68, inputs=(10, 31, 178, 300)
step 1116, loss=0.213978, accuracy=1.0, t=6.21, inputs=(10, 16, 59, 300)
step 1117, loss=0.289618, accuracy=0.9, t=11.92, inputs=(10, 48, 96, 300)
step 1118, loss=0.516912, accuracy=0.7, t=8.46, inputs=(10, 36, 66, 300)
step 1119, loss=0.351004, accuracy=0.8, t=9.62, inputs=(10, 44, 71, 300)
step 1120, loss=0.832305, accuracy=0.7, t=8.54, inputs=(10, 25, 79, 300)
step 1121, loss=0.413739, accuracy=0.9, t=7.32, inputs=(10, 16, 73, 300)
step 1122, loss=0.540056, accuracy=0.8, t=7.17, inputs=(10, 28, 59, 300)
step 1123, loss=0.484507, accuracy=0.9, t=13.16, inputs=(10, 26, 128, 300)
step 1124, loss=0.254405, accuracy=0.9, t=10.14, inputs=(10, 21, 101, 300)
step 1125, loss=0.301994, accuracy=0.9, t=7.11, inputs=(10, 33, 50, 300)
step 1126, loss=0.505236, accuracy=0.6, t=8.15, inputs=(10, 32, 68, 300)
step 1127, loss=0.495421, accuracy=0.7, t=9.71, inputs=(10, 31, 86, 300)
step 1128, loss=0.434724, accuracy=0.8, t=10.97, inputs=(10, 18, 114, 300)
step 1129, loss=0.211913, accuracy=1.0, t=7.94, inputs=(10, 24, 69, 300)
step 1130, loss=0.220299, accuracy=1.0, t=5.72, inputs=(10, 17, 54, 300)
step 1131, loss=0.232528, accuracy=0.9, t=8.75, inputs=(10, 26, 82, 300)
step 1132, loss=0.300236, accuracy=0.9, t=8.14, inputs=(10, 28, 68, 300)
step 1133, loss=0.304774, accuracy=0.9, t=12.95, inputs=(10, 45, 109, 300)
step 1134, loss=0.3779, accuracy=0.8, t=9.07, inputs=(10, 43, 61, 300)
step 1135, loss=0.445155, accuracy=0.9, t=8.26, inputs=(10, 16, 84, 300)
step 1136, loss=0.284841, accuracy=0.9, t=22.9, inputs=(10, 27, 251, 300)
step 1137, loss=0.369304, accuracy=0.9, t=5.87, inputs=(10, 13, 62, 300)
step 1138, loss=0.628122, accuracy=0.6, t=9.35, inputs=(10, 22, 93, 300)
step 1139, loss=0.239173, accuracy=0.9, t=8.31, inputs=(10, 14, 90, 300)
step 1140, loss=0.364129, accuracy=0.9, t=6.54, inputs=(10, 18, 63, 300)
step 1141, loss=0.138578, accuracy=1.0, t=11.79, inputs=(10, 19, 125, 300)
step 1142, loss=0.439469, accuracy=0.8, t=9.71, inputs=(10, 22, 95, 300)
step 1143, loss=0.814352, accuracy=0.7, t=8.49, inputs=(10, 21, 82, 300)
step 1144, loss=0.24896, accuracy=0.9, t=7.51, inputs=(10, 19, 74, 300)
step 1145, loss=0.93101, accuracy=0.6, t=8.51, inputs=(10, 38, 65, 300)
step 1146, loss=0.440163, accuracy=0.7, t=8.26, inputs=(10, 21, 80, 300)
step 1147, loss=0.735651, accuracy=0.6, t=10.17, inputs=(10, 47, 73, 300)
step 1148, loss=0.399516, accuracy=0.8, t=9.97, inputs=(10, 61, 51, 300)
step 1149, loss=0.266421, accuracy=0.9, t=7.34, inputs=(10, 22, 68, 300)
step 1150, loss=0.240243, accuracy=0.9, t=6.88, inputs=(10, 29, 56, 300)
step 1151, loss=0.424204, accuracy=0.8, t=15.71, inputs=(10, 51, 140, 300)
step 1152, loss=0.181523, accuracy=1.0, t=8.1, inputs=(10, 24, 75, 300)
step 1153, loss=0.636454, accuracy=0.6, t=5.89, inputs=(10, 16, 57, 300)
step 1154, loss=0.347022, accuracy=0.9, t=8.93, inputs=(10, 34, 73, 300)
step 1155, loss=0.149698, accuracy=1.0, t=10.85, inputs=(10, 29, 105, 300)
step 1156, loss=0.447943, accuracy=0.7, t=14.19, inputs=(10, 45, 124, 300)
step 1157, loss=0.369565, accuracy=0.8, t=11.15, inputs=(10, 32, 101, 300)
step 1158, loss=0.626607, accuracy=0.7, t=9.48, inputs=(10, 25, 90, 300)
step 1159, loss=0.422328, accuracy=0.9, t=9.73, inputs=(10, 47, 69, 300)
step 1160, loss=0.173343, accuracy=1.0, t=14.83, inputs=(10, 27, 152, 300)
step 1161, loss=0.615057, accuracy=0.7, t=9.68, inputs=(10, 38, 79, 300)
step 1162, loss=0.319641, accuracy=0.8, t=8.82, inputs=(10, 23, 86, 300)
step 1163, loss=0.240934, accuracy=1.0, t=8.79, inputs=(10, 26, 80, 300)
step 1164, loss=0.478419, accuracy=0.8, t=7.49, inputs=(10, 19, 72, 300)
step 1165, loss=0.261571, accuracy=0.8, t=9.65, inputs=(10, 31, 88, 300)
step 1166, loss=0.487153, accuracy=0.8, t=8.09, inputs=(10, 18, 86, 300)
step 1167, loss=0.424408, accuracy=0.8, t=10.09, inputs=(10, 28, 95, 300)
step 1168, loss=0.652776, accuracy=0.7, t=8.84, inputs=(10, 17, 90, 300)
step 1169, loss=0.450127, accuracy=0.6, t=16.95, inputs=(10, 45, 154, 300)
step 1170, loss=0.453943, accuracy=0.8, t=9.19, inputs=(10, 41, 70, 300)
step 1171, loss=0.776634, accuracy=0.6, t=15.32, inputs=(10, 23, 162, 300)
step 1172, loss=0.557548, accuracy=0.8, t=11.52, inputs=(10, 38, 101, 300)
step 1173, loss=0.451408, accuracy=0.7, t=9.23, inputs=(10, 26, 86, 300)
step 1174, loss=0.251804, accuracy=0.9, t=10.12, inputs=(10, 27, 98, 300)
step 1175, loss=0.334542, accuracy=0.9, t=8.7, inputs=(10, 32, 70, 300)
step 1176, loss=0.365646, accuracy=0.9, t=8.47, inputs=(10, 26, 79, 300)
step 1177, loss=0.700304, accuracy=0.7, t=9.61, inputs=(10, 28, 89, 300)
step 1178, loss=0.478574, accuracy=0.8, t=6.2, inputs=(10, 16, 59, 300)
step 1179, loss=0.166882, accuracy=1.0, t=8.44, inputs=(10, 35, 65, 300)
step 1180, loss=0.344387, accuracy=0.8, t=6.37, inputs=(10, 29, 51, 300)
step 1181, loss=0.319332, accuracy=0.9, t=10.93, inputs=(10, 27, 102, 300)
step 1182, loss=0.233083, accuracy=0.9, t=10.49, inputs=(10, 24, 105, 300)
step 1183, loss=0.226661, accuracy=0.9, t=10.17, inputs=(10, 26, 98, 300)
step 1184, loss=0.240793, accuracy=1.0, t=17.97, inputs=(10, 21, 196, 300)
step 1185, loss=0.296133, accuracy=0.9, t=17.25, inputs=(10, 38, 170, 300)
step 1186, loss=0.431272, accuracy=0.8, t=9.53, inputs=(10, 45, 68, 300)
step 1187, loss=0.704154, accuracy=0.7, t=9.76, inputs=(10, 40, 76, 300)
step 1188, loss=0.655465, accuracy=0.7, t=8.57, inputs=(10, 17, 91, 300)
step 1189, loss=0.443184, accuracy=0.8, t=19.54, inputs=(10, 42, 191, 300)
step 1190, loss=0.352402, accuracy=0.8, t=6.79, inputs=(10, 17, 67, 300)
step 1191, loss=0.106242, accuracy=1.0, t=6.93, inputs=(10, 25, 60, 300)
step 1192, loss=0.398878, accuracy=0.8, t=9.16, inputs=(10, 14, 98, 300)
step 1193, loss=0.653879, accuracy=0.6, t=8.01, inputs=(10, 27, 71, 300)
step 1194, loss=0.933637, accuracy=0.5, t=9.33, inputs=(10, 42, 71, 300)
step 1195, loss=0.931816, accuracy=0.6, t=8.29, inputs=(10, 16, 87, 300)
step 1196, loss=0.456731, accuracy=0.8, t=20.11, inputs=(10, 64, 171, 300)
step 1197, loss=0.400244, accuracy=0.9, t=10.35, inputs=(10, 33, 93, 300)
step 1198, loss=0.369672, accuracy=0.9, t=7.43, inputs=(10, 26, 66, 300)
step 1199, loss=0.381577, accuracy=0.9, t=10.39, inputs=(10, 15, 115, 300)
step 1200, loss=0.54683, accuracy=0.8, t=7.03, inputs=(10, 19, 68, 300)
checkpoint & graph meta
checkpoint done
step 1201, loss=0.18552, accuracy=1.0, t=9.05, inputs=(10, 26, 85, 300)
step 1202, loss=0.670906, accuracy=0.7, t=11.18, inputs=(10, 28, 106, 300)
step 1203, loss=0.445863, accuracy=0.8, t=11.5, inputs=(10, 20, 120, 300)
step 1204, loss=0.310104, accuracy=0.8, t=14.02, inputs=(10, 50, 115, 300)
step 1205, loss=0.229746, accuracy=1.0, t=5.0, inputs=(10, 16, 47, 300)
step 1206, loss=0.551347, accuracy=0.8, t=10.71, inputs=(10, 27, 103, 300)
step 1207, loss=0.248945, accuracy=0.9, t=12.06, inputs=(10, 30, 115, 300)
step 1208, loss=0.361612, accuracy=0.8, t=7.6, inputs=(10, 22, 71, 300)
step 1209, loss=0.331356, accuracy=0.7, t=7.26, inputs=(10, 29, 57, 300)
step 1210, loss=0.284641, accuracy=0.8, t=8.84, inputs=(10, 32, 78, 300)
step 1211, loss=0.88745, accuracy=0.5, t=6.56, inputs=(10, 22, 56, 300)
step 1212, loss=0.363117, accuracy=0.8, t=8.83, inputs=(10, 20, 88, 300)
step 1213, loss=0.684399, accuracy=0.5, t=9.93, inputs=(10, 22, 98, 300)
step 1214, loss=0.930735, accuracy=0.6, t=6.37, inputs=(10, 23, 55, 300)
step 1215, loss=1.28849, accuracy=0.6, t=8.29, inputs=(10, 27, 71, 300)
step 1216, loss=0.517885, accuracy=0.7, t=8.45, inputs=(10, 42, 58, 300)
step 1217, loss=0.489047, accuracy=0.8, t=9.24, inputs=(10, 36, 76, 300)
step 1218, loss=0.346473, accuracy=0.9, t=7.62, inputs=(10, 32, 60, 300)
step 1219, loss=0.204149, accuracy=1.0, t=6.39, inputs=(10, 17, 63, 300)
step 1220, loss=0.277529, accuracy=0.9, t=12.29, inputs=(10, 14, 132, 300)
step 1221, loss=0.744122, accuracy=0.6, t=5.76, inputs=(10, 17, 55, 300)
step 1222, loss=0.340976, accuracy=0.7, t=9.59, inputs=(10, 24, 91, 300)
step 1223, loss=0.542282, accuracy=0.9, t=15.86, inputs=(10, 47, 136, 300)
step 1224, loss=0.262522, accuracy=1.0, t=8.06, inputs=(10, 18, 78, 300)
step 1225, loss=0.358085, accuracy=0.8, t=9.56, inputs=(10, 40, 73, 300)
step 1226, loss=0.630241, accuracy=0.6, t=12.25, inputs=(10, 33, 111, 300)
step 1227, loss=0.228251, accuracy=1.0, t=10.92, inputs=(10, 49, 79, 300)
step 1228, loss=0.476321, accuracy=0.8, t=17.6, inputs=(10, 27, 184, 300)
step 1229, loss=0.493247, accuracy=0.7, t=9.49, inputs=(10, 40, 72, 300)
step 1230, loss=0.28097, accuracy=0.9, t=7.94, inputs=(10, 21, 75, 300)
step 1231, loss=0.307293, accuracy=0.9, t=16.89, inputs=(10, 50, 148, 300)
step 1232, loss=0.464882, accuracy=0.8, t=10.99, inputs=(10, 39, 95, 300)
step 1233, loss=0.970244, accuracy=0.7, t=21.43, inputs=(10, 22, 231, 300)
step 1234, loss=0.568061, accuracy=0.6, t=7.84, inputs=(10, 25, 71, 300)
step 1235, loss=0.742172, accuracy=0.6, t=10.49, inputs=(10, 27, 101, 300)
step 1236, loss=1.08141, accuracy=0.5, t=30.41, inputs=(10, 59, 244, 300)
step 1237, loss=0.315182, accuracy=0.9, t=13.28, inputs=(10, 87, 64, 300)
step 1238, loss=0.378267, accuracy=0.7, t=7.24, inputs=(10, 25, 62, 300)
step 1239, loss=0.178035, accuracy=1.0, t=8.63, inputs=(10, 26, 78, 300)
step 1240, loss=0.383977, accuracy=0.9, t=20.42, inputs=(10, 37, 206, 300)
step 1241, loss=0.447234, accuracy=0.8, t=7.54, inputs=(10, 22, 71, 300)
step 1242, loss=0.643986, accuracy=0.6, t=8.61, inputs=(10, 30, 71, 300)
step 1243, loss=0.384712, accuracy=0.8, t=7.06, inputs=(10, 20, 70, 300)
step 1244, loss=0.572842, accuracy=0.7, t=7.73, inputs=(10, 28, 65, 300)
step 1245, loss=0.431656, accuracy=0.7, t=14.02, inputs=(10, 31, 137, 300)
step 1246, loss=0.376786, accuracy=0.8, t=19.11, inputs=(10, 25, 203, 300)
step 1247, loss=0.667883, accuracy=0.7, t=7.4, inputs=(10, 19, 69, 300)
step 1248, loss=0.456201, accuracy=0.6, t=5.86, inputs=(10, 23, 50, 300)
step 1249, loss=0.370532, accuracy=0.8, t=6.52, inputs=(10, 23, 56, 300)
step 1250, loss=0.628372, accuracy=0.7, t=7.78, inputs=(10, 34, 61, 300)
step 1251, loss=0.590653, accuracy=0.8, t=5.08, inputs=(10, 10, 54, 300)
step 1252, loss=0.513292, accuracy=0.7, t=8.9, inputs=(10, 38, 71, 300)
step 1253, loss=0.690014, accuracy=0.7, t=7.13, inputs=(10, 17, 70, 300)
step 1254, loss=0.537719, accuracy=0.7, t=9.54, inputs=(10, 17, 102, 300)
step 1255, loss=0.280514, accuracy=0.9, t=6.69, inputs=(10, 23, 59, 300)
step 1256, loss=0.377387, accuracy=0.8, t=14.95, inputs=(10, 33, 146, 300)
step 1257, loss=0.298618, accuracy=0.9, t=8.24, inputs=(10, 29, 72, 300)
step 1258, loss=0.348987, accuracy=0.8, t=7.86, inputs=(10, 22, 78, 300)
step 1259, loss=0.234813, accuracy=1.0, t=8.22, inputs=(10, 40, 58, 300)
step 1260, loss=0.503779, accuracy=0.8, t=9.63, inputs=(10, 45, 68, 300)
step 1261, loss=0.645873, accuracy=0.5, t=24.77, inputs=(10, 33, 259, 300)
step 1262, loss=0.376392, accuracy=0.9, t=9.1, inputs=(10, 29, 83, 300)
step 1263, loss=0.260807, accuracy=0.9, t=9.16, inputs=(10, 26, 83, 300)
step 1264, loss=0.417677, accuracy=0.9, t=9.31, inputs=(10, 52, 60, 300)
step 1265, loss=0.265822, accuracy=1.0, t=5.96, inputs=(10, 27, 43, 300)
step 1266, loss=0.267068, accuracy=1.0, t=13.14, inputs=(10, 23, 133, 300)
step 1267, loss=0.352649, accuracy=0.8, t=14.42, inputs=(10, 40, 132, 300)
step 1268, loss=0.307356, accuracy=0.9, t=8.42, inputs=(10, 42, 58, 300)
step 1269, loss=0.354902, accuracy=0.9, t=8.71, inputs=(10, 27, 77, 300)
step 1270, loss=0.493538, accuracy=0.7, t=17.8, inputs=(10, 18, 200, 300)
step 1271, loss=0.873285, accuracy=0.5, t=8.31, inputs=(10, 28, 75, 300)
step 1272, loss=0.28415, accuracy=0.9, t=8.72, inputs=(10, 43, 60, 300)
step 1273, loss=0.424775, accuracy=0.8, t=22.98, inputs=(10, 17, 259, 300)
step 1274, loss=0.331883, accuracy=0.9, t=11.19, inputs=(10, 49, 85, 300)
step 1275, loss=0.763277, accuracy=0.5, t=6.87, inputs=(10, 22, 61, 300)
step 1276, loss=0.436342, accuracy=0.8, t=12.45, inputs=(10, 47, 100, 300)
step 1277, loss=0.254382, accuracy=0.9, t=16.32, inputs=(10, 23, 175, 300)
step 1278, loss=0.780936, accuracy=0.5, t=9.29, inputs=(10, 15, 96, 300)
step 1279, loss=0.554259, accuracy=0.7, t=14.74, inputs=(10, 18, 150, 300)
step 1280, loss=0.398176, accuracy=0.9, t=10.8, inputs=(10, 40, 90, 300)
step 1281, loss=0.463681, accuracy=0.8, t=14.72, inputs=(10, 22, 154, 300)
step 1282, loss=0.52229, accuracy=0.8, t=7.8, inputs=(10, 16, 78, 300)
step 1283, loss=0.383695, accuracy=0.8, t=8.72, inputs=(10, 18, 88, 300)
step 1284, loss=0.407116, accuracy=0.8, t=12.85, inputs=(10, 18, 139, 300)
step 1285, loss=0.243374, accuracy=1.0, t=11.26, inputs=(10, 26, 114, 300)
step 1286, loss=0.320815, accuracy=0.8, t=10.52, inputs=(10, 34, 94, 300)
step 1287, loss=0.456247, accuracy=0.7, t=8.71, inputs=(10, 40, 61, 300)
step 1288, loss=0.298586, accuracy=0.9, t=8.17, inputs=(10, 46, 50, 300)
step 1289, loss=0.542254, accuracy=0.7, t=17.44, inputs=(10, 32, 180, 300)
step 1290, loss=0.375777, accuracy=0.8, t=10.87, inputs=(10, 21, 114, 300)
step 1291, loss=0.368416, accuracy=0.8, t=16.23, inputs=(10, 28, 166, 300)
step 1292, loss=0.605752, accuracy=0.9, t=9.66, inputs=(10, 38, 78, 300)
step 1293, loss=0.553933, accuracy=0.7, t=8.38, inputs=(10, 27, 75, 300)
step 1294, loss=0.233405, accuracy=1.0, t=10.18, inputs=(10, 32, 88, 300)
step 1295, loss=0.523757, accuracy=0.8, t=7.28, inputs=(10, 19, 68, 300)
step 1296, loss=0.206038, accuracy=1.0, t=10.56, inputs=(10, 50, 75, 300)
step 1297, loss=0.204968, accuracy=1.0, t=12.78, inputs=(10, 35, 115, 300)
step 1298, loss=0.298341, accuracy=0.8, t=12.97, inputs=(10, 25, 131, 300)
step 1299, loss=0.275007, accuracy=0.8, t=9.46, inputs=(10, 39, 73, 300)
step 1300, loss=0.250542, accuracy=0.9, t=12.02, inputs=(10, 23, 120, 300)
checkpoint & graph meta
checkpoint done
step 1301, loss=0.465646, accuracy=0.8, t=19.67, inputs=(10, 29, 206, 300)
step 1302, loss=0.232545, accuracy=0.9, t=8.89, inputs=(10, 37, 68, 300)
step 1303, loss=0.33543, accuracy=0.9, t=8.12, inputs=(10, 32, 65, 300)
step 1304, loss=0.891791, accuracy=0.6, t=14.31, inputs=(10, 29, 143, 300)
step 1305, loss=0.604335, accuracy=0.7, t=7.91, inputs=(10, 21, 79, 300)
step 1306, loss=0.140974, accuracy=1.0, t=17.09, inputs=(10, 41, 166, 300)
step 1307, loss=0.27591, accuracy=0.9, t=10.69, inputs=(10, 62, 65, 300)
step 1308, loss=0.44514, accuracy=0.8, t=9.22, inputs=(10, 29, 79, 300)
step 1309, loss=0.257664, accuracy=0.9, t=14.56, inputs=(10, 81, 82, 300)
step 1310, loss=0.229692, accuracy=1.0, t=6.86, inputs=(10, 20, 66, 300)
step 1311, loss=0.175664, accuracy=1.0, t=7.68, inputs=(10, 18, 75, 300)
step 1312, loss=0.307832, accuracy=1.0, t=15.15, inputs=(10, 35, 144, 300)
step 1313, loss=0.365402, accuracy=0.7, t=7.19, inputs=(10, 23, 63, 300)
step 1314, loss=0.55923, accuracy=0.8, t=8.26, inputs=(10, 23, 78, 300)
step 1315, loss=0.33109, accuracy=0.8, t=8.87, inputs=(10, 26, 80, 300)
step 1316, loss=0.447374, accuracy=0.9, t=9.43, inputs=(10, 37, 78, 300)
step 1317, loss=0.471767, accuracy=0.9, t=8.52, inputs=(10, 24, 80, 300)
step 1318, loss=0.488085, accuracy=0.8, t=7.25, inputs=(10, 18, 70, 300)
step 1319, loss=0.318049, accuracy=0.8, t=8.53, inputs=(10, 28, 74, 300)
step 1320, loss=0.656681, accuracy=0.7, t=11.67, inputs=(10, 26, 116, 300)
step 1321, loss=0.493266, accuracy=0.7, t=6.83, inputs=(10, 20, 64, 300)
step 1322, loss=0.766415, accuracy=0.6, t=24.0, inputs=(10, 22, 267, 300)
step 1323, loss=0.56124, accuracy=0.7, t=10.81, inputs=(10, 31, 100, 300)
step 1324, loss=0.650326, accuracy=0.7, t=7.42, inputs=(10, 28, 64, 300)
step 1325, loss=0.398578, accuracy=0.9, t=16.96, inputs=(10, 16, 191, 300)
step 1326, loss=0.419083, accuracy=0.9, t=10.06, inputs=(10, 38, 84, 300)
step 1327, loss=0.552642, accuracy=0.8, t=15.8, inputs=(10, 34, 160, 300)
step 1328, loss=0.393857, accuracy=0.8, t=8.28, inputs=(10, 23, 75, 300)
step 1329, loss=0.317081, accuracy=0.8, t=9.08, inputs=(10, 50, 55, 300)
step 1330, loss=0.335982, accuracy=0.8, t=5.11, inputs=(10, 15, 48, 300)
step 1331, loss=0.478192, accuracy=0.8, t=12.48, inputs=(10, 28, 126, 300)
step 1332, loss=0.388411, accuracy=0.7, t=8.43, inputs=(10, 26, 74, 300)
step 1333, loss=0.246198, accuracy=0.9, t=14.08, inputs=(10, 22, 143, 300)
step 1334, loss=0.812329, accuracy=0.6, t=18.8, inputs=(10, 31, 187, 300)
step 1335, loss=0.372035, accuracy=0.9, t=11.59, inputs=(10, 25, 108, 300)
step 1336, loss=0.409654, accuracy=0.8, t=9.01, inputs=(10, 17, 88, 300)
step 1337, loss=0.285854, accuracy=0.9, t=9.13, inputs=(10, 42, 67, 300)
step 1338, loss=0.293651, accuracy=0.9, t=9.39, inputs=(10, 27, 87, 300)
step 1339, loss=0.463181, accuracy=0.8, t=5.48, inputs=(10, 13, 54, 300)
step 1340, loss=0.20811, accuracy=1.0, t=9.84, inputs=(10, 48, 68, 300)
step 1341, loss=0.256009, accuracy=0.8, t=10.63, inputs=(10, 65, 60, 300)
step 1342, loss=0.471762, accuracy=0.7, t=8.07, inputs=(10, 25, 73, 300)
step 1343, loss=0.576172, accuracy=0.8, t=5.89, inputs=(10, 15, 58, 300)
step 1344, loss=0.426364, accuracy=0.7, t=6.62, inputs=(10, 25, 56, 300)
step 1345, loss=0.286631, accuracy=0.8, t=11.34, inputs=(10, 37, 101, 300)
step 1346, loss=0.56284, accuracy=0.6, t=9.89, inputs=(10, 18, 100, 300)
step 1347, loss=0.293012, accuracy=0.9, t=12.18, inputs=(10, 33, 111, 300)
step 1348, loss=0.258757, accuracy=0.9, t=8.65, inputs=(10, 26, 78, 300)
step 1349, loss=0.385561, accuracy=0.8, t=9.24, inputs=(10, 38, 72, 300)
step 1350, loss=0.349196, accuracy=0.8, t=8.43, inputs=(10, 26, 76, 300)
step 1351, loss=0.172044, accuracy=1.0, t=8.33, inputs=(10, 17, 84, 300)
step 1352, loss=0.431776, accuracy=0.8, t=9.71, inputs=(10, 42, 71, 300)
step 1353, loss=0.277851, accuracy=0.8, t=14.49, inputs=(10, 21, 150, 300)
step 1354, loss=0.142872, accuracy=1.0, t=5.51, inputs=(10, 13, 57, 300)
step 1355, loss=0.203268, accuracy=0.9, t=8.45, inputs=(10, 13, 93, 300)
step 1356, loss=0.236133, accuracy=0.9, t=5.85, inputs=(10, 32, 42, 300)
step 1357, loss=0.399473, accuracy=0.8, t=5.91, inputs=(10, 19, 54, 300)
step 1358, loss=0.321787, accuracy=0.9, t=12.08, inputs=(10, 52, 91, 300)
step 1359, loss=0.331553, accuracy=0.9, t=9.41, inputs=(10, 45, 69, 300)
step 1360, loss=0.235143, accuracy=0.9, t=16.42, inputs=(10, 24, 173, 300)
step 1361, loss=0.63908, accuracy=0.7, t=8.87, inputs=(10, 22, 85, 300)
step 1362, loss=0.428152, accuracy=0.8, t=16.84, inputs=(10, 56, 146, 300)
step 1363, loss=0.612771, accuracy=0.6, t=8.77, inputs=(10, 27, 79, 300)
step 1364, loss=0.163914, accuracy=1.0, t=7.52, inputs=(10, 17, 80, 300)
step 1365, loss=0.717548, accuracy=0.6, t=12.27, inputs=(10, 17, 131, 300)
step 1366, loss=0.537924, accuracy=0.8, t=12.9, inputs=(10, 33, 123, 300)
step 1367, loss=0.341918, accuracy=0.9, t=8.9, inputs=(10, 31, 74, 300)
step 1368, loss=0.0993479, accuracy=1.0, t=12.64, inputs=(10, 15, 137, 300)
step 1369, loss=0.29515, accuracy=0.9, t=16.9, inputs=(10, 20, 183, 300)
step 1370, loss=0.313737, accuracy=0.9, t=8.33, inputs=(10, 27, 74, 300)
step 1371, loss=1.06959, accuracy=0.5, t=11.92, inputs=(10, 44, 93, 300)
step 1372, loss=0.151171, accuracy=1.0, t=10.78, inputs=(10, 39, 85, 300)
step 1373, loss=0.424441, accuracy=0.7, t=8.8, inputs=(10, 36, 65, 300)
step 1374, loss=0.148191, accuracy=1.0, t=15.58, inputs=(10, 23, 162, 300)
step 1375, loss=0.197936, accuracy=1.0, t=10.42, inputs=(10, 33, 93, 300)
step 1376, loss=0.119856, accuracy=1.0, t=6.59, inputs=(10, 23, 57, 300)
step 1377, loss=0.505536, accuracy=0.7, t=7.06, inputs=(10, 30, 56, 300)
step 1378, loss=0.778654, accuracy=0.6, t=12.07, inputs=(10, 24, 123, 300)
step 1379, loss=0.52176, accuracy=0.7, t=8.65, inputs=(10, 26, 81, 300)
step 1380, loss=0.526633, accuracy=0.7, t=18.0, inputs=(10, 24, 192, 300)
step 1381, loss=0.230732, accuracy=0.8, t=9.64, inputs=(10, 54, 63, 300)

In [ ]: