In [46]:
import time
from collections import namedtuple

import numpy as np
import tensorflow as tf

In [47]:
with open('fortress_besieged.txt', 'r', encoding='utf-8') as f:
    text = f.read()
vocab = set(text)
vocab_to_int = {c: i for i, c in enumerate(vocab)}
int_to_vocab = dict(enumerate(vocab))
encoded = np.array([vocab_to_int[c] for c in text], dtype=np.int32)
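
The two dictionaries are inverses of each other, so mapping the encoded array back through int_to_vocab should reproduce the original text. A minimal sanity check, shown here as a sketch rather than an executed cell:

decoded = ''.join(int_to_vocab[i] for i in encoded[:100])
assert decoded == text[:100]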

In [48]:
text[:100]


Out[48]:
'一\n\n\n红海早过了,船在印度洋面上开驶着,但是太阳依然不饶人地迟落早起,侵占去大部分的夜。夜仿佛纸浸了油,变成半透明体;它给太阳拥抱住了,分不出身来,也许是给太阳陶醉了,所以夕照晚霞隐褪后的夜色也带着'

In [49]:
encoded[:100]


Out[49]:
array([ 255, 1185, 1185, 1185, 1036, 2298,  709, 2007,  980,  234,  650,
       3324, 1867, 2084, 3101, 3227, 1652, 2033,  893, 2723,  234,  240,
        527, 1099, 2584,   83, 1954,  610, 2124,  956,  418,  541, 2627,
        709,  439,  234,  446,  913, 2118,  529, 2871, 2459,  743, 2552,
       2481, 2552, 2050, 3195, 2465, 2025,  980, 1410,  234, 1475, 1220,
       3037,  148,  398, 2436, 2524, 2630,  803, 1099, 2584, 1455, 2598,
       1433,  980,  234, 2459,  610, 2974, 2405, 1050,  234, 2142, 3083,
        527,  803, 1099, 2584, 2710, 2797,  980,  234, 1000, 2511,  881,
       1146,  383,  746,  755, 1914, 2729,  743, 2552, 2540, 2142, 2060,
       2723], dtype=int32)

In [50]:
len(vocab)


Out[50]:
3325

In [51]:
def get_batches(arr, n_seqs, n_steps):
    '''Create a generator that returns batches of size
       n_seqs x n_steps from arr.
       
       Arguments
       ---------
       arr: Array you want to make batches from
       n_seqs: Batch size, the number of sequences per batch
       n_steps: Number of sequence steps per batch
    '''
    # Get the batch size and number of batches we can make
    batch_size = n_seqs * n_steps
    n_batches = len(arr)//batch_size
    
    # Keep only enough characters to make full batches
    arr = arr[:n_batches * batch_size]
    
    # Reshape into n_seqs rows
    arr = arr.reshape((n_seqs, -1))
    
    for n in range(0, arr.shape[1], n_steps):
        # The features
        x = arr[:, n:n+n_steps]
        # The targets, shifted one step to the left; the last target wraps
        # around to the first character of the same window
        y = np.zeros_like(x)
        y[:, :-1], y[:, -1] = x[:, 1:], x[:, 0]
        yield x, y
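
To see what the generator yields, pull a single batch and inspect it. The snippet below is an illustrative sketch, not an executed cell from the original run:

batches = get_batches(encoded, 10, 50)
x, y = next(batches)
# x and y are both 10 x 50 arrays of integer character codes;
# within each row, y is x shifted one step to the left
print(x.shape, y.shape)
print(x[:2, :10])
print(y[:2, :10])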

In [52]:
def build_inputs(batch_size, num_steps):
    ''' Define placeholders for inputs, targets, and dropout 
    
        Arguments
        ---------
        batch_size: Batch size, number of sequences per batch
        num_steps: Number of sequence steps in a batch
        
    '''
    # Declare placeholders we'll feed into the graph
    inputs = tf.placeholder(tf.int32, [batch_size, num_steps], name='inputs')
    targets = tf.placeholder(tf.int32, [batch_size, num_steps], name='targets')
    
    # Keep probability placeholder for drop out layers
    keep_prob = tf.placeholder(tf.float32, name='keep_prob')
    
    return inputs, targets, keep_prob

In [53]:
def build_lstm(lstm_size, num_layers, batch_size, keep_prob):
    ''' Build LSTM cell.
    
        Arguments
        ---------
        keep_prob: Scalar tensor (tf.placeholder) for the dropout keep probability
        lstm_size: Size of the hidden layers in the LSTM cells
        num_layers: Number of LSTM layers
        batch_size: Batch size

    '''
    ### Build the LSTM Cell
    # Stack up multiple LSTM layers, for deep learning
    def make_cell(lstm_size):
        # Use a basic LSTM cell
        cell = tf.contrib.rnn.BasicLSTMCell(lstm_size)
        # Add dropout to the cell
        cell = tf.contrib.rnn.DropoutWrapper(cell, output_keep_prob=keep_prob)
        return cell
    
    cell = tf.contrib.rnn.MultiRNNCell([make_cell(lstm_size) for _ in range(num_layers)])
    
    initial_state = cell.zero_state(batch_size, tf.float32)
    
    return cell, initial_state
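
For reference, the zero state returned here is a tuple with one LSTMStateTuple (c, h) per layer, each tensor of shape [batch_size, lstm_size]; that is the structure fed back in as initial_state during training and sampling. A small sketch under those assumptions (fresh default graph, constant keep probability; demo_cell and demo_state are hypothetical names):

demo_cell, demo_state = build_lstm(lstm_size=128, num_layers=2,
                                   batch_size=4, keep_prob=tf.constant(1.0))
# demo_state is a tuple of 2 LSTMStateTuples; each c and h has shape (4, 128)
print(len(demo_state), demo_state[0].c.get_shape())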

In [54]:
def build_output(lstm_output, in_size, out_size):
    ''' Build a softmax layer, return the softmax output and logits.
    
        Arguments
        ---------
        
        lstm_output: Output tensor from the LSTM layers
        in_size: Size of the input tensor, for example, size of the LSTM cells
        out_size: Size of this softmax layer
    
    '''

    # Reshape output so it's a bunch of rows, one row for each step for each sequence.
    # That is, the shape should be batch_size*num_steps rows by lstm_size columns
    seq_output = tf.concat(lstm_output, axis=1)
    x = tf.reshape(seq_output, [-1, in_size])
    
    # Connect the RNN outputs to a softmax layer
    with tf.variable_scope('softmax'):
        softmax_w = tf.Variable(tf.truncated_normal((in_size, out_size), stddev=0.1))
        softmax_b = tf.Variable(tf.zeros(out_size))
    
    # Since output is a bunch of rows of RNN cell outputs, logits will be a bunch
    # of rows of logit outputs, one for each step and sequence
    logits = tf.matmul(x, softmax_w) + softmax_b
    
    # Use softmax to get the probabilities for predicted characters
    out = tf.nn.softmax(logits, name='predictions')
    
    return out, logits

In [55]:
def build_loss(logits, targets, lstm_size, num_classes):
    ''' Calculate the loss from the logits and the targets.
    
        Arguments
        ---------
        logits: Logits from final fully connected layer
        targets: Targets for supervised learning
        lstm_size: Number of LSTM hidden units
        num_classes: Number of classes in targets
        
    '''
    
    # One-hot encode targets and reshape to match logits, one row per batch_size per step
    y_one_hot = tf.one_hot(targets, num_classes)
    y_reshaped = tf.reshape(y_one_hot, logits.get_shape())
    
    # Softmax cross entropy loss
    loss = tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=y_reshaped)
    loss = tf.reduce_mean(loss)
    return loss
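
As an aside, the explicit one-hot step can be avoided with tf.nn.sparse_softmax_cross_entropy_with_logits, which accepts integer class labels directly. A hedged, functionally equivalent sketch (build_loss_sparse is a hypothetical helper, not part of the original notebook):

def build_loss_sparse(logits, targets):
    # Flatten the integer targets to one label per row of logits:
    # [batch_size*num_steps] labels vs. [batch_size*num_steps, num_classes] logits
    labels = tf.reshape(targets, [-1])
    loss = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=labels,
                                                          logits=logits)
    return tf.reduce_mean(loss)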

In [56]:
def build_optimizer(loss, learning_rate, grad_clip):
    ''' Build optimizer for training, using gradient clipping.
    
        Arguments:
        loss: Network loss
        learning_rate: Learning rate for optimizer
        grad_clip: Threshold for clipping the global gradient norm
    
    '''
    
    # Optimizer for training, using gradient clipping to control exploding gradients
    tvars = tf.trainable_variables()
    grads, _ = tf.clip_by_global_norm(tf.gradients(loss, tvars), grad_clip)
    train_op = tf.train.AdamOptimizer(learning_rate)
    optimizer = train_op.apply_gradients(zip(grads, tvars))
    
    return optimizer

In [57]:
class CharRNN:
    
    def __init__(self, num_classes, batch_size=64, num_steps=50, 
                       lstm_size=128, num_layers=2, learning_rate=0.001, 
                       grad_clip=5, sampling=False):
    
        # When we're using this network for sampling later, we'll be passing in
        # one character at a time, so provide an option for that
        if sampling:
            batch_size, num_steps = 1, 1

        tf.reset_default_graph()
        
        # Build the input placeholder tensors
        self.inputs, self.targets, self.keep_prob = build_inputs(batch_size, num_steps)

        # Build the LSTM cell
        cell, self.initial_state = build_lstm(lstm_size, num_layers, batch_size, self.keep_prob)

        ### Run the data through the RNN layers
        # First, one-hot encode the input tokens
        x_one_hot = tf.one_hot(self.inputs, num_classes)
        
        # Run each sequence step through the RNN and collect the outputs
        outputs, state = tf.nn.dynamic_rnn(cell, x_one_hot, initial_state=self.initial_state)
        self.final_state = state
        
        # Get softmax predictions and logits
        self.prediction, self.logits = build_output(outputs, lstm_size, num_classes)
        
        # Loss and optimizer (with gradient clipping)
        self.loss = build_loss(self.logits, self.targets, lstm_size, num_classes)
        self.optimizer = build_optimizer(self.loss, learning_rate, grad_clip)

In [58]:
batch_size = 100        # Sequences per batch
num_steps = 100         # Number of sequence steps per batch
lstm_size = 512         # Size of hidden layers in LSTMs
num_layers = 2          # Number of LSTM layers
learning_rate = 0.001   # Learning rate
keep_prob = 0.5         # Dropout keep probability

In [77]:
epochs = 20
# Save every N iterations
save_every_n = 200

model = CharRNN(len(vocab), batch_size=batch_size, num_steps=num_steps,
                lstm_size=lstm_size, num_layers=num_layers, 
                learning_rate=learning_rate)

saver = tf.train.Saver(max_to_keep=100)
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    
    # Restore an earlier checkpoint and resume training from it
    # (comment the line below out to train from scratch)
    saver.restore(sess, 'checkpoints/i440_l512.ckpt')
    counter = 0
    for e in range(epochs):
        # Train network
        new_state = sess.run(model.initial_state)
        loss = 0
        for x, y in get_batches(encoded, batch_size, num_steps):
            counter += 1
            start = time.time()
            feed = {model.inputs: x,
                    model.targets: y,
                    model.keep_prob: keep_prob,
                    model.initial_state: new_state}
            batch_loss, new_state, _ = sess.run([model.loss, 
                                                 model.final_state, 
                                                 model.optimizer], 
                                                 feed_dict=feed)
            
            end = time.time()
            print('Epoch: {}/{}... '.format(e+1, epochs),
                  'Training Step: {}... '.format(counter),
                  'Training loss: {:.4f}... '.format(batch_loss),
                  '{:.4f} sec/batch'.format((end-start)))
        
            if (counter % save_every_n == 0):
                saver.save(sess, "checkpoints/i{}_l{}.ckpt".format(counter, lstm_size))
    
    saver.save(sess, "checkpoints/i{}_l{}.ckpt".format(counter, lstm_size))


INFO:tensorflow:Restoring parameters from checkpoints/i440_l512.ckpt
Epoch: 1/20...  Training Step: 1...  Training loss: 2.8258...  0.2785 sec/batch
Epoch: 1/20...  Training Step: 2...  Training loss: 2.7247...  0.2024 sec/batch
Epoch: 1/20...  Training Step: 3...  Training loss: 2.7082...  0.1960 sec/batch
Epoch: 1/20...  Training Step: 4...  Training loss: 2.7476...  0.2043 sec/batch
Epoch: 1/20...  Training Step: 5...  Training loss: 2.7263...  0.1923 sec/batch
Epoch: 1/20...  Training Step: 6...  Training loss: 2.7393...  0.1977 sec/batch
Epoch: 1/20...  Training Step: 7...  Training loss: 2.7189...  0.1934 sec/batch
Epoch: 1/20...  Training Step: 8...  Training loss: 2.7606...  0.1960 sec/batch
Epoch: 1/20...  Training Step: 9...  Training loss: 2.7765...  0.1949 sec/batch
Epoch: 1/20...  Training Step: 10...  Training loss: 2.7898...  0.1995 sec/batch
Epoch: 1/20...  Training Step: 11...  Training loss: 2.7372...  0.1947 sec/batch
Epoch: 1/20...  Training Step: 12...  Training loss: 2.7534...  0.1966 sec/batch
Epoch: 1/20...  Training Step: 13...  Training loss: 2.7254...  0.1932 sec/batch
Epoch: 1/20...  Training Step: 14...  Training loss: 2.7427...  0.2016 sec/batch
Epoch: 1/20...  Training Step: 15...  Training loss: 2.6555...  0.1939 sec/batch
Epoch: 1/20...  Training Step: 16...  Training loss: 2.7016...  0.2080 sec/batch
Epoch: 1/20...  Training Step: 17...  Training loss: 2.6848...  0.1928 sec/batch
Epoch: 1/20...  Training Step: 18...  Training loss: 2.6949...  0.1937 sec/batch
Epoch: 1/20...  Training Step: 19...  Training loss: 2.7095...  0.2018 sec/batch
Epoch: 1/20...  Training Step: 20...  Training loss: 2.6868...  0.1965 sec/batch
Epoch: 1/20...  Training Step: 21...  Training loss: 2.7487...  0.1989 sec/batch
Epoch: 1/20...  Training Step: 22...  Training loss: 2.7653...  0.1971 sec/batch
Epoch: 2/20...  Training Step: 23...  Training loss: 2.7885...  0.1951 sec/batch
Epoch: 2/20...  Training Step: 24...  Training loss: 2.6732...  0.1946 sec/batch
Epoch: 2/20...  Training Step: 25...  Training loss: 2.6724...  0.2005 sec/batch
Epoch: 2/20...  Training Step: 26...  Training loss: 2.7171...  0.1985 sec/batch
Epoch: 2/20...  Training Step: 27...  Training loss: 2.6940...  0.1947 sec/batch
Epoch: 2/20...  Training Step: 28...  Training loss: 2.7144...  0.1948 sec/batch
Epoch: 2/20...  Training Step: 29...  Training loss: 2.6836...  0.2061 sec/batch
Epoch: 2/20...  Training Step: 30...  Training loss: 2.7393...  0.2031 sec/batch
Epoch: 2/20...  Training Step: 31...  Training loss: 2.7369...  0.2020 sec/batch
Epoch: 2/20...  Training Step: 32...  Training loss: 2.7320...  0.1981 sec/batch
Epoch: 2/20...  Training Step: 33...  Training loss: 2.6770...  0.1961 sec/batch
Epoch: 2/20...  Training Step: 34...  Training loss: 2.7020...  0.1930 sec/batch
Epoch: 2/20...  Training Step: 35...  Training loss: 2.6921...  0.1971 sec/batch
Epoch: 2/20...  Training Step: 36...  Training loss: 2.7013...  0.1954 sec/batch
Epoch: 2/20...  Training Step: 37...  Training loss: 2.6314...  0.2006 sec/batch
Epoch: 2/20...  Training Step: 38...  Training loss: 2.6926...  0.1936 sec/batch
Epoch: 2/20...  Training Step: 39...  Training loss: 2.6496...  0.1979 sec/batch
Epoch: 2/20...  Training Step: 40...  Training loss: 2.6478...  0.2002 sec/batch
Epoch: 2/20...  Training Step: 41...  Training loss: 2.6492...  0.2014 sec/batch
Epoch: 2/20...  Training Step: 42...  Training loss: 2.6448...  0.1961 sec/batch
Epoch: 2/20...  Training Step: 43...  Training loss: 2.7167...  0.2021 sec/batch
Epoch: 2/20...  Training Step: 44...  Training loss: 2.7496...  0.1967 sec/batch
Epoch: 3/20...  Training Step: 45...  Training loss: 2.7290...  0.1985 sec/batch
Epoch: 3/20...  Training Step: 46...  Training loss: 2.6206...  0.1946 sec/batch
Epoch: 3/20...  Training Step: 47...  Training loss: 2.6392...  0.1954 sec/batch
Epoch: 3/20...  Training Step: 48...  Training loss: 2.6888...  0.1952 sec/batch
Epoch: 3/20...  Training Step: 49...  Training loss: 2.6631...  0.1974 sec/batch
Epoch: 3/20...  Training Step: 50...  Training loss: 2.7016...  0.1940 sec/batch
Epoch: 3/20...  Training Step: 51...  Training loss: 2.6520...  0.1966 sec/batch
Epoch: 3/20...  Training Step: 52...  Training loss: 2.6856...  0.2044 sec/batch
Epoch: 3/20...  Training Step: 53...  Training loss: 2.7019...  0.1957 sec/batch
Epoch: 3/20...  Training Step: 54...  Training loss: 2.7177...  0.1966 sec/batch
Epoch: 3/20...  Training Step: 55...  Training loss: 2.6445...  0.1939 sec/batch
Epoch: 3/20...  Training Step: 56...  Training loss: 2.6912...  0.2005 sec/batch
Epoch: 3/20...  Training Step: 57...  Training loss: 2.6760...  0.2006 sec/batch
Epoch: 3/20...  Training Step: 58...  Training loss: 2.6522...  0.1962 sec/batch
Epoch: 3/20...  Training Step: 59...  Training loss: 2.6003...  0.1981 sec/batch
Epoch: 3/20...  Training Step: 60...  Training loss: 2.6445...  0.1930 sec/batch
Epoch: 3/20...  Training Step: 61...  Training loss: 2.6463...  0.1899 sec/batch
Epoch: 3/20...  Training Step: 62...  Training loss: 2.6384...  0.1967 sec/batch
Epoch: 3/20...  Training Step: 63...  Training loss: 2.6448...  0.2371 sec/batch
Epoch: 3/20...  Training Step: 64...  Training loss: 2.6235...  0.2386 sec/batch
Epoch: 3/20...  Training Step: 65...  Training loss: 2.6772...  0.2552 sec/batch
Epoch: 3/20...  Training Step: 66...  Training loss: 2.6929...  0.2054 sec/batch
Epoch: 4/20...  Training Step: 67...  Training loss: 2.7200...  0.1956 sec/batch
Epoch: 4/20...  Training Step: 68...  Training loss: 2.5720...  0.1991 sec/batch
Epoch: 4/20...  Training Step: 69...  Training loss: 2.5965...  0.1946 sec/batch
Epoch: 4/20...  Training Step: 70...  Training loss: 2.6646...  0.1958 sec/batch
Epoch: 4/20...  Training Step: 71...  Training loss: 2.6461...  0.1989 sec/batch
Epoch: 4/20...  Training Step: 72...  Training loss: 2.6559...  0.1981 sec/batch
Epoch: 4/20...  Training Step: 73...  Training loss: 2.6056...  0.1960 sec/batch
Epoch: 4/20...  Training Step: 74...  Training loss: 2.6260...  0.1971 sec/batch
Epoch: 4/20...  Training Step: 75...  Training loss: 2.6760...  0.2043 sec/batch
Epoch: 4/20...  Training Step: 76...  Training loss: 2.6804...  0.1971 sec/batch
Epoch: 4/20...  Training Step: 77...  Training loss: 2.6051...  0.1948 sec/batch
Epoch: 4/20...  Training Step: 78...  Training loss: 2.6566...  0.1943 sec/batch
Epoch: 4/20...  Training Step: 79...  Training loss: 2.6440...  0.1973 sec/batch
Epoch: 4/20...  Training Step: 80...  Training loss: 2.6250...  0.1964 sec/batch
Epoch: 4/20...  Training Step: 81...  Training loss: 2.5706...  0.1976 sec/batch
Epoch: 4/20...  Training Step: 82...  Training loss: 2.6083...  0.1945 sec/batch
Epoch: 4/20...  Training Step: 83...  Training loss: 2.6107...  0.1973 sec/batch
Epoch: 4/20...  Training Step: 84...  Training loss: 2.5907...  0.1981 sec/batch
Epoch: 4/20...  Training Step: 85...  Training loss: 2.5868...  0.1963 sec/batch
Epoch: 4/20...  Training Step: 86...  Training loss: 2.5867...  0.1947 sec/batch
Epoch: 4/20...  Training Step: 87...  Training loss: 2.6494...  0.1976 sec/batch
Epoch: 4/20...  Training Step: 88...  Training loss: 2.6732...  0.1888 sec/batch
Epoch: 5/20...  Training Step: 89...  Training loss: 2.6731...  0.1932 sec/batch
Epoch: 5/20...  Training Step: 90...  Training loss: 2.5603...  0.1934 sec/batch
Epoch: 5/20...  Training Step: 91...  Training loss: 2.5827...  0.1980 sec/batch
Epoch: 5/20...  Training Step: 92...  Training loss: 2.6048...  0.1942 sec/batch
Epoch: 5/20...  Training Step: 93...  Training loss: 2.6005...  0.1959 sec/batch
Epoch: 5/20...  Training Step: 94...  Training loss: 2.6075...  0.2106 sec/batch
Epoch: 5/20...  Training Step: 95...  Training loss: 2.5643...  0.2176 sec/batch
Epoch: 5/20...  Training Step: 96...  Training loss: 2.6149...  0.2348 sec/batch
Epoch: 5/20...  Training Step: 97...  Training loss: 2.6351...  0.1953 sec/batch
Epoch: 5/20...  Training Step: 98...  Training loss: 2.6341...  0.1978 sec/batch
Epoch: 5/20...  Training Step: 99...  Training loss: 2.5779...  0.1977 sec/batch
Epoch: 5/20...  Training Step: 100...  Training loss: 2.6359...  0.1973 sec/batch
Epoch: 5/20...  Training Step: 101...  Training loss: 2.6016...  0.1983 sec/batch
Epoch: 5/20...  Training Step: 102...  Training loss: 2.6109...  0.1987 sec/batch
Epoch: 5/20...  Training Step: 103...  Training loss: 2.5134...  0.1961 sec/batch
Epoch: 5/20...  Training Step: 104...  Training loss: 2.5794...  0.1974 sec/batch
Epoch: 5/20...  Training Step: 105...  Training loss: 2.5954...  0.1946 sec/batch
Epoch: 5/20...  Training Step: 106...  Training loss: 2.5846...  0.2171 sec/batch
Epoch: 5/20...  Training Step: 107...  Training loss: 2.6301...  0.1941 sec/batch
Epoch: 5/20...  Training Step: 108...  Training loss: 2.5966...  0.1966 sec/batch
Epoch: 5/20...  Training Step: 109...  Training loss: 2.6678...  0.1941 sec/batch
Epoch: 5/20...  Training Step: 110...  Training loss: 2.6512...  0.1976 sec/batch
Epoch: 6/20...  Training Step: 111...  Training loss: 2.6860...  0.2403 sec/batch
Epoch: 6/20...  Training Step: 112...  Training loss: 2.5718...  0.2219 sec/batch
Epoch: 6/20...  Training Step: 113...  Training loss: 2.5743...  0.2380 sec/batch
Epoch: 6/20...  Training Step: 114...  Training loss: 2.6054...  0.1947 sec/batch
Epoch: 6/20...  Training Step: 115...  Training loss: 2.6234...  0.1902 sec/batch
Epoch: 6/20...  Training Step: 116...  Training loss: 2.6194...  0.1973 sec/batch
Epoch: 6/20...  Training Step: 117...  Training loss: 2.5847...  0.2240 sec/batch
Epoch: 6/20...  Training Step: 118...  Training loss: 2.6187...  0.2005 sec/batch
Epoch: 6/20...  Training Step: 119...  Training loss: 2.6322...  0.2338 sec/batch
Epoch: 6/20...  Training Step: 120...  Training loss: 2.6203...  0.2004 sec/batch
Epoch: 6/20...  Training Step: 121...  Training loss: 2.5861...  0.2324 sec/batch
Epoch: 6/20...  Training Step: 122...  Training loss: 2.6304...  0.2044 sec/batch
Epoch: 6/20...  Training Step: 123...  Training loss: 2.5970...  0.2025 sec/batch
Epoch: 6/20...  Training Step: 124...  Training loss: 2.6197...  0.1962 sec/batch
Epoch: 6/20...  Training Step: 125...  Training loss: 2.5401...  0.2367 sec/batch
Epoch: 6/20...  Training Step: 126...  Training loss: 2.5790...  0.2046 sec/batch
Epoch: 6/20...  Training Step: 127...  Training loss: 2.5632...  0.2157 sec/batch
Epoch: 6/20...  Training Step: 128...  Training loss: 2.5084...  0.1981 sec/batch
Epoch: 6/20...  Training Step: 129...  Training loss: 2.5464...  0.2198 sec/batch
Epoch: 6/20...  Training Step: 130...  Training loss: 2.5387...  0.2007 sec/batch
Epoch: 6/20...  Training Step: 131...  Training loss: 2.6072...  0.2265 sec/batch
Epoch: 6/20...  Training Step: 132...  Training loss: 2.6009...  0.1933 sec/batch
Epoch: 7/20...  Training Step: 133...  Training loss: 2.6259...  0.2187 sec/batch
Epoch: 7/20...  Training Step: 134...  Training loss: 2.5137...  0.1968 sec/batch
Epoch: 7/20...  Training Step: 135...  Training loss: 2.5344...  0.2489 sec/batch
Epoch: 7/20...  Training Step: 136...  Training loss: 2.5667...  0.1988 sec/batch
Epoch: 7/20...  Training Step: 137...  Training loss: 2.5550...  0.2042 sec/batch
Epoch: 7/20...  Training Step: 138...  Training loss: 2.5683...  0.2123 sec/batch
Epoch: 7/20...  Training Step: 139...  Training loss: 2.5476...  0.1980 sec/batch
Epoch: 7/20...  Training Step: 140...  Training loss: 2.5858...  0.2243 sec/batch
Epoch: 7/20...  Training Step: 141...  Training loss: 2.5675...  0.2049 sec/batch
Epoch: 7/20...  Training Step: 142...  Training loss: 2.5763...  0.2351 sec/batch
Epoch: 7/20...  Training Step: 143...  Training loss: 2.5206...  0.1956 sec/batch
Epoch: 7/20...  Training Step: 144...  Training loss: 2.5694...  0.2032 sec/batch
Epoch: 7/20...  Training Step: 145...  Training loss: 2.5654...  0.2291 sec/batch
Epoch: 7/20...  Training Step: 146...  Training loss: 2.5505...  0.2528 sec/batch
Epoch: 7/20...  Training Step: 147...  Training loss: 2.4709...  0.2105 sec/batch
Epoch: 7/20...  Training Step: 148...  Training loss: 2.5071...  0.2091 sec/batch
Epoch: 7/20...  Training Step: 149...  Training loss: 2.5106...  0.2035 sec/batch
Epoch: 7/20...  Training Step: 150...  Training loss: 2.4896...  0.2267 sec/batch
Epoch: 7/20...  Training Step: 151...  Training loss: 2.5005...  0.1965 sec/batch
Epoch: 7/20...  Training Step: 152...  Training loss: 2.4949...  0.2219 sec/batch
Epoch: 7/20...  Training Step: 153...  Training loss: 2.5613...  0.1965 sec/batch
Epoch: 7/20...  Training Step: 154...  Training loss: 2.5548...  0.2089 sec/batch
Epoch: 8/20...  Training Step: 155...  Training loss: 2.5832...  0.2080 sec/batch
Epoch: 8/20...  Training Step: 156...  Training loss: 2.4826...  0.2068 sec/batch
Epoch: 8/20...  Training Step: 157...  Training loss: 2.4624...  0.2062 sec/batch
Epoch: 8/20...  Training Step: 158...  Training loss: 2.5060...  0.2010 sec/batch
Epoch: 8/20...  Training Step: 159...  Training loss: 2.4940...  0.2255 sec/batch
Epoch: 8/20...  Training Step: 160...  Training loss: 2.5166...  0.2038 sec/batch
Epoch: 8/20...  Training Step: 161...  Training loss: 2.4736...  0.2134 sec/batch
Epoch: 8/20...  Training Step: 162...  Training loss: 2.5232...  0.2008 sec/batch
Epoch: 8/20...  Training Step: 163...  Training loss: 2.5447...  0.2148 sec/batch
Epoch: 8/20...  Training Step: 164...  Training loss: 2.5229...  0.2091 sec/batch
Epoch: 8/20...  Training Step: 165...  Training loss: 2.4914...  0.1939 sec/batch
Epoch: 8/20...  Training Step: 166...  Training loss: 2.5227...  0.2343 sec/batch
Epoch: 8/20...  Training Step: 167...  Training loss: 2.5051...  0.1960 sec/batch
Epoch: 8/20...  Training Step: 168...  Training loss: 2.5072...  0.2418 sec/batch
Epoch: 8/20...  Training Step: 169...  Training loss: 2.4512...  0.1953 sec/batch
Epoch: 8/20...  Training Step: 170...  Training loss: 2.4845...  0.1958 sec/batch
Epoch: 8/20...  Training Step: 171...  Training loss: 2.4716...  0.1995 sec/batch
Epoch: 8/20...  Training Step: 172...  Training loss: 2.4484...  0.2009 sec/batch
Epoch: 8/20...  Training Step: 173...  Training loss: 2.4374...  0.1966 sec/batch
Epoch: 8/20...  Training Step: 174...  Training loss: 2.4662...  0.2314 sec/batch
Epoch: 8/20...  Training Step: 175...  Training loss: 2.5173...  0.2121 sec/batch
Epoch: 8/20...  Training Step: 176...  Training loss: 2.5215...  0.2047 sec/batch
Epoch: 9/20...  Training Step: 177...  Training loss: 2.5351...  0.2172 sec/batch
Epoch: 9/20...  Training Step: 178...  Training loss: 2.4374...  0.2086 sec/batch
Epoch: 9/20...  Training Step: 179...  Training loss: 2.4444...  0.2039 sec/batch
Epoch: 9/20...  Training Step: 180...  Training loss: 2.4677...  0.2115 sec/batch
Epoch: 9/20...  Training Step: 181...  Training loss: 2.4438...  0.2015 sec/batch
Epoch: 9/20...  Training Step: 182...  Training loss: 2.4743...  0.1978 sec/batch
Epoch: 9/20...  Training Step: 183...  Training loss: 2.4468...  0.2155 sec/batch
Epoch: 9/20...  Training Step: 184...  Training loss: 2.5013...  0.2012 sec/batch
Epoch: 9/20...  Training Step: 185...  Training loss: 2.5324...  0.2249 sec/batch
Epoch: 9/20...  Training Step: 186...  Training loss: 2.5224...  0.2007 sec/batch
Epoch: 9/20...  Training Step: 187...  Training loss: 2.4287...  0.1981 sec/batch
Epoch: 9/20...  Training Step: 188...  Training loss: 2.4838...  0.2445 sec/batch
Epoch: 9/20...  Training Step: 189...  Training loss: 2.4925...  0.2165 sec/batch
Epoch: 9/20...  Training Step: 190...  Training loss: 2.4765...  0.2434 sec/batch
Epoch: 9/20...  Training Step: 191...  Training loss: 2.4065...  0.1942 sec/batch
Epoch: 9/20...  Training Step: 192...  Training loss: 2.4425...  0.2035 sec/batch
Epoch: 9/20...  Training Step: 193...  Training loss: 2.4646...  0.1953 sec/batch
Epoch: 9/20...  Training Step: 194...  Training loss: 2.4351...  0.2006 sec/batch
Epoch: 9/20...  Training Step: 195...  Training loss: 2.4467...  0.1951 sec/batch
Epoch: 9/20...  Training Step: 196...  Training loss: 2.4286...  0.1944 sec/batch
Epoch: 9/20...  Training Step: 197...  Training loss: 2.4790...  0.1947 sec/batch
Epoch: 9/20...  Training Step: 198...  Training loss: 2.5182...  0.1951 sec/batch
Epoch: 10/20...  Training Step: 199...  Training loss: 2.4920...  0.1974 sec/batch
Epoch: 10/20...  Training Step: 200...  Training loss: 2.4029...  0.2030 sec/batch
Epoch: 10/20...  Training Step: 201...  Training loss: 2.3951...  0.2112 sec/batch
Epoch: 10/20...  Training Step: 202...  Training loss: 2.4377...  0.1959 sec/batch
Epoch: 10/20...  Training Step: 203...  Training loss: 2.4339...  0.1974 sec/batch
Epoch: 10/20...  Training Step: 204...  Training loss: 2.4328...  0.1946 sec/batch
Epoch: 10/20...  Training Step: 205...  Training loss: 2.3988...  0.2036 sec/batch
Epoch: 10/20...  Training Step: 206...  Training loss: 2.4408...  0.1938 sec/batch
Epoch: 10/20...  Training Step: 207...  Training loss: 2.4863...  0.2303 sec/batch
Epoch: 10/20...  Training Step: 208...  Training loss: 2.4537...  0.2026 sec/batch
Epoch: 10/20...  Training Step: 209...  Training loss: 2.4296...  0.1958 sec/batch
Epoch: 10/20...  Training Step: 210...  Training loss: 2.4634...  0.1996 sec/batch
Epoch: 10/20...  Training Step: 211...  Training loss: 2.4358...  0.1964 sec/batch
Epoch: 10/20...  Training Step: 212...  Training loss: 2.4538...  0.1978 sec/batch
Epoch: 10/20...  Training Step: 213...  Training loss: 2.3894...  0.1914 sec/batch
Epoch: 10/20...  Training Step: 214...  Training loss: 2.4211...  0.2040 sec/batch
Epoch: 10/20...  Training Step: 215...  Training loss: 2.4313...  0.1937 sec/batch
Epoch: 10/20...  Training Step: 216...  Training loss: 2.4005...  0.1975 sec/batch
Epoch: 10/20...  Training Step: 217...  Training loss: 2.4216...  0.2009 sec/batch
Epoch: 10/20...  Training Step: 218...  Training loss: 2.3858...  0.2411 sec/batch
Epoch: 10/20...  Training Step: 219...  Training loss: 2.4541...  0.1975 sec/batch
Epoch: 10/20...  Training Step: 220...  Training loss: 2.4527...  0.2457 sec/batch
Epoch: 11/20...  Training Step: 221...  Training loss: 2.4655...  0.2101 sec/batch
Epoch: 11/20...  Training Step: 222...  Training loss: 2.3610...  0.1972 sec/batch
Epoch: 11/20...  Training Step: 223...  Training loss: 2.3684...  0.2257 sec/batch
Epoch: 11/20...  Training Step: 224...  Training loss: 2.4262...  0.1986 sec/batch
Epoch: 11/20...  Training Step: 225...  Training loss: 2.4001...  0.2262 sec/batch
Epoch: 11/20...  Training Step: 226...  Training loss: 2.4347...  0.2079 sec/batch
Epoch: 11/20...  Training Step: 227...  Training loss: 2.4015...  0.2195 sec/batch
Epoch: 11/20...  Training Step: 228...  Training loss: 2.4127...  0.2033 sec/batch
Epoch: 11/20...  Training Step: 229...  Training loss: 2.4494...  0.2077 sec/batch
Epoch: 11/20...  Training Step: 230...  Training loss: 2.4542...  0.2121 sec/batch
Epoch: 11/20...  Training Step: 231...  Training loss: 2.4152...  0.2065 sec/batch
Epoch: 11/20...  Training Step: 232...  Training loss: 2.4420...  0.2143 sec/batch
Epoch: 11/20...  Training Step: 233...  Training loss: 2.4303...  0.2178 sec/batch
Epoch: 11/20...  Training Step: 234...  Training loss: 2.4009...  0.1963 sec/batch
Epoch: 11/20...  Training Step: 235...  Training loss: 2.3386...  0.2282 sec/batch
Epoch: 11/20...  Training Step: 236...  Training loss: 2.3776...  0.1965 sec/batch
Epoch: 11/20...  Training Step: 237...  Training loss: 2.3845...  0.2365 sec/batch
Epoch: 11/20...  Training Step: 238...  Training loss: 2.3859...  0.1954 sec/batch
Epoch: 11/20...  Training Step: 239...  Training loss: 2.3900...  0.1997 sec/batch
Epoch: 11/20...  Training Step: 240...  Training loss: 2.3509...  0.1945 sec/batch
Epoch: 11/20...  Training Step: 241...  Training loss: 2.4065...  0.2023 sec/batch
Epoch: 11/20...  Training Step: 242...  Training loss: 2.4385...  0.2300 sec/batch
Epoch: 12/20...  Training Step: 243...  Training loss: 2.4338...  0.1938 sec/batch
Epoch: 12/20...  Training Step: 244...  Training loss: 2.3410...  0.2286 sec/batch
Epoch: 12/20...  Training Step: 245...  Training loss: 2.3619...  0.1987 sec/batch
Epoch: 12/20...  Training Step: 246...  Training loss: 2.3871...  0.2431 sec/batch
Epoch: 12/20...  Training Step: 247...  Training loss: 2.3561...  0.1970 sec/batch
Epoch: 12/20...  Training Step: 248...  Training loss: 2.3753...  0.1977 sec/batch
Epoch: 12/20...  Training Step: 249...  Training loss: 2.3637...  0.2244 sec/batch
Epoch: 12/20...  Training Step: 250...  Training loss: 2.3901...  0.2584 sec/batch
Epoch: 12/20...  Training Step: 251...  Training loss: 2.4222...  0.2007 sec/batch
Epoch: 12/20...  Training Step: 252...  Training loss: 2.4045...  0.2192 sec/batch
Epoch: 12/20...  Training Step: 253...  Training loss: 2.3580...  0.2000 sec/batch
Epoch: 12/20...  Training Step: 254...  Training loss: 2.4186...  0.2246 sec/batch
Epoch: 12/20...  Training Step: 255...  Training loss: 2.4082...  0.1938 sec/batch
Epoch: 12/20...  Training Step: 256...  Training loss: 2.3828...  0.2224 sec/batch
Epoch: 12/20...  Training Step: 257...  Training loss: 2.3346...  0.2044 sec/batch
Epoch: 12/20...  Training Step: 258...  Training loss: 2.3632...  0.2283 sec/batch
Epoch: 12/20...  Training Step: 259...  Training loss: 2.3634...  0.1978 sec/batch
Epoch: 12/20...  Training Step: 260...  Training loss: 2.3672...  0.1998 sec/batch
Epoch: 12/20...  Training Step: 261...  Training loss: 2.3565...  0.2171 sec/batch
Epoch: 12/20...  Training Step: 262...  Training loss: 2.3408...  0.1979 sec/batch
Epoch: 12/20...  Training Step: 263...  Training loss: 2.3714...  0.2223 sec/batch
Epoch: 12/20...  Training Step: 264...  Training loss: 2.4132...  0.1964 sec/batch
Epoch: 13/20...  Training Step: 265...  Training loss: 2.3845...  0.1962 sec/batch
Epoch: 13/20...  Training Step: 266...  Training loss: 2.2863...  0.2050 sec/batch
Epoch: 13/20...  Training Step: 267...  Training loss: 2.3313...  0.2200 sec/batch
Epoch: 13/20...  Training Step: 268...  Training loss: 2.3564...  0.1958 sec/batch
Epoch: 13/20...  Training Step: 269...  Training loss: 2.3574...  0.1956 sec/batch
Epoch: 13/20...  Training Step: 270...  Training loss: 2.3533...  0.2150 sec/batch
Epoch: 13/20...  Training Step: 271...  Training loss: 2.3220...  0.1974 sec/batch
Epoch: 13/20...  Training Step: 272...  Training loss: 2.3672...  0.2087 sec/batch
Epoch: 13/20...  Training Step: 273...  Training loss: 2.3810...  0.2087 sec/batch
Epoch: 13/20...  Training Step: 274...  Training loss: 2.3968...  0.2099 sec/batch
Epoch: 13/20...  Training Step: 275...  Training loss: 2.3305...  0.2017 sec/batch
Epoch: 13/20...  Training Step: 276...  Training loss: 2.3923...  0.2093 sec/batch
Epoch: 13/20...  Training Step: 277...  Training loss: 2.3565...  0.2125 sec/batch
Epoch: 13/20...  Training Step: 278...  Training loss: 2.3448...  0.1986 sec/batch
Epoch: 13/20...  Training Step: 279...  Training loss: 2.2974...  0.2175 sec/batch
Epoch: 13/20...  Training Step: 280...  Training loss: 2.3426...  0.1965 sec/batch
Epoch: 13/20...  Training Step: 281...  Training loss: 2.3331...  0.2014 sec/batch
Epoch: 13/20...  Training Step: 282...  Training loss: 2.3247...  0.2178 sec/batch
Epoch: 13/20...  Training Step: 283...  Training loss: 2.3224...  0.1988 sec/batch
Epoch: 13/20...  Training Step: 284...  Training loss: 2.2995...  0.1971 sec/batch
Epoch: 13/20...  Training Step: 285...  Training loss: 2.3833...  0.2194 sec/batch
Epoch: 13/20...  Training Step: 286...  Training loss: 2.3855...  0.1988 sec/batch
Epoch: 14/20...  Training Step: 287...  Training loss: 2.3764...  0.2031 sec/batch
Epoch: 14/20...  Training Step: 288...  Training loss: 2.2732...  0.2109 sec/batch
Epoch: 14/20...  Training Step: 289...  Training loss: 2.2671...  0.1976 sec/batch
Epoch: 14/20...  Training Step: 290...  Training loss: 2.3156...  0.2213 sec/batch
Epoch: 14/20...  Training Step: 291...  Training loss: 2.2969...  0.1976 sec/batch
Epoch: 14/20...  Training Step: 292...  Training loss: 2.3217...  0.2432 sec/batch
Epoch: 14/20...  Training Step: 293...  Training loss: 2.2829...  0.1936 sec/batch
Epoch: 14/20...  Training Step: 294...  Training loss: 2.3239...  0.1961 sec/batch
Epoch: 14/20...  Training Step: 295...  Training loss: 2.3324...  0.1959 sec/batch
Epoch: 14/20...  Training Step: 296...  Training loss: 2.3578...  0.2068 sec/batch
Epoch: 14/20...  Training Step: 297...  Training loss: 2.2989...  0.2106 sec/batch
Epoch: 14/20...  Training Step: 298...  Training loss: 2.3359...  0.1952 sec/batch
Epoch: 14/20...  Training Step: 299...  Training loss: 2.3507...  0.2195 sec/batch
Epoch: 14/20...  Training Step: 300...  Training loss: 2.3323...  0.2051 sec/batch
Epoch: 14/20...  Training Step: 301...  Training loss: 2.2659...  0.1947 sec/batch
Epoch: 14/20...  Training Step: 302...  Training loss: 2.3130...  0.1991 sec/batch
Epoch: 14/20...  Training Step: 303...  Training loss: 2.2633...  0.1962 sec/batch
Epoch: 14/20...  Training Step: 304...  Training loss: 2.2741...  0.2378 sec/batch
Epoch: 14/20...  Training Step: 305...  Training loss: 2.3053...  0.2191 sec/batch
Epoch: 14/20...  Training Step: 306...  Training loss: 2.2726...  0.2274 sec/batch
Epoch: 14/20...  Training Step: 307...  Training loss: 2.3376...  0.2274 sec/batch
Epoch: 14/20...  Training Step: 308...  Training loss: 2.3460...  0.1961 sec/batch
Epoch: 15/20...  Training Step: 309...  Training loss: 2.3497...  0.1947 sec/batch
Epoch: 15/20...  Training Step: 310...  Training loss: 2.2483...  0.2069 sec/batch
Epoch: 15/20...  Training Step: 311...  Training loss: 2.2617...  0.2144 sec/batch
Epoch: 15/20...  Training Step: 312...  Training loss: 2.3047...  0.2069 sec/batch
Epoch: 15/20...  Training Step: 313...  Training loss: 2.2626...  0.2118 sec/batch
Epoch: 15/20...  Training Step: 314...  Training loss: 2.2718...  0.2036 sec/batch
Epoch: 15/20...  Training Step: 315...  Training loss: 2.2596...  0.2071 sec/batch
Epoch: 15/20...  Training Step: 316...  Training loss: 2.2948...  0.2097 sec/batch
Epoch: 15/20...  Training Step: 317...  Training loss: 2.3155...  0.1947 sec/batch
Epoch: 15/20...  Training Step: 318...  Training loss: 2.3123...  0.1951 sec/batch
Epoch: 15/20...  Training Step: 319...  Training loss: 2.2803...  0.2058 sec/batch
Epoch: 15/20...  Training Step: 320...  Training loss: 2.3338...  0.2502 sec/batch
Epoch: 15/20...  Training Step: 321...  Training loss: 2.3165...  0.2001 sec/batch
Epoch: 15/20...  Training Step: 322...  Training loss: 2.2741...  0.2003 sec/batch
Epoch: 15/20...  Training Step: 323...  Training loss: 2.2339...  0.2340 sec/batch
Epoch: 15/20...  Training Step: 324...  Training loss: 2.2699...  0.1945 sec/batch
Epoch: 15/20...  Training Step: 325...  Training loss: 2.2682...  0.2298 sec/batch
Epoch: 15/20...  Training Step: 326...  Training loss: 2.2521...  0.1988 sec/batch
Epoch: 15/20...  Training Step: 327...  Training loss: 2.2796...  0.1981 sec/batch
Epoch: 15/20...  Training Step: 328...  Training loss: 2.2544...  0.2012 sec/batch
Epoch: 15/20...  Training Step: 329...  Training loss: 2.2929...  0.1964 sec/batch
Epoch: 15/20...  Training Step: 330...  Training loss: 2.3081...  0.1963 sec/batch
Epoch: 16/20...  Training Step: 331...  Training loss: 2.3063...  0.1943 sec/batch
Epoch: 16/20...  Training Step: 332...  Training loss: 2.2150...  0.1964 sec/batch
Epoch: 16/20...  Training Step: 333...  Training loss: 2.2322...  0.2040 sec/batch
Epoch: 16/20...  Training Step: 334...  Training loss: 2.2619...  0.1956 sec/batch
Epoch: 16/20...  Training Step: 335...  Training loss: 2.2615...  0.1961 sec/batch
Epoch: 16/20...  Training Step: 336...  Training loss: 2.2687...  0.1965 sec/batch
Epoch: 16/20...  Training Step: 337...  Training loss: 2.2349...  0.1947 sec/batch
Epoch: 16/20...  Training Step: 338...  Training loss: 2.2803...  0.1949 sec/batch
Epoch: 16/20...  Training Step: 339...  Training loss: 2.2713...  0.2027 sec/batch
Epoch: 16/20...  Training Step: 340...  Training loss: 2.2732...  0.1947 sec/batch
Epoch: 16/20...  Training Step: 341...  Training loss: 2.2421...  0.1981 sec/batch
Epoch: 16/20...  Training Step: 342...  Training loss: 2.2932...  0.2313 sec/batch
Epoch: 16/20...  Training Step: 343...  Training loss: 2.2884...  0.2116 sec/batch
Epoch: 16/20...  Training Step: 344...  Training loss: 2.2523...  0.1980 sec/batch
Epoch: 16/20...  Training Step: 345...  Training loss: 2.2165...  0.1976 sec/batch
Epoch: 16/20...  Training Step: 346...  Training loss: 2.2553...  0.1982 sec/batch
Epoch: 16/20...  Training Step: 347...  Training loss: 2.2349...  0.1978 sec/batch
Epoch: 16/20...  Training Step: 348...  Training loss: 2.2376...  0.2004 sec/batch
Epoch: 16/20...  Training Step: 349...  Training loss: 2.2434...  0.1963 sec/batch
Epoch: 16/20...  Training Step: 350...  Training loss: 2.2278...  0.2010 sec/batch
Epoch: 16/20...  Training Step: 351...  Training loss: 2.3001...  0.1961 sec/batch
Epoch: 16/20...  Training Step: 352...  Training loss: 2.2971...  0.2004 sec/batch
Epoch: 17/20...  Training Step: 353...  Training loss: 2.2934...  0.1963 sec/batch
Epoch: 17/20...  Training Step: 354...  Training loss: 2.1828...  0.2164 sec/batch
Epoch: 17/20...  Training Step: 355...  Training loss: 2.1847...  0.2690 sec/batch
Epoch: 17/20...  Training Step: 356...  Training loss: 2.2346...  0.2076 sec/batch
Epoch: 17/20...  Training Step: 357...  Training loss: 2.2067...  0.2172 sec/batch
Epoch: 17/20...  Training Step: 358...  Training loss: 2.2331...  0.2048 sec/batch
Epoch: 17/20...  Training Step: 359...  Training loss: 2.2038...  0.2399 sec/batch
Epoch: 17/20...  Training Step: 360...  Training loss: 2.2417...  0.1993 sec/batch
Epoch: 17/20...  Training Step: 361...  Training loss: 2.2465...  0.2151 sec/batch
Epoch: 17/20...  Training Step: 362...  Training loss: 2.2706...  0.1991 sec/batch
Epoch: 17/20...  Training Step: 363...  Training loss: 2.2165...  0.1952 sec/batch
Epoch: 17/20...  Training Step: 364...  Training loss: 2.2828...  0.1971 sec/batch
Epoch: 17/20...  Training Step: 365...  Training loss: 2.2545...  0.1969 sec/batch
Epoch: 17/20...  Training Step: 366...  Training loss: 2.2281...  0.2191 sec/batch
Epoch: 17/20...  Training Step: 367...  Training loss: 2.2006...  0.1955 sec/batch
Epoch: 17/20...  Training Step: 368...  Training loss: 2.2392...  0.1993 sec/batch
Epoch: 17/20...  Training Step: 369...  Training loss: 2.2260...  0.2110 sec/batch
Epoch: 17/20...  Training Step: 370...  Training loss: 2.2305...  0.1956 sec/batch
Epoch: 17/20...  Training Step: 371...  Training loss: 2.2233...  0.2011 sec/batch
Epoch: 17/20...  Training Step: 372...  Training loss: 2.1843...  0.2065 sec/batch
Epoch: 17/20...  Training Step: 373...  Training loss: 2.2137...  0.2050 sec/batch
Epoch: 17/20...  Training Step: 374...  Training loss: 2.2573...  0.1964 sec/batch
Epoch: 18/20...  Training Step: 375...  Training loss: 2.2570...  0.2284 sec/batch
Epoch: 18/20...  Training Step: 376...  Training loss: 2.1726...  0.1997 sec/batch
Epoch: 18/20...  Training Step: 377...  Training loss: 2.1763...  0.2307 sec/batch
Epoch: 18/20...  Training Step: 378...  Training loss: 2.2192...  0.1965 sec/batch
Epoch: 18/20...  Training Step: 379...  Training loss: 2.2031...  0.1986 sec/batch
Epoch: 18/20...  Training Step: 380...  Training loss: 2.1867...  0.2427 sec/batch
Epoch: 18/20...  Training Step: 381...  Training loss: 2.1693...  0.2171 sec/batch
Epoch: 18/20...  Training Step: 382...  Training loss: 2.2223...  0.2048 sec/batch
Epoch: 18/20...  Training Step: 383...  Training loss: 2.2091...  0.2178 sec/batch
Epoch: 18/20...  Training Step: 384...  Training loss: 2.2487...  0.1959 sec/batch
Epoch: 18/20...  Training Step: 385...  Training loss: 2.2034...  0.1965 sec/batch
Epoch: 18/20...  Training Step: 386...  Training loss: 2.2413...  0.2165 sec/batch
Epoch: 18/20...  Training Step: 387...  Training loss: 2.2021...  0.1961 sec/batch
Epoch: 18/20...  Training Step: 388...  Training loss: 2.2091...  0.1975 sec/batch
Epoch: 18/20...  Training Step: 389...  Training loss: 2.1715...  0.1937 sec/batch
Epoch: 18/20...  Training Step: 390...  Training loss: 2.1953...  0.2054 sec/batch
Epoch: 18/20...  Training Step: 391...  Training loss: 2.1648...  0.1953 sec/batch
Epoch: 18/20...  Training Step: 392...  Training loss: 2.1906...  0.1976 sec/batch
Epoch: 18/20...  Training Step: 393...  Training loss: 2.1868...  0.1953 sec/batch
Epoch: 18/20...  Training Step: 394...  Training loss: 2.1649...  0.1977 sec/batch
Epoch: 18/20...  Training Step: 395...  Training loss: 2.1845...  0.1978 sec/batch
Epoch: 18/20...  Training Step: 396...  Training loss: 2.2172...  0.1965 sec/batch
Epoch: 19/20...  Training Step: 397...  Training loss: 2.2440...  0.1975 sec/batch
Epoch: 19/20...  Training Step: 398...  Training loss: 2.1196...  0.1964 sec/batch
Epoch: 19/20...  Training Step: 399...  Training loss: 2.1653...  0.1974 sec/batch
Epoch: 19/20...  Training Step: 400...  Training loss: 2.1824...  0.1943 sec/batch
Epoch: 19/20...  Training Step: 401...  Training loss: 2.1606...  0.1947 sec/batch
Epoch: 19/20...  Training Step: 402...  Training loss: 2.1746...  0.2016 sec/batch
Epoch: 19/20...  Training Step: 403...  Training loss: 2.1311...  0.1974 sec/batch
Epoch: 19/20...  Training Step: 404...  Training loss: 2.1874...  0.1995 sec/batch
Epoch: 19/20...  Training Step: 405...  Training loss: 2.1759...  0.1965 sec/batch
Epoch: 19/20...  Training Step: 406...  Training loss: 2.1830...  0.1990 sec/batch
Epoch: 19/20...  Training Step: 407...  Training loss: 2.1291...  0.1952 sec/batch
Epoch: 19/20...  Training Step: 408...  Training loss: 2.2140...  0.1939 sec/batch
Epoch: 19/20...  Training Step: 409...  Training loss: 2.1662...  0.1958 sec/batch
Epoch: 19/20...  Training Step: 410...  Training loss: 2.1866...  0.1967 sec/batch
Epoch: 19/20...  Training Step: 411...  Training loss: 2.1387...  0.1960 sec/batch
Epoch: 19/20...  Training Step: 412...  Training loss: 2.1913...  0.1953 sec/batch
Epoch: 19/20...  Training Step: 413...  Training loss: 2.1760...  0.1949 sec/batch
Epoch: 19/20...  Training Step: 414...  Training loss: 2.1479...  0.1973 sec/batch
Epoch: 19/20...  Training Step: 415...  Training loss: 2.1530...  0.1947 sec/batch
Epoch: 19/20...  Training Step: 416...  Training loss: 2.1443...  0.1979 sec/batch
Epoch: 19/20...  Training Step: 417...  Training loss: 2.1835...  0.1940 sec/batch
Epoch: 19/20...  Training Step: 418...  Training loss: 2.2034...  0.1959 sec/batch
Epoch: 20/20...  Training Step: 419...  Training loss: 2.1811...  0.1947 sec/batch
Epoch: 20/20...  Training Step: 420...  Training loss: 2.0937...  0.2038 sec/batch
Epoch: 20/20...  Training Step: 421...  Training loss: 2.1250...  0.1969 sec/batch
Epoch: 20/20...  Training Step: 422...  Training loss: 2.1319...  0.2020 sec/batch
Epoch: 20/20...  Training Step: 423...  Training loss: 2.1325...  0.1978 sec/batch
Epoch: 20/20...  Training Step: 424...  Training loss: 2.1310...  0.1981 sec/batch
Epoch: 20/20...  Training Step: 425...  Training loss: 2.1141...  0.2034 sec/batch
Epoch: 20/20...  Training Step: 426...  Training loss: 2.1610...  0.1958 sec/batch
Epoch: 20/20...  Training Step: 427...  Training loss: 2.1472...  0.1988 sec/batch
Epoch: 20/20...  Training Step: 428...  Training loss: 2.1890...  0.2090 sec/batch
Epoch: 20/20...  Training Step: 429...  Training loss: 2.1335...  0.2030 sec/batch
Epoch: 20/20...  Training Step: 430...  Training loss: 2.1888...  0.2013 sec/batch
Epoch: 20/20...  Training Step: 431...  Training loss: 2.1665...  0.1961 sec/batch
Epoch: 20/20...  Training Step: 432...  Training loss: 2.1620...  0.1963 sec/batch
Epoch: 20/20...  Training Step: 433...  Training loss: 2.1163...  0.1966 sec/batch
Epoch: 20/20...  Training Step: 434...  Training loss: 2.1506...  0.1941 sec/batch
Epoch: 20/20...  Training Step: 435...  Training loss: 2.1393...  0.1942 sec/batch
Epoch: 20/20...  Training Step: 436...  Training loss: 2.1375...  0.2010 sec/batch
Epoch: 20/20...  Training Step: 437...  Training loss: 2.1178...  0.1936 sec/batch
Epoch: 20/20...  Training Step: 438...  Training loss: 2.1173...  0.1971 sec/batch
Epoch: 20/20...  Training Step: 439...  Training loss: 2.1741...  0.1960 sec/batch
Epoch: 20/20...  Training Step: 440...  Training loss: 2.1792...  0.1965 sec/batch

In [78]:
tf.train.get_checkpoint_state('checkpoints')


Out[78]:
model_checkpoint_path: "checkpoints/i440_l512.ckpt"
all_model_checkpoint_paths: "checkpoints/i200_l512.ckpt"
all_model_checkpoint_paths: "checkpoints/i400_l512.ckpt"
all_model_checkpoint_paths: "checkpoints/i440_l512.ckpt"

In [79]:
def pick_top_n(preds, vocab_size, top_n=5):
    ''' Sample the next character from the top_n most likely predictions. '''
    p = np.squeeze(preds)
    # Zero out everything except the top_n largest probabilities
    p[np.argsort(p)[:-top_n]] = 0
    # Renormalize and sample a character index from the reduced distribution
    p = p / np.sum(p)
    c = np.random.choice(vocab_size, 1, p=p)[0]
    return c
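
The effect of pick_top_n is easiest to see on a toy distribution: everything outside the top_n most probable entries is zeroed out before sampling, so low-probability characters can never be drawn. An illustrative sketch, not from the original run:

toy_preds = np.array([[0.05, 0.40, 0.05, 0.30, 0.20]])
# With top_n=2 only the two largest entries (0.40 and 0.30) survive,
# renormalized to 4/7 and 3/7, so the sampled index is always 1 or 3
print(pick_top_n(toy_preds, vocab_size=5, top_n=2))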

In [80]:
def sample(checkpoint, n_samples, lstm_size, vocab_size, prime="The "):
    ''' Generate n_samples characters from a trained checkpoint,
        seeded with the prime string. '''
    samples = [c for c in prime]
    model = CharRNN(vocab_size, lstm_size=lstm_size, sampling=True)
    saver = tf.train.Saver()
    with tf.Session() as sess:
        saver.restore(sess, checkpoint)
        new_state = sess.run(model.initial_state)
        # Run the prime characters through the network to warm up the state
        for c in prime:
            x = np.zeros((1, 1))
            x[0,0] = vocab_to_int[c]
            feed = {model.inputs: x,
                    model.keep_prob: 1.,
                    model.initial_state: new_state}
            preds, new_state = sess.run([model.prediction, model.final_state], 
                                         feed_dict=feed)

        c = pick_top_n(preds, vocab_size)
        samples.append(int_to_vocab[c])

        for i in range(n_samples):
            x[0,0] = c
            feed = {model.inputs: x,
                    model.keep_prob: 1.,
                    model.initial_state: new_state}
            preds, new_state = sess.run([model.prediction, model.final_state], 
                                         feed_dict=feed)

            c = pick_top_n(preds, vocab_size)
            samples.append(int_to_vocab[c])
        
    return ''.join(samples)

In [81]:
tf.train.latest_checkpoint('checkpoints')


Out[81]:
'checkpoints/i440_l512.ckpt'

In [82]:
checkpoint = tf.train.latest_checkpoint('checkpoints')
samp = sample(checkpoint, 2000, lstm_size, len(vocab), prime="Far")
print(samp)


INFO:tensorflow:Restoring parameters from checkpoints/i440_l512.ckpt
Farine Bey uanaue te mtute ”鸿渐道:“我是你在家去吃你,你的人当然知道,你不是我一个人说。”

“我不要看你,我不知道。”

“你的脸上添了‘请方家’,不是我们在旁人——咦,你们瞧不起我的——”

“我不知道这是方先生的情感,好不是一句话的人多——”鸿渐说:“不会你这样,你不要说,我不知道。你们是个恋爱,可是那样的女人,你知道你,这些话真都是心,要开玩笑的事,反正我对老太太的话。”柔嘉道:“我也不肯说。”鸿渐道:“这些你们家都是不知道你的年来么?”柔嘉道:“你这儿是粗人的话,要你吃饭的?你在你们家里人不到你们去,吃饭的事没有?你在家里吃得不起!我不要为什么不懂,我是你们家伙,我们也不知道,这就是汪处太的苦领。”鸿渐道:“我也不懂道:“你的人家不是去,你也不会,我在你气不住你。”

“我不知道,你是不过的。”

辛楣道:“我不是方老先生——你教授也不会高兴,咱们高校长开个大学,带下行的礼物,还不甚高。。我说我不要上去过他的,他看得很词,请他们要到报馆来了,请客,他们俩请客。还把客交给他们吃饭碗,就说他生气的时候,他们这张光。”鸿渐想着,并且对他们说出洋人的态度。电影在外国的三个字,他们得在大家里的人光和他在房子上,他也把他看见名目,彼此就目的爬,就在为了那么大闹。遯翁道:“这儿做事子,不是你替你去看。”辛楣道:“我要给你看见了,你们两间房子,也要请他们进去。”

鸿渐道:“你不肯为什么?”

辛楣道:“我说你,我也不知道。”

辛楣道:“我这人说话,不会看见你,只有你们一个人来了。”

鸿渐道:“谁也不过?辛楣,今天早晨去看到两位。”辛楣道:“你又不知道这样!我不是你的人在身体里的苦。”鸿渐虽然已经感激,绝了一笑。

“方先生,你说我没有过来,你们的话很比不通的。”

“我的人都不会说,我跟你讲的话很好。”

鸿渐和辛楣在外国讲话,不但自己,可是自己也像一个人跟陆子潇的论法来是个两个“名人的亲戚在外?”这几位,

“为有名明天到外国文学系的位。他们学校是哲学系教育组哲学系,学校是不位教师师学生的教授——咦当一次也没有他们道:“这位先生去做公共同学的时候的钱,当然以为这些事得很了。我知道,就不过一次教授。我想是个副教授的高年[的]了,请他做事,学校长声音院——请声声音里——”——“当然”——“我就说了,有什么系——”说时想不着自己说:“我想你做事,你没有讲论,他想来我们不知道。他们对学生的话,不过一句,是他们的事全要一个同学。”

鸿渐不耐烦道:“我可以你大家气都不是了!我是个人,你不是礼物。你为什么不讲?我说不爱呀!”

“我看了一个人,我不知道。”

鸿渐道:“你这么?明白了不要要送他。我不是个电话,我以为你的事很了,我想你不在回家。”

“我也不知道。你有个是面,我说不住,你跟你去一个同事。”

辛楣和鸿渐笑道:“那两种人家真知道,不过你们自己都不是我的你们这样的人。”

鸿渐正经济程度,要对他口来,因为他写信给他一阵人的事,他们这样人不愿意自己的失望,自己也没想到外交他们分了。方鸿渐忙开始辞辞,鸿渐道:“为什么不知道你回头?你也是我的钱,不是你说。我有了办法,请他们吃饭,还不要喝了!”

鸿渐正在脸方先生,问鸿渐问

“不会有关系。赵先生没有?”鸿渐道:“我也不能告诉她了,我们不定白了。”

辛楣和鸿渐笑一说,“你”高松年做事不愿意说。辛楣不知道,高松年跟自己讲,可以他想不到这种学生。韩学愈不定韩主任。假使他说不定聘书他是不知道,这位小子是不起教书的东西。他想去找时,他们笑得。那两位那位大学家的人先生,刘东方都不会说,他只要刘东方的话真不好,因为他们不来了一点同情的。他们俩虽然像方鸿渐一个人,已经是在外国人的日子里南“有公设带一个地方,是有名的地方。遯翁虽然有三十岁,大学是个未婚妻的女女人,也不出美国的那位姑母跟丈母家是同考回国,那位女长的丈母虽然父亲订婚,那人未婚夫自己家里做人,自己没有留学生。可是一切不爱女人的学生,反正有个无事,只好像个女人全知趣道。他在个女人,跟他生亲俩的事全以分;彼此是赵先生的脸上,正是的眼睛,是不得的!至少不能干地写着,他们要找二十年前时候,大学的列物学人人名名文凭。鸿渐要法国、唐小姐字说中国《方家》里》的刊物,有张小书,他们真有张家的人看。鸿渐这一次,他们是在中国大学史班上,他们听在同学在外国人讲起来,可以通封,叫“苏小姐”的字。他“他真相信!”

鸿渐道:“我想没有教书,那位太太呢!”

“我也不会错。我是你那个人去负心,我们也许不多,你说话都不是人。”

辛楣道:“我有点眼睛,你是我的房子,不是你们那位太太的,你们真不住了。”柔嘉也不过,只是一个人在口面里说:“你的时候不到分付,就条南京带了房子房间房子太搁,我看得用人,你说起来。”

辛楣道:“我也要回答,我没有?”辛

In [ ]: