Generating Headlines from News Articles with Pytorch


In [1]:
%matplotlib inline
import re, pickle, collections, bcolz, numpy as np, keras, sklearn, math, operator, random, time, os
import matplotlib.pyplot as plt


Using TensorFlow backend.

In [2]:
from gensim.models import KeyedVectors

import torch, torch.nn as nn
from torch.autograd import Variable
from torch import optim
import torch.nn.functional as F

Prepare corpus


In [3]:
headlines = pickle.load( open('Dissertation/headlines.pkl', 'rb') )
articles = pickle.load( open('Dissertation/articles.pkl', 'rb') )

In [4]:
pairs = [(' '.join(hdln), ' '.join(art)) for hdln, art in zip(headlines, articles)]

Because the data takes a while to prepare, we pickle the paired results so they can be reloaded quickly later.


In [9]:
pickle.dump(pairs, open('art-hdln-pairs.pkl', 'wb'))

In [11]:
pairs = pickle.load(open('art-hdln-pairs.pkl', 'rb'))

Special tokens used to pad the end of sentences, to mark the start of a sentence, and to stand in for unknown (out-of-vocabulary) words.


In [5]:
PAD = 0; SOS = 1; UNK = 2

Enumerate the unique words (vocab) in the corpus, and also create the reverse map (word->index). Then use this mapping to encode every sentence as a list of int indices.


In [6]:
def toks2ids(sents, voc_size=200000):
    # Count token frequencies and keep only the voc_size most common tokens
    voc_cnt = collections.Counter(t for sent in sents for t in sent)
    vocab = sorted(voc_cnt, key=voc_cnt.get, reverse=True)
    vocab = vocab[:voc_size]
    # Reserve the special token ids defined above
    vocab.insert(PAD, "<PAD>")
    vocab.insert(SOS, "<SOS>")
    vocab.insert(UNK, "<UNK>")
    w2id = {w:i for i,w in enumerate(vocab)}
    id2w = {i:w for i,w in enumerate(vocab)}
    # Look words up in the dict (O(1)) rather than scanning the vocab list (O(n))
    ids = [[w2id.get(t, UNK) for t in sent] for sent in sents]
    return ids, vocab, w2id, id2w, voc_cnt
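For example, on a toy corpus (illustrative only - the three special ids are reserved, so real words start at index 3):

ids, vocab, w2id, id2w, cnt = toks2ids([['hello', 'world'], ['hello']], voc_size=10)
# vocab -> ['<PAD>', '<SOS>', '<UNK>', 'hello', 'world']
# ids   -> [[3, 4], [3]]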

In [7]:
art_ids, art_vocab, art_w2id, art_id2w, art_counts = toks2ids(articles)
hdln_ids, hdln_vocab, hdln_w2id, hdln_id2w, hdln_counts = toks2ids(headlines)

Word vectors

Stanford's GloVe word vectors can be downloaded from https://nlp.stanford.edu/projects/glove/ (in the code below we have preprocessed them into a bcolz array). We use these because each individual word has a single word vector, which is what we need here; word2vec, by contrast, also includes vectors for multi-word phrases.
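The bcolz/pickle layout that load_glove below expects could be produced from the raw GloVe text file along these lines (a sketch, assuming the standard one-token-per-line GloVe format; the file names are illustrative):

# Hypothetical preprocessing: convert raw GloVe text into the bcolz/pickle
# layout that load_glove() expects (loc+'.dat', loc+'_words.pkl', loc+'_idx.pkl').
import bcolz, pickle, numpy as np

words, idx, vecs = [], {}, []
with open('glove.6B.300d.txt', encoding='utf-8') as f:
    for i, line in enumerate(f):
        parts = line.rstrip().split(' ')
        words.append(parts[0]); idx[parts[0]] = i
        vecs.append(np.array(parts[1:], dtype='float32'))

arr = bcolz.carray(np.stack(vecs), rootdir='6B.300d.dat', mode='w')
arr.flush()
pickle.dump(words, open('6B.300d_words.pkl', 'wb'))
pickle.dump(idx, open('6B.300d_idx.pkl', 'wb'))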


In [8]:
def load_glove(loc):
    return (bcolz.open(loc+'.dat')[:],
        pickle.load(open(loc+'_words.pkl','rb'), encoding='latin1'),
        pickle.load(open(loc+'_idx.pkl','rb'), encoding='latin1'))

In [9]:
hdln_vecs, hdln_wv_word, hdln_wv_idx = load_glove('/mnt/cvl-store-0/home/psxca1/data/glove/6B.300d')
hdln_w2v = {w: hdln_vecs[hdln_wv_idx[w]] for w in hdln_wv_word}
n_hdln_vec, dim_hdln_vec = hdln_vecs.shape

In [10]:
hdln_w2v['king']


Out[10]:
array([ 0.0033901 , -0.34614   ,  0.28143999,  0.48381999,  0.59469002,
        0.012965  ,  0.53982002,  0.48232999,  0.21462999, -1.02489996,
       -0.34788001, -0.79000998, -0.15084   ,  0.61374003,  0.042811  ,
        0.19323   ,  0.25461999,  0.32528001,  0.05698   ,  0.063253  ,
       -0.49439001,  0.47336999, -0.16761   ,  0.045594  ,  0.30451   ,
       -0.35416001, -0.34582999, -0.20118   ,  0.25511   ,  0.091111  ,
        0.014651  , -0.017541  , -0.23853999,  0.48214999, -0.9145    ,
       -0.36234999,  0.34735999,  0.028639  , -0.027065  , -0.036481  ,
       -0.067391  , -0.23452   , -0.13772   ,  0.33950999,  0.13415   ,
       -0.13420001,  0.47856   , -0.1842    ,  0.10705   , -0.45833999,
       -0.36085001, -0.22595   ,  0.32881001, -0.13643   ,  0.23128   ,
        0.34268999,  0.42344001,  0.47057   ,  0.479     ,  0.074639  ,
        0.3344    ,  0.10714   , -0.13289   ,  0.58734   ,  0.38615999,
       -0.52237999, -0.22028001, -0.072322  ,  0.32269001,  0.44226   ,
       -0.037382  ,  0.18324   ,  0.058082  ,  0.26938   ,  0.36201999,
        0.13982999,  0.016815  , -0.34426001,  0.48269999,  0.21080001,
        0.75617999, -0.13091999, -0.025741  ,  0.43391001,  0.33893001,
       -0.16438   ,  0.26817   ,  0.68774003,  0.31099999, -0.2509    ,
        0.0027749 , -0.39809   , -0.43399   ,  0.049531  , -0.42686   ,
       -0.094679  ,  0.56924999,  0.28742   , -0.015721  , -0.059162  ,
        0.1912    , -0.59814   ,  0.65486002, -0.31363001,  0.16881   ,
        0.10862   ,  0.075316  ,  0.34093001, -0.14706001,  0.83590001,
        0.39697   ,  0.52358001, -0.0096367 , -0.14406   ,  0.37783   ,
       -0.59600002, -0.063192  , -0.85297   , -0.3098    , -1.05869997,
       -1.02499998,  0.4508    , -0.73324001, -1.24609995, -0.028488  ,
        0.20299   ,  0.00259   ,  0.31995001,  0.35743999,  0.28533   ,
        0.228     ,  0.50955999, -0.35942   ,  0.32683   ,  0.046264  ,
       -0.86896002, -0.27070001, -0.15454   , -0.32152   ,  0.31121001,
        0.44134   ,  0.85189003,  0.21065   , -0.13741   , -0.15358999,
       -0.059722  ,  0.027375  ,  0.23724   , -0.39197001, -0.66065001,
        0.23587   ,  0.032384  , -0.64042997,  0.55004001,  0.29596999,
        0.14989001,  0.46079001, -0.26561001, -0.16069999, -0.36328   ,
        1.07819998,  0.31375   ,  0.1149    ,  0.20248   ,  0.032748  ,
        0.41082001, -0.082536  ,  0.36605999,  0.18771   ,  0.75414997,
        0.079648  ,  0.24180999, -0.60319   , -0.37296   , -0.047767  ,
        0.45008001, -0.21134999,  0.022251  , -0.084325  ,  0.18644001,
       -0.14681999,  0.56571001, -0.30994999,  0.17422999, -0.41122001,
       -0.84772003, -0.71113998,  0.69894999, -0.13008   , -0.34195   ,
       -0.30500999, -0.12646   ,  0.29956999, -0.43487999,  0.31935   ,
        0.28169999, -0.20631   , -0.48877001,  0.34477001,  0.03907   ,
        1.61979997, -0.63520002, -0.0037675 , -0.41271001,  0.30704001,
       -0.50485998,  0.036385  , -0.046386  , -0.12004   ,  0.010029  ,
       -0.49116001,  0.041486  ,  0.002979  , -0.57694   , -0.42087999,
       -0.063218  ,  0.0034244 , -0.25093001, -0.39689001, -0.36984   ,
        0.32688999,  0.01385   ,  0.23634   , -0.055199  , -0.58453   ,
        0.13211   ,  0.50942999,  0.25198001, -0.0088309 , -0.21273001,
       -0.48423001,  0.52340001, -0.32832   , -0.013821  ,  0.15812001,
        0.46696001,  0.036822  , -0.090878  ,  0.18854   ,  0.20794   ,
       -0.42682001,  0.59705001,  0.53109002,  0.19185001, -0.16392   ,
        0.064956  , -0.36008999, -0.59881997, -0.28134   ,  0.1017    ,
        0.02601   ,  0.44297999, -0.31922001, -0.22431999,  0.78280002,
        0.041307  ,  0.1742    ,  0.27777001,  0.43792   , -0.84324002,
        0.27011999, -0.21547   ,  0.52407998, -0.19426   , -0.21878   ,
       -0.20713   ,  0.092994  , -0.15804   ,  0.28716001, -0.11911   ,
       -0.20688   , -0.36482   ,  0.68548   , -0.10394   , -0.49974   ,
       -0.47038001, -1.29530001, -0.46235999,  0.44466999,  0.13337   ,
        0.88761997, -0.26493999,  0.080676  , -0.20625   , -0.51231998,
        0.31112   ,  0.062035  ,  0.30302   , -0.33344001, -0.20924   ,
       -0.17348   , -0.43434   , -0.45743001, -0.077803  , -0.33248001,
       -0.078633  ,  0.82182002,  0.082088  , -0.68795002,  0.30265999], dtype=float32)

For the article vocabulary, we load the same GloVe vectors.


In [11]:
art_vecs, art_wv_word, art_wv_idx = load_glove('/mnt/cvl-store-0/home/psxca1/data/glove/6B.300d')
art_w2v = {w: art_vecs[art_wv_idx[w]] for w in art_wv_word}
n_art_vec, dim_art_vec = art_vecs.shape

We need to map each word index in our vocabs to its word vector. Not every word in our vocabs will be in the word vectors, since our tokenization approach won't be identical to that of the word vectors' creators; in those cases we simply fall back to a random vector.


In [12]:
def create_emb(w2v, targ_vocab, dim_vec):
    vocab_size = len(targ_vocab)
    emb = np.zeros((vocab_size, dim_vec))
    found=0

    for i, word in enumerate(targ_vocab):
        try: emb[i] = w2v[word]; found+=1
        except KeyError: emb[i] = np.random.normal(scale=0.6, size=(dim_vec,))

    return emb, found

In [13]:
hdln_embs, found = create_emb(hdln_w2v, hdln_vocab, dim_hdln_vec); hdln_embs.shape, found


Out[13]:
((38879, 300), 36743)

In [14]:
art_embs, found = create_emb(art_w2v, art_vocab, dim_art_vec); art_embs.shape, found


Out[14]:
((63727, 300), 58397)

Prep data

Each sentence has to be of equal length. Keras has a convenient function, pad_sequences, to truncate and/or pad each sentence as required - even though we're not using Keras for the neural net, we can still use any functions from it that we need.


In [15]:
from keras.preprocessing.sequence import pad_sequences
hdln_len = 30
art_len = 50

hdln_padded = pad_sequences(hdln_ids, hdln_len, 'int64', "post", "post")
art_padded = pad_sequences(art_ids, art_len, 'int64', "post", "post")
hdln_padded.shape, art_padded.shape, hdln_embs.shape


Out[15]:
((99812, 30), (99812, 50), (38879, 300))
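As a quick illustration of the padding and truncation behaviour on a toy input (illustrative only):

print(pad_sequences([[4, 5, 6], [7, 8, 9, 10, 11, 12]], 5, 'int64', 'post', 'post'))
# [[ 4  5  6  0  0]   <- padded at the end
#  [ 7  8  9 10 11]]  <- truncated at the end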

And of course we need to separate our training and test sets...


In [29]:
from sklearn import model_selection
art_train, art_test, hdln_train, hdln_test = model_selection.train_test_split(
    art_padded, hdln_padded, test_size=0.1)

[o.shape for o in (art_train, art_test, hdln_train, hdln_test)]


Out[29]:
[(89830, 50), (9982, 50), (89830, 30), (9982, 30)]

In [16]:
art_train = pickle.load( open('1art_train.pkl', 'rb') )
art_test = pickle.load( open('1art_test.pkl', 'rb') )
hdln_train = pickle.load( open('1hdln_train.pkl', 'rb') )
hdln_test = pickle.load( open('1hdln_test.pkl', 'rb') )

Here's an example of an article and its headline, after encoding and padding.


In [17]:
art_train[0], hdln_train[0]


Out[17]:
(array([    3,   134,     6,    82,   183,    19,  1120,    23,    29,
         4725,    43,  5378,  1009,     8,  2012,    25,  3703,  3754,
          995,    21,    18,    36,    34,  2814,  2229, 11172,     9,
            3,  3015,   693,   242,   124,  3995,    54,  2468,   104,
            9,     3,   311,   271,    83,     4,     0,     0,     0,
            0,     0,     0,     0,     0]),
 array([  759, 11954,  1034,     8,    25,  3280,     4,  1949,   751,
            0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0]))

Model

Basic encoder-decoder


In [18]:
def long_t(arr): return Variable(torch.LongTensor(arr)).cuda()

In [19]:
art_emb_t = torch.FloatTensor(art_embs).cuda()
hdln_emb_t = torch.FloatTensor(hdln_embs).cuda()

In [20]:
def create_emb(emb_mat, non_trainable=False):
    # Note: this redefines the earlier create_emb; from here on, create_emb
    # builds an nn.Embedding layer initialized from a pretrained matrix.
    output_size, emb_size = emb_mat.size()
    emb = nn.Embedding(output_size, emb_size)
    emb.load_state_dict({'weight': emb_mat})
    if non_trainable:
        # Freeze the embedding weights so the optimizer skips them
        for param in emb.parameters():
            param.requires_grad = False
    return emb, emb_size, output_size

Turning a sequence into a representation can be done using an RNN (called the 'encoder'). This approach is useful because RNNs are able to keep track of state and memory, which is obviously important in forming a complete understanding of a sentence.

  • bidirectional=True passes the original sequence through one RNN and the reversed sequence through another, then concatenates the results. This allows us to look both forwards and backwards.
  • We do this because in language, what comes later often disambiguates what came before (e.g. in Spanish, "el chico, la chica" means "the boy, the girl"; the word for "the" is determined by the gender of the noun that follows it).

In [21]:
class EncoderRNN(nn.Module):
    def __init__(self, embs, hidden_size, n_layers=4, dropout=0.2):
        super(EncoderRNN, self).__init__()
        self.emb, emb_size, output_size = create_emb(embs)
        self.n_layers = n_layers
        self.dropout = dropout
        self.hidden_size = hidden_size
        self.gru = nn.GRU(emb_size, hidden_size, batch_first=True, num_layers=self.n_layers, bidirectional=True, dropout=self.dropout)
        
    def forward(self, inp, hidden):
        outputs, hidden = self.gru(self.emb(inp), hidden)
        return outputs, hidden

    def initHidden(self, batch_size):
        return Variable(torch.zeros(self.n_layers * 2, batch_size, self.hidden_size)) # * 2 for bidirectional

In [22]:
def encode(inp, encoder):
    batch_size, input_length = inp.size()
    hidden = encoder.initHidden(batch_size).cuda()
    enc_outputs, hidden = encoder(inp, hidden)
    return long_t([SOS]*batch_size), enc_outputs, hidden
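For a batch of B input sequences of length T with hidden size H, enc_outputs has shape (B, T, 2H) (the factor of 2 comes from the bidirectional GRU), hidden has shape (2 * n_layers, B, H), and the returned decoder input is a batch of B SOS tokens.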

The encoder leaves us with a vector representation of the sequence that captures everything we need to generate the output. We feed this into further RNN layers (the 'decoder'), and at each step classify which word comes next in the output sequence.


In [36]:
class DecoderRNN(nn.Module):
    def __init__(self, embs, hidden_size, n_layers=2):
        super(DecoderRNN, self).__init__()
        self.emb, emb_size, output_size = create_emb(embs)
        self.gru = nn.GRU(emb_size, hidden_size, batch_first=True, num_layers=n_layers)
        self.out = nn.Linear(hidden_size, output_size)
        
    def forward(self, inp, hidden):
        emb = self.emb(inp).unsqueeze(1)
        res, hidden = self.gru(emb, hidden)
        res = F.log_softmax(self.out(res[:,0]))
        return res, hidden

With an encoder/decoder alone, accuracy decays as the input sequence grows: the entire input must be squeezed through a single fixed-size vector, so long sequences are summarized less faithfully.

This can be mitigated using an attentional model.
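Concretely, at each decoding step the attention mechanism below scores every encoder output e_i against the current (top-layer) decoder hidden state h - additive, Bahdanau-style attention, with symbols matching the AttnDecoderRNN weights defined later:

    u_i = tanh(e_i W1 + h W2 + b2)
    a   = softmax(V . u)
    Xa  = sum_i a_i e_i

The context vector Xa is concatenated with the embedded previous token, projected through W3, and fed into the decoder GRU.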

Adding broadcasting to Pytorch


In [23]:
def unit_prefix(x, n=1):
    for i in range(n): x = x.unsqueeze(0)
    return x

def align(x, y, start_dim=2):
    xd, yd = x.dim(), y.dim()
    if xd > yd: y = unit_prefix(y, xd - yd)
    elif yd > xd: x = unit_prefix(x, yd - xd)

    xs, ys = list(x.size()), list(y.size())
    nd = len(ys)
    for i in range(start_dim, nd):
        td = nd-i-1
        if   ys[td]==1: ys[td] = xs[td]
        elif xs[td]==1: xs[td] = ys[td]
    return x.expand(*xs), y.expand(*ys)

In [24]:
def aligned_op(x,y,f): return f(*align(x,y,0))

def add(x, y): return aligned_op(x, y, operator.add)
def sub(x, y): return aligned_op(x, y, operator.sub)
def mul(x, y): return aligned_op(x, y, operator.mul)
def div(x, y): return aligned_op(x, y, operator.truediv)

In [25]:
def dot(x, y):
    assert(1<y.dim()<5)
    x, y = align(x, y)
    
    if y.dim() == 2: return x.mm(y)
    elif y.dim() == 3: return x.bmm(y)
    else:
        xs,ys = x.size(), y.size()
        res = torch.zeros(*(xs[:-1] + (ys[-1],)))
        for i in range(xs[0]): res[i].baddbmm_(x[i], (y[i]))
        return res
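A quick sanity check of these helpers (a minimal sketch; the shapes are arbitrary):

a = torch.ones(3, 1)                # (3, 1)
b = torch.Tensor([[0, 1, 2, 3]])    # (1, 4)
print(add(a, b).size())             # torch.Size([3, 4]) - both sides expanded

x = torch.randn(5, 2, 7)            # a batch of 5 matrices
W = torch.randn(7, 4)
print(dot(x, W).size())             # torch.Size([5, 2, 4]) - W broadcast over the batch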

Attentional model


In [26]:
def Arr(*sz): return torch.randn(sz)/math.sqrt(sz[0])
# Move the tensor to the GPU *before* wrapping it in nn.Parameter; calling
# .cuda() on a Parameter returns a plain Variable, which nn.Module would not
# register (so the optimizer would never see these weights).
def Var(*sz): return nn.Parameter(Arr(*sz).cuda())

In [27]:
class AttnDecoderRNN(nn.Module):
    def __init__(self, embs, hidden_size, n_layers=2, p=0.2):
        super(AttnDecoderRNN, self).__init__()
        self.hidden_size = hidden_size
        self.n_layers=n_layers
        self.p=p
        self.emb_dropout = nn.Dropout(p)
        
        self.emb, emb_size, output_size = create_emb(embs)
        self.W1 = Var(hidden_size*2, hidden_size) # * 2 for bidirectional
        self.W2 = Var(hidden_size, hidden_size)
        self.W3 = Var(emb_size+hidden_size*2, hidden_size) # * 2 for bidirectional
        self.b2 = Var(1,hidden_size)
        self.b3 = Var(1,hidden_size)
        self.V = Var(1,1,hidden_size)
        self.gru = nn.GRU(hidden_size, hidden_size, num_layers=n_layers, bidirectional=True, dropout=self.p)
        self.out = nn.Linear(hidden_size*2, output_size) # * 2 for bidirectional

    def forward(self, inp, hidden, enc_outputs):
        emb_inp = self.emb_dropout(self.emb(inp))
        w1e = dot(enc_outputs, self.W1)
        w2h = dot(hidden[-1], self.W2)
        w2h = (w2h+self.b2.expand_as(w2h)).unsqueeze(1)
        u = F.tanh(w1e + w2h.expand_as(w1e))
        a = (self.V.expand_as(u)*u).sum(2).squeeze(2)
        a = F.softmax(a).unsqueeze(2)        
        Xa = (a.expand_as(enc_outputs) * enc_outputs).sum(1)     
        res = dot(torch.cat([emb_inp, Xa.squeeze(1)], 1), self.W3)
        res = (res+self.b3.expand_as(res)).unsqueeze(0)
        res, hidden = self.gru(res, hidden)
        res = F.log_softmax(self.out(res.squeeze(0)))
        return res, hidden

Train


In [28]:
def get_batch(x, y, batch_size=16):
    idxs = np.random.permutation(len(x))[:batch_size]
    return x[idxs], y[idxs]

Pytorch has limited functionality for training models automatically - you will generally have to write your own training loops. In exchange, Pytorch makes it far easier to customize how training is done, for example by using teacher forcing.


In [29]:
def train(inp, targ, encoder, decoder, enc_opt, dec_opt, crit, teacher_forcing_ratio):
    decoder_input, encoder_outputs, hidden = encode(inp, encoder)
    target_length = targ.size()[1]
    
    enc_opt.zero_grad(); dec_opt.zero_grad()
    loss = 0

    if random.random() < teacher_forcing_ratio:
        for di in range(target_length):
            decoder_output, hidden = decoder(decoder_input, hidden, encoder_outputs)
            loss += crit(decoder_output, targ[:, di])
            decoder_input = targ[:, di]
            
    else: # feed output for next input
        for di in range(target_length):
            decoder_output, hidden = decoder(decoder_input, hidden, encoder_outputs)
            loss += crit(decoder_output, targ[:, di])
            topv, topi = decoder_output.data.topk(1)
            decoder_input = Variable(topi.squeeze()).cuda()

    loss.backward()
    enc_opt.step(); dec_opt.step()
    return loss.data[0] / target_length

In [30]:
def calc_minutes(since):
    now = time.time()
    s = now - since
    return s/60

def as_minutes(s):
    m = math.floor(s / 60)
    s -= m * 60
    return '%dm %ds' % (m, s)

def time_since(since, percent):
    now = time.time()
    s = now - since
    es = s / (percent)
    rs = es - s
    return '%-7s (- %s)' % (as_minutes(s), as_minutes(rs))

In [31]:
def req_grad_params(o):
    return (p for p in o.parameters() if p.requires_grad)

In [32]:
def trainEpochs(encoder, decoder, n_epochs, start_time, times_list, avg_loss_list, epochs_list,
                print_every=200, lr=0.01, plot_loss_every=20, teacher_forcing='graduated'):
    print('LEARNING RATE: %f' % (lr))
    print_loss = 0 # Reset every print_every
    plot_loss = 0
    
    enc_opt = optim.Adam(req_grad_params(encoder), lr=lr)
    dec_opt = optim.Adam(decoder.parameters(), lr=lr)
    crit = nn.NLLLoss().cuda()
    
    for epoch in range(n_epochs):
        art, hdln = get_batch(art_train, hdln_train, 128)
        inp = long_t(art)
        targ = long_t(hdln)
        
        # Work out this epoch's teacher forcing ratio from the schedule argument
        if teacher_forcing == 'graduated':
            teacher_forcing_ratio = 1 - epoch/n_epochs
        elif teacher_forcing == 'full':
            teacher_forcing_ratio = 1
        elif teacher_forcing == 'none':
            teacher_forcing_ratio = 0
        elif isinstance(teacher_forcing, (int, float)) and 0 <= teacher_forcing <= 1:
            teacher_forcing_ratio = teacher_forcing
        else:
            raise ValueError("teacher_forcing must be 'graduated', 'full', 'none', or a ratio in [0, 1]")
        
        loss = train(inp, targ, encoder, decoder, enc_opt, dec_opt, crit, teacher_forcing_ratio)
        print_loss += loss
        plot_loss += loss

        if epoch % print_every == 0 and epoch != 0:
            print('%s\t%d\t%d%%\t%.4f' % (time_since(start_time, epoch / n_epochs), \
                                          epoch, epoch / n_epochs * 100, print_loss / print_every))
            print_loss = 0
        
        if epoch % plot_loss_every == 0 and epoch != 0:
            times_list.append(calc_minutes(start_time))
            avg_loss_list.append(plot_loss / plot_loss_every)
            epochs_list.append(epoch)
            plot_loss = 0

Training Loop

Note that each call to trainEpochs below constructs fresh Adam optimizers, so optimizer state is reset at the start of every 5,000-epoch block.


In [33]:
def multi_train(encoder, decoder, times_list, avg_loss_list, epochs_list, teacher_forcing_type):
    start_time = time.time()
    trainEpochs(encoder, decoder, 5000, start_time, times_list, avg_loss_list, epochs_list, lr=0.003, teacher_forcing=teacher_forcing_type)
    trainEpochs(encoder, decoder, 5000, start_time, times_list, avg_loss_list, epochs_list, lr=0.003, teacher_forcing=teacher_forcing_type)
    trainEpochs(encoder, decoder, 5000, start_time, times_list, avg_loss_list, epochs_list, lr=0.001, teacher_forcing=teacher_forcing_type)
    trainEpochs(encoder, decoder, 5000, start_time, times_list, avg_loss_list, epochs_list, lr=0.001, teacher_forcing=teacher_forcing_type)
    trainEpochs(encoder, decoder, 5000, start_time, times_list, avg_loss_list, epochs_list, lr=0.0003, teacher_forcing=teacher_forcing_type)
    trainEpochs(encoder, decoder, 5000, start_time, times_list, avg_loss_list, epochs_list, lr=0.0003, teacher_forcing=teacher_forcing_type)
    trainEpochs(encoder, decoder, 5000, start_time, times_list, avg_loss_list, epochs_list, lr=0.0001, teacher_forcing=teacher_forcing_type)
    trainEpochs(encoder, decoder, 5000, start_time, times_list, avg_loss_list, epochs_list, lr=0.00003, teacher_forcing=teacher_forcing_type)

Prepare Testing Functions


In [ ]:
def evaluate(inp):
    # Greedy (argmax) decoding, using the encoder/decoder defined globally
    decoder_input, encoder_outputs, hidden = encode(inp, encoder)
    target_length = hdln_len

    decoded_words = []
    for di in range(target_length):
        decoder_output, hidden = decoder(decoder_input, hidden, encoder_outputs)
        topv, topi = decoder_output.data.topk(1)
        ni = topi[0][0]
        if ni==PAD: break
        decoded_words.append(hdln_vocab[ni])
        decoder_input = long_t([ni])
    
    return decoded_words
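For example, decoding a single held-out article looks like this (a minimal sketch - evaluate relies on the trained encoder and decoder globals):

ids = long_t(art_test[0]).unsqueeze(0)   # batch of one article
print(' '.join(evaluate(ids)))           # greedy-decoded headline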

In [ ]:
def test(f_path):
    ref_dir = f_path+'/reference/'
    system_dir = f_path+'/system/'
    
    os.makedirs(ref_dir, exist_ok=True)
    os.makedirs(system_dir, exist_ok=True)
    
    for idx in range(len(art_test)):
        real_sent = [hdln_id2w[t] for t in hdln_test[idx] if t != PAD]
        if real_sent:
            with open(ref_dir + 'news%d_reference%d' % (idx, idx), 'w') as f:
                f.write(' '.join(real_sent))
        else:
            continue

        ids = long_t(art_test[idx]); ids = ids.unsqueeze(0)
        translation = evaluate(ids)
        with open(system_dir + 'news%d_system%d' % (idx, idx), 'w') as f:
            f.write(' '.join(translation))
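The reference/system directory layout and the news%d_reference%d / news%d_system%d file naming are arranged for ROUGE evaluation (presumably via a tool such as pyrouge; the scoring step itself is not shown in this notebook).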

Run


In [34]:
test_num = 1

list_path = './Dissertation/lists/test{}/'.format(test_num)
enc_dec_path = './Dissertation/encoders_and_decoders/test{}/'.format(test_num)
eval_path = './Dissertation/rouge_eval/test{}/'.format(test_num)
plot_path = './Dissertation/plots/test{}/'.format(test_num)

list_path, enc_dec_path, eval_path, plot_path


Out[34]:
('./Dissertation/lists/test1/',
 './Dissertation/encoders_and_decoders/test1/',
 './Dissertation/rouge_eval/test1/',
 './Dissertation/plots/test1/')

Graduated_Reset

With teacher_forcing='graduated', the forcing ratio anneals linearly from 1 to 0 within each 5,000-epoch call to trainEpochs, then resets to 1 at the start of the next call - hence 'reset'.

Define and Train


In [83]:
n_layers = 1
hidden_size = 128
graduated_reset_epochs_list = []
graduated_reset_times_list = []
graduated_reset_avg_loss_list = []

encoder = EncoderRNN(art_emb_t, hidden_size, n_layers).cuda()
decoder = AttnDecoderRNN(hdln_emb_t, hidden_size, n_layers).cuda()
multi_train(encoder, decoder, graduated_reset_times_list, graduated_reset_avg_loss_list, graduated_reset_epochs_list, 'graduated')


LEARNING RATE: 0.003000
1m 6s   (- 26m 27s)	200	4%	2.6117
2m 12s  (- 25m 27s)	400	8%	2.1652
3m 20s  (- 24m 26s)	600	12%	1.9953
4m 26s  (- 23m 19s)	800	16%	1.8760
5m 33s  (- 22m 14s)	1000	20%	1.8104
6m 40s  (- 21m 7s)	1200	24%	1.7213
7m 46s  (- 19m 59s)	1400	28%	1.6623
8m 54s  (- 18m 55s)	1600	32%	1.6231
10m 1s  (- 17m 48s)	1800	36%	1.5799
11m 8s  (- 16m 42s)	2000	40%	1.5745
12m 15s (- 15m 36s)	2200	44%	1.5008
13m 22s (- 14m 29s)	2400	48%	1.5210
14m 30s (- 13m 23s)	2600	52%	1.4890
15m 37s (- 12m 16s)	2800	56%	1.4636
16m 46s (- 11m 10s)	3000	60%	1.4660
17m 55s (- 10m 4s)	3200	64%	1.4580
19m 3s  (- 8m 58s)	3400	68%	1.4248
20m 11s (- 7m 51s)	3600	72%	1.4185
21m 19s (- 6m 44s)	3800	76%	1.4144
22m 29s (- 5m 37s)	4000	80%	1.3927
23m 38s (- 4m 30s)	4200	84%	1.3849
24m 47s (- 3m 22s)	4400	88%	1.3483
25m 56s (- 2m 15s)	4600	92%	1.3562
27m 6s  (- 1m 7s)	4800	96%	1.3279
LEARNING RATE: 0.003000
29m 20s (- 704m 19s)	200	4%	1.0629
30m 27s (- 350m 16s)	400	8%	0.9865
31m 33s (- 231m 28s)	600	12%	1.0328
32m 41s (- 171m 37s)	800	16%	1.0032
33m 48s (- 135m 14s)	1000	20%	0.9896
34m 55s (- 110m 36s)	1200	24%	1.0174
36m 2s  (- 92m 40s)	1400	28%	0.9763
37m 9s  (- 78m 57s)	1600	32%	0.9864
38m 16s (- 68m 3s)	1800	36%	1.0015
39m 24s (- 59m 7s)	2000	40%	1.0390
40m 32s (- 51m 36s)	2200	44%	1.0350
41m 40s (- 45m 8s)	2400	48%	1.0118
42m 48s (- 39m 30s)	2600	52%	1.0404
43m 56s (- 34m 31s)	2800	56%	1.0237
45m 5s  (- 30m 3s)	3000	60%	1.0321
46m 13s (- 26m 0s)	3200	64%	1.0687
47m 22s (- 22m 17s)	3400	68%	1.0559
48m 31s (- 18m 52s)	3600	72%	1.0668
49m 40s (- 15m 41s)	3800	76%	1.0496
50m 49s (- 12m 42s)	4000	80%	1.0595
51m 57s (- 9m 53s)	4200	84%	1.0475
53m 6s  (- 7m 14s)	4400	88%	1.0515
54m 15s (- 4m 43s)	4600	92%	1.0488
55m 24s (- 2m 18s)	4800	96%	1.0511
LEARNING RATE: 0.001000
57m 39s (- 1383m 40s)	200	4%	0.7537
58m 45s (- 675m 45s)	400	8%	0.6751
59m 51s (- 438m 58s)	600	12%	0.6655
60m 57s (- 320m 2s)	800	16%	0.6354
62m 3s  (- 248m 15s)	1000	20%	0.6334
63m 10s (- 200m 2s)	1200	24%	0.6289
64m 16s (- 165m 17s)	1400	28%	0.6423
65m 23s (- 138m 58s)	1600	32%	0.6568
66m 30s (- 118m 14s)	1800	36%	0.6518
67m 37s (- 101m 26s)	2000	40%	0.6510
68m 45s (- 87m 30s)	2200	44%	0.6353
69m 53s (- 75m 43s)	2400	48%	0.6496
71m 1s  (- 65m 33s)	2600	52%	0.6590
72m 9s  (- 56m 41s)	2800	56%	0.6494
73m 16s (- 48m 51s)	3000	60%	0.6614
74m 24s (- 41m 51s)	3200	64%	0.6560
75m 33s (- 35m 33s)	3400	68%	0.6972
76m 41s (- 29m 49s)	3600	72%	0.6815
77m 49s (- 24m 34s)	3800	76%	0.6766
78m 58s (- 19m 44s)	4000	80%	0.7018
80m 7s  (- 15m 15s)	4200	84%	0.6922
81m 16s (- 11m 5s)	4400	88%	0.7052
82m 26s (- 7m 10s)	4600	92%	0.7074
83m 36s (- 3m 29s)	4800	96%	0.7036
LEARNING RATE: 0.001000
85m 52s (- 2061m 8s)	200	4%	0.4574
86m 59s (- 1000m 21s)	400	8%	0.4301
88m 5s  (- 646m 3s)	600	12%	0.4552
89m 12s (- 468m 19s)	800	16%	0.4546
90m 19s (- 361m 16s)	1000	20%	0.4218
91m 26s (- 289m 34s)	1200	24%	0.4628
92m 33s (- 238m 1s)	1400	28%	0.4692
93m 41s (- 199m 5s)	1600	32%	0.5063
94m 48s (- 168m 32s)	1800	36%	0.4962
95m 56s (- 143m 55s)	2000	40%	0.5109
97m 4s  (- 123m 32s)	2200	44%	0.4950
98m 12s (- 106m 23s)	2400	48%	0.5173
99m 20s (- 91m 41s)	2600	52%	0.5186
100m 27s (- 78m 56s)	2800	56%	0.5256
101m 36s (- 67m 44s)	3000	60%	0.5566
102m 44s (- 57m 47s)	3200	64%	0.5623
103m 53s (- 48m 53s)	3400	68%	0.5661
105m 1s (- 40m 50s)	3600	72%	0.5732
106m 10s (- 33m 31s)	3800	76%	0.5881
107m 18s (- 26m 49s)	4000	80%	0.5939
108m 26s (- 20m 39s)	4200	84%	0.5870
109m 35s (- 14m 56s)	4400	88%	0.5847
110m 43s (- 9m 37s)	4600	92%	0.6017
111m 53s (- 4m 39s)	4800	96%	0.6170
LEARNING RATE: 0.000300
114m 9s (- 2739m 51s)	200	4%	0.3745
115m 15s (- 1325m 31s)	400	8%	0.3490
116m 22s (- 853m 27s)	600	12%	0.3577
117m 29s (- 616m 47s)	800	16%	0.3520
118m 35s (- 474m 21s)	1000	20%	0.3676
119m 42s (- 379m 4s)	1200	24%	0.3602
120m 49s (- 310m 41s)	1400	28%	0.3649
121m 57s (- 259m 9s)	1600	32%	0.3773
123m 5s (- 218m 49s)	1800	36%	0.3901
124m 12s (- 186m 19s)	2000	40%	0.3867
125m 20s (- 159m 31s)	2200	44%	0.3862
126m 28s (- 137m 0s)	2400	48%	0.3991
127m 36s (- 117m 47s)	2600	52%	0.4020
128m 43s (- 101m 8s)	2800	56%	0.4151
129m 52s (- 86m 35s)	3000	60%	0.4086
131m 0s (- 73m 41s)	3200	64%	0.4072
132m 9s (- 62m 11s)	3400	68%	0.4272
133m 17s (- 51m 50s)	3600	72%	0.4186
134m 26s (- 42m 27s)	3800	76%	0.4364
135m 34s (- 33m 53s)	4000	80%	0.4440
136m 43s (- 26m 2s)	4200	84%	0.4486
137m 53s (- 18m 48s)	4400	88%	0.4552
139m 2s (- 12m 5s)	4600	92%	0.4528
140m 11s (- 5m 50s)	4800	96%	0.4566
LEARNING RATE: 0.000300
142m 27s (- 3418m 52s)	200	4%	0.2805
143m 33s (- 1651m 0s)	400	8%	0.2795
144m 41s (- 1061m 4s)	600	12%	0.2881
145m 48s (- 765m 28s)	800	16%	0.2956
146m 55s (- 587m 42s)	1000	20%	0.2946
148m 2s (- 468m 46s)	1200	24%	0.2941
149m 9s (- 383m 33s)	1400	28%	0.3052
150m 17s (- 319m 21s)	1600	32%	0.3300
151m 24s (- 269m 10s)	1800	36%	0.3251
152m 32s (- 228m 48s)	2000	40%	0.3387
153m 41s (- 195m 36s)	2200	44%	0.3411
154m 49s (- 167m 43s)	2400	48%	0.3529
155m 57s (- 143m 57s)	2600	52%	0.3719
157m 5s (- 123m 25s)	2800	56%	0.3747
158m 14s (- 105m 29s)	3000	60%	0.3618
159m 22s (- 89m 38s)	3200	64%	0.3643
160m 31s (- 75m 32s)	3400	68%	0.3794
161m 40s (- 62m 52s)	3600	72%	0.3850
162m 48s (- 51m 24s)	3800	76%	0.3895
163m 57s (- 40m 59s)	4000	80%	0.3989
165m 6s (- 31m 26s)	4200	84%	0.4059
166m 15s (- 22m 40s)	4400	88%	0.3996
167m 24s (- 14m 33s)	4600	92%	0.4097
168m 33s (- 7m 1s)	4800	96%	0.4145
LEARNING RATE: 0.000100
170m 48s (- 4099m 35s)	200	4%	0.2531
171m 56s (- 1977m 16s)	400	8%	0.2516
173m 2s (- 1268m 56s)	600	12%	0.2535
174m 8s (- 914m 13s)	800	16%	0.2568
175m 15s (- 701m 2s)	1000	20%	0.2657
176m 23s (- 558m 33s)	1200	24%	0.2715
177m 30s (- 456m 26s)	1400	28%	0.2826
178m 36s (- 379m 33s)	1600	32%	0.2843
179m 44s (- 319m 33s)	1800	36%	0.2893
180m 51s (- 271m 17s)	2000	40%	0.3040
181m 59s (- 231m 37s)	2200	44%	0.3067
183m 7s (- 198m 22s)	2400	48%	0.3158
184m 42s (- 170m 29s)	2600	52%	0.3143
185m 52s (- 146m 2s)	2800	56%	0.3220
187m 1s (- 124m 40s)	3000	60%	0.3248
188m 9s (- 105m 50s)	3200	64%	0.3376
189m 17s (- 89m 4s)	3400	68%	0.3413
190m 26s (- 74m 3s)	3600	72%	0.3499
191m 34s (- 60m 29s)	3800	76%	0.3536
192m 44s (- 48m 11s)	4000	80%	0.3597
193m 53s (- 36m 55s)	4200	84%	0.3522
195m 2s (- 26m 35s)	4400	88%	0.3668
196m 11s (- 17m 3s)	4600	92%	0.3666
197m 21s (- 8m 13s)	4800	96%	0.3777
LEARNING RATE: 0.000030
199m 37s (- 4791m 4s)	200	4%	0.2409
200m 44s (- 2308m 27s)	400	8%	0.2386
201m 50s (- 1480m 12s)	600	12%	0.2365
202m 56s (- 1065m 25s)	800	16%	0.2375
204m 3s (- 816m 13s)	1000	20%	0.2521
205m 10s (- 649m 43s)	1200	24%	0.2591
206m 18s (- 530m 30s)	1400	28%	0.2554
207m 25s (- 440m 46s)	1600	32%	0.2701
208m 32s (- 370m 45s)	1800	36%	0.2679
209m 40s (- 314m 30s)	2000	40%	0.2826
210m 47s (- 268m 17s)	2200	44%	0.2832
211m 55s (- 229m 35s)	2400	48%	0.3107
213m 3s (- 196m 40s)	2600	52%	0.3027
214m 10s (- 168m 17s)	2800	56%	0.3028
215m 19s (- 143m 32s)	3000	60%	0.3089
216m 27s (- 121m 45s)	3200	64%	0.3211
217m 35s (- 102m 23s)	3400	68%	0.3281
218m 44s (- 85m 3s)	3600	72%	0.3302
219m 52s (- 69m 26s)	3800	76%	0.3368
221m 1s (- 55m 15s)	4000	80%	0.3475
222m 10s (- 42m 19s)	4200	84%	0.3503
223m 19s (- 30m 27s)	4400	88%	0.3538
224m 29s (- 19m 31s)	4600	92%	0.3592
225m 38s (- 9m 24s)	4800	96%	0.3627

In [84]:
with open(list_path+'graduated_reset_epochs_list.pkl', 'wb') as f:
    pickle.dump(graduated_reset_epochs_list, f)
with open(list_path+'graduated_reset_times_list.pkl', 'wb') as f:
    pickle.dump(graduated_reset_times_list, f)
with open(list_path+'graduated_reset_avg_loss_list.pkl', 'wb') as f:
    pickle.dump(graduated_reset_avg_loss_list, f)

In [85]:
torch.save(encoder, enc_dec_path+'graduated_reset_encoder.pth')
torch.save(decoder, enc_dec_path+'graduated_reset_decoder.pth')


/home/psxca1/anaconda3/lib/python3.6/site-packages/torch/serialization.py:147: UserWarning: Couldn't retrieve source code for container of type EncoderRNN. It won't be checked for correctness upon loading.
  "type " + obj.__name__ + ". It won't be checked "
/home/psxca1/anaconda3/lib/python3.6/site-packages/torch/serialization.py:147: UserWarning: Couldn't retrieve source code for container of type AttnDecoderRNN. It won't be checked for correctness upon loading.
  "type " + obj.__name__ + ". It won't be checked "

Plot


In [86]:
plt.plot(graduated_reset_times_list, graduated_reset_avg_loss_list)
plt.xlabel('Training Time (Minutes)')
plt.ylabel('Average Loss')
plt.title('Graduated (Reset) Teacher Forcing Loss over Time')
plt.savefig(plot_path+'grad_reset.png')
plt.show()


Test


In [87]:
test(eval_path+'graduated_reset')

Graduated Over Entire Time

Here the forcing ratio anneals from 1 to 0 once, across all 40,000 epochs, with learning rate drops at fixed milestones instead of separate trainEpochs calls.


In [94]:
def fullgraduated_trainEpochs(encoder, decoder, total_epochs, start_time, times_list, avg_loss_list, epochs_list,\
                              print_every=200, lr=0.003, plot_loss_every=20):
    print("LEARNING RATE: %f" % (lr))
    print_loss = 0 # Reset every print_every
    plot_loss = 0
    
    enc_opt = optim.Adam(req_grad_params(encoder), lr=lr)
    dec_opt = optim.Adam(decoder.parameters(), lr=lr)
    crit = nn.NLLLoss().cuda()
    
    for epoch in range(total_epochs):
        art, hdln = get_batch(art_train, hdln_train, 128)
        inp = long_t(art)
        targ = long_t(hdln)
        
        # Anneal the forcing ratio once over the entire run (no per-block reset)
        teacher_forcing_ratio = 1 - epoch/total_epochs
        
        loss = train(inp, targ, encoder, decoder, enc_opt, dec_opt, crit, teacher_forcing_ratio)
        print_loss += loss
        plot_loss += loss

        if epoch % print_every == 0 and epoch != 0:
            print('%s\t%d\t%d%%\t%.4f' % (time_since(start_time, epoch / total_epochs), \
                                          epoch, epoch / total_epochs * 100, print_loss / print_every))
            print_loss = 0
        
        if epoch % plot_loss_every == 0 and epoch != 0:
            times_list.append(calc_minutes(start_time))
            avg_loss_list.append(plot_loss / plot_loss_every)
            epochs_list.append(epoch)
            plot_loss = 0
        
        # Drop the learning rate at fixed milestones, rebuilding the optimizers
        lr_schedule = {10000: .001, 20000: .0003, 30000: .0001, 35000: .00003}
        if epoch in lr_schedule:
            lr = lr_schedule[epoch]
            print("LEARNING RATE: %f" % (lr))
            enc_opt = optim.Adam(req_grad_params(encoder), lr=lr)
            dec_opt = optim.Adam(decoder.parameters(), lr=lr)

In [95]:
n_layers = 1
hidden_size = 128
graduated_epochs_list = []
graduated_times_list = []
graduated_avg_loss_list = []

encoder = EncoderRNN(art_emb_t, hidden_size, n_layers).cuda()
decoder = AttnDecoderRNN(hdln_emb_t, hidden_size, n_layers).cuda()
fullgraduated_trainEpochs(encoder, decoder, 40000, time.time(), graduated_times_list, graduated_avg_loss_list, \
                          graduated_epochs_list)


LEARNING RATE: 0.003000
1m 6s   (- 220m 27s)	200	0%	2.6109
2m 12s  (- 218m 38s)	400	1%	2.1162
3m 18s  (- 217m 22s)	600	1%	1.8895
4m 25s  (- 216m 33s)	800	2%	1.7474
5m 31s  (- 215m 28s)	1000	2%	1.6410
6m 39s  (- 215m 3s)	1200	3%	1.5354
7m 45s  (- 213m 53s)	1400	3%	1.4663
8m 52s  (- 213m 1s)	1600	4%	1.4189
9m 59s  (- 212m 2s)	1800	4%	1.3220
11m 6s  (- 211m 7s)	2000	5%	1.2754
12m 13s (- 209m 59s)	2200	5%	1.2047
13m 19s (- 208m 51s)	2400	6%	1.1807
14m 27s (- 207m 59s)	2600	6%	1.1763
15m 34s (- 206m 58s)	2800	7%	1.1196
16m 48s (- 207m 24s)	3000	7%	1.1174
17m 56s (- 206m 16s)	3200	8%	1.0825
19m 4s  (- 205m 18s)	3400	8%	1.0443
20m 11s (- 204m 7s)	3600	9%	0.9974
21m 18s (- 202m 56s)	3800	9%	1.0187
22m 25s (- 201m 53s)	4000	10%	1.0179
23m 33s (- 200m 47s)	4200	10%	0.9913
24m 54s (- 201m 28s)	4400	11%	0.9559
26m 0s  (- 200m 11s)	4600	11%	0.9315
27m 6s  (- 198m 47s)	4800	12%	0.9111
28m 13s (- 197m 33s)	5000	12%	0.9247
29m 19s (- 196m 12s)	5200	13%	0.8759
30m 25s (- 194m 54s)	5400	13%	0.9674
31m 30s (- 193m 33s)	5600	14%	0.8628
32m 36s (- 192m 19s)	5800	14%	0.8103
33m 43s (- 191m 7s)	6000	15%	0.8429
34m 50s (- 189m 57s)	6200	15%	0.8346
35m 58s (- 188m 51s)	6400	16%	0.8492
37m 5s  (- 187m 42s)	6600	16%	0.8705
38m 12s (- 186m 32s)	6800	17%	0.8245
39m 19s (- 185m 23s)	7000	17%	0.8143
40m 26s (- 184m 14s)	7200	18%	0.7595
41m 33s (- 183m 5s)	7400	18%	0.8476
42m 40s (- 181m 56s)	7600	19%	0.7931
43m 48s (- 180m 50s)	7800	19%	0.8013
44m 55s (- 179m 41s)	8000	20%	0.7382
46m 3s  (- 178m 35s)	8200	20%	0.7475
47m 10s (- 177m 27s)	8400	21%	0.7693
48m 17s (- 176m 18s)	8600	21%	0.7596
49m 24s (- 175m 9s)	8800	22%	0.7468
50m 31s (- 174m 1s)	9000	22%	0.7479
51m 39s (- 172m 55s)	9200	23%	0.7163
52m 46s (- 171m 48s)	9400	23%	0.7381
53m 53s (- 170m 39s)	9600	24%	0.7302
55m 0s  (- 169m 31s)	9800	24%	0.7505
56m 8s  (- 168m 24s)	10000	25%	0.7594
LEARNING RATE: 0.001000
57m 15s (- 167m 17s)	10200	25%	0.7170
58m 23s (- 166m 10s)	10400	26%	0.6554
59m 30s (- 165m 2s)	10600	26%	0.6557
60m 38s (- 163m 57s)	10800	27%	0.6500
61m 45s (- 162m 49s)	11000	27%	0.5696
62m 52s (- 161m 40s)	11200	28%	0.5784
64m 0s  (- 160m 34s)	11400	28%	0.6043
65m 7s  (- 159m 27s)	11600	28%	0.5792
66m 15s (- 158m 21s)	11800	29%	0.5340
67m 24s (- 157m 16s)	12000	30%	0.5758
68m 32s (- 156m 11s)	12200	30%	0.5787
69m 40s (- 155m 4s)	12400	31%	0.5391
70m 48s (- 153m 58s)	12600	31%	0.5401
71m 55s (- 152m 50s)	12800	32%	0.5337
73m 4s  (- 151m 45s)	13000	32%	0.5101
74m 12s (- 150m 39s)	13200	33%	0.5256
75m 19s (- 149m 31s)	13400	33%	0.5190
76m 26s (- 148m 23s)	13600	34%	0.5340
77m 34s (- 147m 16s)	13800	34%	0.5465
78m 41s (- 146m 8s)	14000	35%	0.5314
79m 50s (- 145m 3s)	14200	35%	0.5088
80m 58s (- 143m 57s)	14400	36%	0.5260
82m 6s  (- 142m 50s)	14600	36%	0.4823
83m 13s (- 141m 42s)	14800	37%	0.4766
84m 20s (- 140m 34s)	15000	37%	0.5050
85m 28s (- 139m 26s)	15200	38%	0.4836
86m 35s (- 138m 18s)	15400	38%	0.5063
87m 42s (- 137m 10s)	15600	39%	0.4993
88m 53s (- 136m 8s)	15800	39%	0.4824
90m 1s  (- 135m 1s)	16000	40%	0.4912
91m 8s  (- 133m 54s)	16200	40%	0.4936
92m 16s (- 132m 47s)	16400	41%	0.5031
93m 24s (- 131m 40s)	16600	41%	0.4761
94m 32s (- 130m 33s)	16800	42%	0.4977
95m 40s (- 129m 26s)	17000	42%	0.4655
96m 48s (- 128m 19s)	17200	43%	0.4752
97m 56s (- 127m 12s)	17400	43%	0.4791
99m 4s  (- 126m 5s)	17600	44%	0.4848
100m 12s (- 124m 58s)	17800	44%	0.4817
101m 20s (- 123m 51s)	18000	45%	0.4338
102m 27s (- 122m 43s)	18200	45%	0.4597
103m 35s (- 121m 36s)	18400	46%	0.4940
104m 43s (- 120m 29s)	18600	46%	0.4790
105m 51s (- 119m 22s)	18800	47%	0.4698
107m 0s (- 118m 15s)	19000	47%	0.4610
108m 8s (- 117m 9s)	19200	48%	0.4562
109m 16s (- 116m 2s)	19400	48%	0.4858
110m 24s (- 114m 54s)	19600	49%	0.4496
111m 32s (- 113m 47s)	19800	49%	0.4446
112m 40s (- 112m 40s)	20000	50%	0.4825
LEARNING RATE: 0.000300
113m 49s (- 111m 34s)	20200	50%	0.4673
114m 58s (- 110m 27s)	20400	51%	0.4604
116m 6s (- 109m 20s)	20600	51%	0.4372
117m 13s (- 108m 12s)	20800	52%	0.4062
118m 22s (- 107m 5s)	21000	52%	0.4110
119m 30s (- 105m 58s)	21200	53%	0.4171
120m 37s (- 104m 50s)	21400	53%	0.4260
121m 44s (- 103m 42s)	21600	54%	0.3985
122m 51s (- 102m 34s)	21800	54%	0.3711
123m 59s (- 101m 26s)	22000	55%	0.3838
125m 6s (- 100m 18s)	22200	55%	0.3905
126m 15s (- 99m 11s)	22400	56%	0.3832
127m 23s (- 98m 4s)	22600	56%	0.3770
128m 30s (- 96m 56s)	22800	56%	0.3937
129m 37s (- 95m 48s)	23000	57%	0.3806
130m 46s (- 94m 41s)	23200	57%	0.3952
131m 54s (- 93m 34s)	23400	58%	0.3772
133m 2s (- 92m 27s)	23600	59%	0.3793
134m 11s (- 91m 20s)	23800	59%	0.3799
135m 18s (- 90m 12s)	24000	60%	0.3612
136m 26s (- 89m 5s)	24200	60%	0.3832
137m 35s (- 87m 58s)	24400	61%	0.3742
138m 43s (- 86m 50s)	24600	61%	0.3634
139m 51s (- 85m 43s)	24800	62%	0.3882
140m 59s (- 84m 35s)	25000	62%	0.3524
142m 7s (- 83m 28s)	25200	63%	0.3580
143m 16s (- 82m 21s)	25400	63%	0.3623
144m 24s (- 81m 13s)	25600	64%	0.3675
145m 32s (- 80m 6s)	25800	64%	0.3619
146m 40s (- 78m 58s)	26000	65%	0.3689
147m 48s (- 77m 51s)	26200	65%	0.3780
148m 55s (- 76m 43s)	26400	66%	0.3679
150m 3s (- 75m 35s)	26600	66%	0.3919
151m 12s (- 74m 28s)	26800	67%	0.3602
152m 19s (- 73m 20s)	27000	67%	0.3684
153m 28s (- 72m 13s)	27200	68%	0.3723
154m 36s (- 71m 5s)	27400	68%	0.3644
155m 44s (- 69m 58s)	27600	69%	0.3556
156m 52s (- 68m 50s)	27800	69%	0.3618
158m 1s (- 67m 43s)	28000	70%	0.3709
159m 10s (- 66m 36s)	28200	70%	0.3723
160m 19s (- 65m 28s)	28400	71%	0.3669
161m 27s (- 64m 21s)	28600	71%	0.3631
162m 35s (- 63m 13s)	28800	72%	0.3749
163m 43s (- 62m 6s)	29000	72%	0.3692
164m 53s (- 60m 59s)	29200	73%	0.3520
166m 1s (- 59m 51s)	29400	73%	0.3807
167m 10s (- 58m 44s)	29600	74%	0.3738
168m 18s (- 57m 36s)	29800	74%	0.3724
169m 26s (- 56m 28s)	30000	75%	0.3630
LEARNING RATE: 0.000100
170m 34s (- 55m 21s)	30200	75%	0.3651
171m 44s (- 54m 14s)	30400	76%	0.3627
173m 0s (- 53m 8s)	30600	76%	0.3579
174m 9s (- 52m 1s)	30800	77%	0.3571
175m 17s (- 50m 53s)	31000	77%	0.3608
176m 25s (- 49m 45s)	31200	78%	0.3491
177m 34s (- 48m 38s)	31400	78%	0.3579
178m 43s (- 47m 30s)	31600	79%	0.3544
179m 52s (- 46m 22s)	31800	79%	0.3562
181m 0s (- 45m 15s)	32000	80%	0.3510
182m 9s (- 44m 7s)	32200	80%	0.3712
183m 17s (- 42m 59s)	32400	81%	0.3501
184m 26s (- 41m 52s)	32600	81%	0.3508
185m 34s (- 40m 44s)	32800	82%	0.3422
186m 43s (- 39m 36s)	33000	82%	0.3548
187m 52s (- 38m 28s)	33200	83%	0.3546
189m 1s (- 37m 21s)	33400	83%	0.3514
190m 9s (- 36m 13s)	33600	84%	0.3487
191m 18s (- 35m 5s)	33800	84%	0.3509
192m 27s (- 33m 57s)	34000	85%	0.3564
193m 35s (- 32m 49s)	34200	85%	0.3550
194m 44s (- 31m 42s)	34400	86%	0.3480
195m 53s (- 30m 34s)	34600	86%	0.3573
197m 2s (- 29m 26s)	34800	87%	0.3500
198m 10s (- 28m 18s)	35000	87%	0.3520
LEARNING RATE: 0.000030
199m 19s (- 27m 10s)	35200	88%	0.3526
200m 27s (- 26m 2s)	35400	88%	0.3479
201m 36s (- 24m 55s)	35600	89%	0.3517
202m 45s (- 23m 47s)	35800	89%	0.3639
203m 54s (- 22m 39s)	36000	90%	0.3591
205m 3s (- 21m 31s)	36200	90%	0.3541
206m 11s (- 20m 23s)	36400	91%	0.3512
207m 20s (- 19m 15s)	36600	91%	0.3528
208m 28s (- 18m 7s)	36800	92%	0.3640
209m 37s (- 16m 59s)	37000	92%	0.3603
210m 47s (- 15m 51s)	37200	93%	0.3469
211m 56s (- 14m 44s)	37400	93%	0.3658
213m 5s (- 13m 36s)	37600	94%	0.3641
214m 14s (- 12m 28s)	37800	94%	0.3579
215m 24s (- 11m 20s)	38000	95%	0.3634
216m 33s (- 10m 12s)	38200	95%	0.3596
217m 42s (- 9m 4s)	38400	96%	0.3629
218m 52s (- 7m 56s)	38600	96%	0.3603
220m 1s (- 6m 48s)	38800	97%	0.3668
221m 10s (- 5m 40s)	39000	97%	0.3568
222m 19s (- 4m 32s)	39200	98%	0.3652
223m 29s (- 3m 24s)	39400	98%	0.3624
224m 38s (- 2m 16s)	39600	99%	0.3692
225m 47s (- 1m 8s)	39800	99%	0.3635

In [96]:
torch.save(encoder, enc_dec_path+'graduated_encoder.pth')
torch.save(decoder, enc_dec_path+'graduated_decoder.pth')


/home/psxca1/anaconda3/lib/python3.6/site-packages/torch/serialization.py:147: UserWarning: Couldn't retrieve source code for container of type EncoderRNN. It won't be checked for correctness upon loading.
  "type " + obj.__name__ + ". It won't be checked "
/home/psxca1/anaconda3/lib/python3.6/site-packages/torch/serialization.py:147: UserWarning: Couldn't retrieve source code for container of type AttnDecoderRNN. It won't be checked for correctness upon loading.
  "type " + obj.__name__ + ". It won't be checked "

In [97]:
with open(list_path+'graduated_epochs_list.pkl', 'wb') as f:
    pickle.dump(graduated_epochs_list, f)
with open(list_path+'graduated_times_list.pkl', 'wb') as f:
    pickle.dump(graduated_times_list, f)
with open(list_path+'graduated_avg_loss_list.pkl', 'wb') as f:
    pickle.dump(graduated_avg_loss_list, f)

In [98]:
plt.plot(graduated_times_list, graduated_avg_loss_list)
plt.xlabel('Training Time (Minutes)')
plt.ylabel('Average Loss')
plt.title('Graduated Teacher Forcing Loss over Time')
plt.savefig(plot_path+'grad.png')
plt.show()



In [99]:
test(eval_path+'graduated')

Absolute Teacher Forcing

With teacher_forcing='full', the decoder is always fed the ground-truth previous token during training.


In [101]:
n_layers = 1
hidden_size = 128
full_epochs_list = []
full_times_list = []
full_avg_loss_list = []

encoder = EncoderRNN(art_emb_t, hidden_size, n_layers).cuda()
decoder = AttnDecoderRNN(hdln_emb_t, hidden_size, n_layers).cuda()
multi_train(encoder, decoder, full_times_list, full_avg_loss_list, full_epochs_list, 'full')


LEARNING RATE: 0.003000
1m 8s   (- 27m 19s)	200	4%	2.6000
2m 14s  (- 25m 49s)	400	8%	2.0982
3m 21s  (- 24m 34s)	600	12%	1.8854
4m 27s  (- 23m 25s)	800	16%	1.7299
5m 34s  (- 22m 17s)	1000	20%	1.6063
6m 40s  (- 21m 8s)	1200	24%	1.5054
7m 46s  (- 19m 58s)	1400	28%	1.4277
8m 53s  (- 18m 52s)	1600	32%	1.3450
9m 59s  (- 17m 46s)	1800	36%	1.2805
11m 5s  (- 16m 38s)	2000	40%	1.2178
12m 11s (- 15m 31s)	2200	44%	1.1734
13m 18s (- 14m 25s)	2400	48%	1.1246
14m 24s (- 13m 17s)	2600	52%	1.0810
15m 31s (- 12m 11s)	2800	56%	1.0445
16m 37s (- 11m 5s)	3000	60%	1.0031
17m 43s (- 9m 58s)	3200	64%	0.9734
18m 49s (- 8m 51s)	3400	68%	0.9473
19m 55s (- 7m 44s)	3600	72%	0.9170
21m 1s  (- 6m 38s)	3800	76%	0.8829
22m 6s  (- 5m 31s)	4000	80%	0.8581
23m 12s (- 4m 25s)	4200	84%	0.8321
24m 16s (- 3m 18s)	4400	88%	0.8095
25m 21s (- 2m 12s)	4600	92%	0.7838
26m 27s (- 1m 6s)	4800	96%	0.7633
LEARNING RATE: 0.003000
28m 40s (- 688m 18s)	200	4%	0.7665
29m 46s (- 342m 26s)	400	8%	0.7638
30m 53s (- 226m 31s)	600	12%	0.7336
31m 59s (- 167m 57s)	800	16%	0.7099
33m 6s  (- 132m 27s)	1000	20%	0.6851
34m 12s (- 108m 20s)	1200	24%	0.6762
35m 19s (- 90m 49s)	1400	28%	0.6537
36m 25s (- 77m 25s)	1600	32%	0.6317
37m 32s (- 66m 44s)	1800	36%	0.6114
38m 39s (- 57m 59s)	2000	40%	0.6085
39m 44s (- 50m 35s)	2200	44%	0.5944
40m 51s (- 44m 16s)	2400	48%	0.5807
41m 58s (- 38m 44s)	2600	52%	0.5707
43m 5s  (- 33m 51s)	2800	56%	0.5587
44m 12s (- 29m 28s)	3000	60%	0.5499
45m 17s (- 25m 28s)	3200	64%	0.5365
46m 24s (- 21m 50s)	3400	68%	0.5284
47m 30s (- 18m 28s)	3600	72%	0.5222
48m 36s (- 15m 21s)	3800	76%	0.5077
49m 42s (- 12m 25s)	4000	80%	0.5000
50m 48s (- 9m 40s)	4200	84%	0.4915
51m 54s (- 7m 4s)	4400	88%	0.4799
52m 59s (- 4m 36s)	4600	92%	0.4779
54m 5s  (- 2m 15s)	4800	96%	0.4635
LEARNING RATE: 0.001000
56m 18s (- 1351m 19s)	200	4%	0.4307
57m 24s (- 660m 15s)	400	8%	0.3921
58m 31s (- 429m 10s)	600	12%	0.3713
59m 37s (- 313m 3s)	800	16%	0.3575
60m 44s (- 242m 57s)	1000	20%	0.3329
61m 50s (- 195m 51s)	1200	24%	0.3230
62m 56s (- 161m 51s)	1400	28%	0.3098
64m 2s  (- 136m 5s)	1600	32%	0.3056
65m 8s  (- 115m 48s)	1800	36%	0.2921
66m 16s (- 99m 24s)	2000	40%	0.2855
67m 21s (- 85m 43s)	2200	44%	0.2772
68m 28s (- 74m 10s)	2400	48%	0.2733
69m 34s (- 64m 13s)	2600	52%	0.2683
70m 41s (- 55m 32s)	2800	56%	0.2571
71m 47s (- 47m 51s)	3000	60%	0.2582
72m 54s (- 41m 0s)	3200	64%	0.2537
74m 0s  (- 34m 49s)	3400	68%	0.2451
75m 8s  (- 29m 13s)	3600	72%	0.2428
76m 14s (- 24m 4s)	3800	76%	0.2370
77m 20s (- 19m 20s)	4000	80%	0.2353
78m 27s (- 14m 56s)	4200	84%	0.2286
79m 34s (- 10m 51s)	4400	88%	0.2285
80m 40s (- 7m 0s)	4600	92%	0.2243
81m 46s (- 3m 24s)	4800	96%	0.2200
LEARNING RATE: 0.001000
83m 59s (- 2015m 50s)	200	4%	0.2192
85m 6s  (- 978m 47s)	400	8%	0.2152
86m 13s (- 632m 17s)	600	12%	0.2142
87m 20s (- 458m 30s)	800	16%	0.2104
88m 26s (- 353m 46s)	1000	20%	0.2077
89m 32s (- 283m 31s)	1200	24%	0.2068
90m 38s (- 233m 5s)	1400	28%	0.2009
91m 45s (- 194m 59s)	1600	32%	0.1989
92m 52s (- 165m 7s)	1800	36%	0.1962
93m 59s (- 140m 59s)	2000	40%	0.1932
95m 5s  (- 121m 1s)	2200	44%	0.1886
96m 12s (- 104m 13s)	2400	48%	0.1877
97m 18s (- 89m 49s)	2600	52%	0.1877
98m 25s (- 77m 20s)	2800	56%	0.1851
99m 32s (- 66m 21s)	3000	60%	0.1784
100m 38s (- 56m 36s)	3200	64%	0.1791
101m 43s (- 47m 52s)	3400	68%	0.1766
102m 50s (- 39m 59s)	3600	72%	0.1748
103m 56s (- 32m 49s)	3800	76%	0.1745
105m 3s (- 26m 15s)	4000	80%	0.1716
106m 9s (- 20m 13s)	4200	84%	0.1693
107m 15s (- 14m 37s)	4400	88%	0.1686
108m 20s (- 9m 25s)	4600	92%	0.1636
109m 27s (- 4m 33s)	4800	96%	0.1632
LEARNING RATE: 0.000300
111m 40s (- 2680m 6s)	200	4%	0.1573
112m 47s (- 1297m 0s)	400	8%	0.1499
113m 53s (- 835m 13s)	600	12%	0.1411
114m 59s (- 603m 42s)	800	16%	0.1355
116m 5s (- 464m 22s)	1000	20%	0.1337
117m 12s (- 371m 8s)	1200	24%	0.1324
118m 18s (- 304m 13s)	1400	28%	0.1280
119m 24s (- 253m 44s)	1600	32%	0.1265
120m 30s (- 214m 13s)	1800	36%	0.1266
121m 36s (- 182m 24s)	2000	40%	0.1272
122m 43s (- 156m 11s)	2200	44%	0.1230
123m 49s (- 134m 8s)	2400	48%	0.1215
124m 54s (- 115m 18s)	2600	52%	0.1206
126m 0s (- 99m 0s)	2800	56%	0.1169
127m 7s (- 84m 44s)	3000	60%	0.1187
128m 13s (- 72m 7s)	3200	64%	0.1161
129m 20s (- 60m 52s)	3400	68%	0.1163
130m 27s (- 50m 43s)	3600	72%	0.1156
131m 33s (- 41m 32s)	3800	76%	0.1139
132m 38s (- 33m 9s)	4000	80%	0.1106
133m 44s (- 25m 28s)	4200	84%	0.1088
134m 51s (- 18m 23s)	4400	88%	0.1110
135m 56s (- 11m 49s)	4600	92%	0.1089
137m 3s (- 5m 42s)	4800	96%	0.1095
LEARNING RATE: 0.000300
139m 16s (- 3342m 28s)	200	4%	0.1093
140m 22s (- 1614m 21s)	400	8%	0.1079
141m 29s (- 1037m 33s)	600	12%	0.1085
142m 35s (- 748m 36s)	800	16%	0.1062
143m 42s (- 574m 48s)	1000	20%	0.1061
144m 47s (- 458m 31s)	1200	24%	0.1049
145m 54s (- 375m 10s)	1400	28%	0.1049
147m 0s (- 312m 22s)	1600	32%	0.1049
148m 6s (- 263m 19s)	1800	36%	0.1038
149m 13s (- 223m 49s)	2000	40%	0.1027
150m 19s (- 191m 19s)	2200	44%	0.1035
151m 26s (- 164m 3s)	2400	48%	0.1016
152m 32s (- 140m 48s)	2600	52%	0.1011
153m 39s (- 120m 43s)	2800	56%	0.0996
154m 45s (- 103m 10s)	3000	60%	0.0988
155m 50s (- 87m 39s)	3200	64%	0.0971
156m 56s (- 73m 51s)	3400	68%	0.0980
158m 3s (- 61m 28s)	3600	72%	0.0966
159m 9s (- 50m 15s)	3800	76%	0.0964
160m 16s (- 40m 4s)	4000	80%	0.0961
161m 22s (- 30m 44s)	4200	84%	0.0958
162m 29s (- 22m 9s)	4400	88%	0.0958
163m 35s (- 14m 13s)	4600	92%	0.0936
164m 40s (- 6m 51s)	4800	96%	0.0942
LEARNING RATE: 0.000100
166m 55s (- 4006m 5s)	200	4%	0.0937
168m 1s (- 1932m 22s)	400	8%	0.0908
169m 8s (- 1240m 21s)	600	12%	0.0905
170m 14s (- 893m 46s)	800	16%	0.0885
171m 21s (- 685m 26s)	1000	20%	0.0884
172m 28s (- 546m 9s)	1200	24%	0.0889
173m 34s (- 446m 19s)	1400	28%	0.0869
174m 41s (- 371m 12s)	1600	32%	0.0860
175m 48s (- 312m 32s)	1800	36%	0.0877
176m 54s (- 265m 21s)	2000	40%	0.0862
178m 1s (- 226m 34s)	2200	44%	0.0853
179m 6s (- 194m 2s)	2400	48%	0.0875
180m 13s (- 166m 21s)	2600	52%	0.0875
181m 19s (- 142m 28s)	2800	56%	0.0855
182m 26s (- 121m 37s)	3000	60%	0.0863
183m 36s (- 103m 16s)	3200	64%	0.0832
184m 51s (- 86m 59s)	3400	68%	0.0864
185m 58s (- 72m 19s)	3600	72%	0.0842
187m 3s (- 59m 4s)	3800	76%	0.0846
188m 10s (- 47m 2s)	4000	80%	0.0835
189m 16s (- 36m 3s)	4200	84%	0.0836
190m 23s (- 25m 57s)	4400	88%	0.0846
191m 29s (- 16m 39s)	4600	92%	0.0844
192m 36s (- 8m 1s)	4800	96%	0.0817
LEARNING RATE: 0.000030
194m 47s (- 4675m 9s)	200	4%	0.0845
195m 54s (- 2252m 58s)	400	8%	0.0823
197m 0s (- 1444m 43s)	600	12%	0.0823
198m 6s (- 1040m 1s)	800	16%	0.0834
199m 12s (- 796m 51s)	1000	20%	0.0812
200m 19s (- 634m 20s)	1200	24%	0.0818
201m 25s (- 517m 56s)	1400	28%	0.0816
202m 32s (- 430m 23s)	1600	32%	0.0825
203m 38s (- 362m 1s)	1800	36%	0.0788
204m 44s (- 307m 7s)	2000	40%	0.0832
205m 50s (- 261m 58s)	2200	44%	0.0808
206m 56s (- 224m 11s)	2400	48%	0.0822
208m 2s (- 192m 2s)	2600	52%	0.0796
209m 8s (- 164m 19s)	2800	56%	0.0828
210m 15s (- 140m 10s)	3000	60%	0.0810
211m 22s (- 118m 53s)	3200	64%	0.0794
212m 27s (- 99m 59s)	3400	68%	0.0801
213m 33s (- 83m 3s)	3600	72%	0.0809
214m 40s (- 67m 47s)	3800	76%	0.0804
215m 47s (- 53m 56s)	4000	80%	0.0803
216m 53s (- 41m 18s)	4200	84%	0.0780
218m 0s (- 29m 43s)	4400	88%	0.0782
219m 6s (- 19m 3s)	4600	92%	0.0785
220m 11s (- 9m 10s)	4800	96%	0.0800

In [102]:
torch.save(encoder, enc_dec_path+'full_tf_encoder.pth')
torch.save(decoder, enc_dec_path+'full_tf_decoder.pth')


/home/psxca1/anaconda3/lib/python3.6/site-packages/torch/serialization.py:147: UserWarning: Couldn't retrieve source code for container of type EncoderRNN. It won't be checked for correctness upon loading.
  "type " + obj.__name__ + ". It won't be checked "
/home/psxca1/anaconda3/lib/python3.6/site-packages/torch/serialization.py:147: UserWarning: Couldn't retrieve source code for container of type AttnDecoderRNN. It won't be checked for correctness upon loading.
  "type " + obj.__name__ + ". It won't be checked "

In [103]:
with open(list_path+'full_epochs_list.pkl', 'wb') as f:
    pickle.dump(full_epochs_list, f)
with open(list_path+'full_times_list.pkl', 'wb') as f:
    pickle.dump(full_times_list, f)
with open(list_path+'full_avg_loss_list.pkl', 'wb') as f:
    pickle.dump(full_avg_loss_list, f)

In [104]:
plt.plot(full_times_list, full_avg_loss_list)
plt.xlabel('Training Time (Minutes)')
plt.ylabel('Average Loss')
plt.title('Full Teacher Forcing Loss over Time')
plt.savefig(plot_path+'full_tf.png')
plt.show()



In [105]:
test(eval_path+'full_tf')

No Teacher Forcing

With teacher_forcing='none', the decoder is always fed its own previous prediction during training.


In [35]:
n_layers = 1
hidden_size = 128
none_epochs_list = []
none_times_list = []
none_avg_loss_list = []

encoder = EncoderRNN(art_emb_t, hidden_size, n_layers).cuda()
decoder = AttnDecoderRNN(hdln_emb_t, hidden_size, n_layers).cuda()
multi_train(encoder, decoder, none_times_list, none_avg_loss_list, none_epochs_list, 'none')


LEARNING RATE: 0.003000
1m 9s   (- 27m 53s)	200	4%	2.7357
2m 16s  (- 26m 13s)	400	8%	2.3854
3m 23s  (- 24m 53s)	600	12%	2.2347
4m 30s  (- 23m 39s)	800	16%	2.1376
5m 38s  (- 22m 33s)	1000	20%	2.0458
6m 46s  (- 21m 27s)	1200	24%	1.9624
7m 52s  (- 20m 16s)	1400	28%	1.8985
8m 58s  (- 19m 3s)	1600	32%	1.8227
10m 2s  (- 17m 51s)	1800	36%	1.7608
11m 9s  (- 16m 43s)	2000	40%	1.7077
12m 17s (- 15m 38s)	2200	44%	1.6629
13m 26s (- 14m 33s)	2400	48%	1.6294
14m 35s (- 13m 28s)	2600	52%	1.5873
15m 44s (- 12m 22s)	2800	56%	1.5424
16m 54s (- 11m 16s)	3000	60%	1.5174
18m 2s  (- 10m 9s)	3200	64%	1.4929
19m 12s (- 9m 2s)	3400	68%	1.4585
20m 22s (- 7m 55s)	3600	72%	1.4384
21m 32s (- 6m 48s)	3800	76%	1.3984
22m 42s (- 5m 40s)	4000	80%	1.3829
23m 53s (- 4m 33s)	4200	84%	1.3588
25m 4s  (- 3m 25s)	4400	88%	1.3303
26m 14s (- 2m 16s)	4600	92%	1.3160
27m 24s (- 1m 8s)	4800	96%	1.3017
LEARNING RATE: 0.003000
29m 44s (- 713m 56s)	200	4%	1.3075
30m 55s (- 355m 34s)	400	8%	1.2749
32m 5s  (- 235m 17s)	600	12%	1.2536
33m 15s (- 174m 35s)	800	16%	1.2291
34m 25s (- 137m 40s)	1000	20%	1.2076
35m 34s (- 112m 40s)	1200	24%	1.1914
36m 45s (- 94m 30s)	1400	28%	1.1767
37m 55s (- 80m 35s)	1600	32%	1.1555
39m 5s  (- 69m 30s)	1800	36%	1.1528
40m 15s (- 60m 23s)	2000	40%	1.1391
41m 26s (- 52m 44s)	2200	44%	1.1150
42m 36s (- 46m 9s)	2400	48%	1.1085
43m 47s (- 40m 25s)	2600	52%	1.0975
44m 56s (- 35m 18s)	2800	56%	1.0907
46m 6s  (- 30m 44s)	3000	60%	1.0760
47m 17s (- 26m 36s)	3200	64%	1.0697
48m 28s (- 22m 48s)	3400	68%	1.0564
49m 37s (- 19m 18s)	3600	72%	1.0404
50m 48s (- 16m 2s)	3800	76%	1.0349
51m 58s (- 12m 59s)	4000	80%	1.0169
53m 8s  (- 10m 7s)	4200	84%	1.0153
54m 18s (- 7m 24s)	4400	88%	1.0049
55m 28s (- 4m 49s)	4600	92%	1.0053
56m 38s (- 2m 21s)	4800	96%	0.9820
LEARNING RATE: 0.001000
58m 58s (- 1415m 33s)	200	4%	0.9463
60m 8s  (- 691m 38s)	400	8%	0.8940
61m 18s (- 449m 34s)	600	12%	0.8641
62m 29s (- 328m 2s)	800	16%	0.8416
63m 39s (- 254m 38s)	1000	20%	0.8188
64m 50s (- 205m 18s)	1200	24%	0.8046
66m 0s  (- 169m 43s)	1400	28%	0.7832
67m 9s  (- 142m 43s)	1600	32%	0.7720
68m 20s (- 121m 29s)	1800	36%	0.7591
69m 30s (- 104m 15s)	2000	40%	0.7430
70m 41s (- 89m 57s)	2200	44%	0.7347
71m 51s (- 77m 51s)	2400	48%	0.7221
73m 1s  (- 67m 24s)	2600	52%	0.7208
74m 12s (- 58m 18s)	2800	56%	0.7103
75m 21s (- 50m 14s)	3000	60%	0.7032
76m 31s (- 43m 2s)	3200	64%	0.6922
77m 42s (- 36m 33s)	3400	68%	0.6847
78m 52s (- 30m 40s)	3600	72%	0.6769
80m 2s  (- 25m 16s)	3800	76%	0.6755
81m 12s (- 20m 18s)	4000	80%	0.6711
82m 22s (- 15m 41s)	4200	84%	0.6647
83m 32s (- 11m 23s)	4400	88%	0.6677
84m 42s (- 7m 21s)	4600	92%	0.6518
85m 52s (- 3m 34s)	4800	96%	0.6479
LEARNING RATE: 0.001000
88m 12s (- 2116m 55s)	200	4%	0.6441
89m 22s (- 1027m 52s)	400	8%	0.6505
90m 32s (- 664m 1s)	600	12%	0.6488
91m 43s (- 481m 32s)	800	16%	0.6437
92m 53s (- 371m 32s)	1000	20%	0.6376
94m 3s  (- 297m 50s)	1200	24%	0.6344
95m 13s (- 244m 51s)	1400	28%	0.6271
96m 23s (- 204m 49s)	1600	32%	0.6143
97m 33s (- 173m 25s)	1800	36%	0.6092
98m 43s (- 148m 5s)	2000	40%	0.6095
99m 53s (- 127m 7s)	2200	44%	0.6004
101m 3s (- 109m 28s)	2400	48%	0.5944
102m 13s (- 94m 21s)	2600	52%	0.5980
103m 23s (- 81m 14s)	2800	56%	0.5849
104m 33s (- 69m 42s)	3000	60%	0.5859
105m 43s (- 59m 28s)	3200	64%	0.5861
106m 54s (- 50m 18s)	3400	68%	0.5742
108m 3s (- 42m 1s)	3600	72%	0.5760
109m 14s (- 34m 29s)	3800	76%	0.5647
110m 24s (- 27m 36s)	4000	80%	0.5605
111m 34s (- 21m 15s)	4200	84%	0.5572
112m 43s (- 15m 22s)	4400	88%	0.5602
113m 53s (- 9m 54s)	4600	92%	0.5546
115m 3s (- 4m 47s)	4800	96%	0.5467
LEARNING RATE: 0.000300
117m 24s (- 2817m 53s)	200	4%	0.5351
118m 35s (- 1363m 44s)	400	8%	0.5155
119m 45s (- 878m 15s)	600	12%	0.5099
120m 55s (- 634m 52s)	800	16%	0.4985
122m 5s (- 488m 22s)	1000	20%	0.4881
123m 15s (- 390m 18s)	1200	24%	0.4786
124m 25s (- 319m 57s)	1400	28%	0.4791
125m 35s (- 266m 53s)	1600	32%	0.4671
126m 47s (- 225m 23s)	1800	36%	0.4647
127m 57s (- 191m 56s)	2000	40%	0.4556
129m 8s (- 164m 21s)	2200	44%	0.4570
130m 19s (- 141m 10s)	2400	48%	0.4491
131m 29s (- 121m 22s)	2600	52%	0.4468
132m 41s (- 104m 15s)	2800	56%	0.4507
133m 51s (- 89m 14s)	3000	60%	0.4451
135m 2s (- 75m 57s)	3200	64%	0.4360
136m 13s (- 64m 6s)	3400	68%	0.4352
137m 24s (- 53m 26s)	3600	72%	0.4351
138m 34s (- 43m 45s)	3800	76%	0.4284
139m 45s (- 34m 56s)	4000	80%	0.4262
140m 55s (- 26m 50s)	4200	84%	0.4235
142m 6s (- 19m 22s)	4400	88%	0.4294
143m 16s (- 12m 27s)	4600	92%	0.4233
144m 26s (- 6m 1s)	4800	96%	0.4161
LEARNING RATE: 0.000300
146m 47s (- 3522m 52s)	200	4%	0.4198
147m 57s (- 1701m 30s)	400	8%	0.4190
149m 7s (- 1093m 34s)	600	12%	0.4199
150m 17s (- 789m 4s)	800	16%	0.4119
151m 27s (- 605m 50s)	1000	20%	0.4109
152m 37s (- 483m 18s)	1200	24%	0.4132
153m 47s (- 395m 27s)	1400	28%	0.4067
154m 57s (- 329m 16s)	1600	32%	0.4047
156m 7s (- 277m 33s)	1800	36%	0.4104
157m 17s (- 235m 56s)	2000	40%	0.4027
158m 27s (- 201m 39s)	2200	44%	0.4035
159m 37s (- 172m 55s)	2400	48%	0.4015
160m 47s (- 148m 25s)	2600	52%	0.3953
161m 57s (- 127m 14s)	2800	56%	0.3959
163m 7s (- 108m 44s)	3000	60%	0.3916
164m 17s (- 92m 24s)	3200	64%	0.3883
165m 27s (- 77m 51s)	3400	68%	0.3923
166m 37s (- 64m 48s)	3600	72%	0.3893
167m 47s (- 52m 59s)	3800	76%	0.3920
168m 57s (- 42m 14s)	4000	80%	0.3878
170m 7s (- 32m 24s)	4200	84%	0.3875
171m 18s (- 23m 21s)	4400	88%	0.3788
172m 28s (- 14m 59s)	4600	92%	0.3785
173m 38s (- 7m 14s)	4800	96%	0.3759
LEARNING RATE: 0.000100
175m 58s (- 4223m 29s)	200	4%	0.3698
177m 9s (- 2037m 17s)	400	8%	0.3759
178m 19s (- 1307m 39s)	600	12%	0.3680
179m 29s (- 942m 19s)	800	16%	0.3678
180m 39s (- 722m 39s)	1000	20%	0.3652
181m 49s (- 575m 45s)	1200	24%	0.3661
182m 58s (- 470m 31s)	1400	28%	0.3557
184m 8s (- 391m 18s)	1600	32%	0.3587
185m 18s (- 329m 26s)	1800	36%	0.3503
186m 30s (- 279m 45s)	2000	40%	0.3552
187m 42s (- 238m 53s)	2200	44%	0.3542
188m 53s (- 204m 38s)	2400	48%	0.3521
190m 3s (- 175m 26s)	2600	52%	0.3497
191m 14s (- 150m 15s)	2800	56%	0.3463
192m 24s (- 128m 16s)	3000	60%	0.3471
193m 34s (- 108m 53s)	3200	64%	0.3534
194m 44s (- 91m 38s)	3400	68%	0.3452
195m 54s (- 76m 11s)	3600	72%	0.3498
197m 4s (- 62m 14s)	3800	76%	0.3478
198m 14s (- 49m 33s)	4000	80%	0.3452
199m 25s (- 37m 59s)	4200	84%	0.3477
200m 35s (- 27m 21s)	4400	88%	0.3458
201m 44s (- 17m 32s)	4600	92%	0.3391
202m 54s (- 8m 27s)	4800	96%	0.3400
LEARNING RATE: 0.000030
205m 14s (- 4925m 59s)	200	4%	0.3400
206m 25s (- 2373m 51s)	400	8%	0.3371
207m 35s (- 1522m 17s)	600	12%	0.3396
208m 45s (- 1095m 56s)	800	16%	0.3291
209m 55s (- 839m 40s)	1000	20%	0.3354
211m 4s (- 668m 25s)	1200	24%	0.3388
212m 15s (- 545m 48s)	1400	28%	0.3357
213m 25s (- 453m 32s)	1600	32%	0.3349
214m 35s (- 381m 30s)	1800	36%	0.3342
215m 46s (- 323m 39s)	2000	40%	0.3361
216m 56s (- 276m 6s)	2200	44%	0.3321
218m 6s (- 236m 17s)	2400	48%	0.3318
219m 16s (- 202m 24s)	2600	52%	0.3344
220m 27s (- 173m 12s)	2800	56%	0.3374
221m 37s (- 147m 44s)	3000	60%	0.3318
222m 47s (- 125m 18s)	3200	64%	0.3329
223m 57s (- 105m 23s)	3400	68%	0.3364
225m 6s (- 87m 32s)	3600	72%	0.3342
226m 16s (- 71m 27s)	3800	76%	0.3376
227m 26s (- 56m 51s)	4000	80%	0.3337
228m 36s (- 43m 32s)	4200	84%	0.3314
229m 44s (- 31m 19s)	4400	88%	0.3334
230m 50s (- 20m 4s)	4600	92%	0.3316
231m 57s (- 9m 39s)	4800	96%	0.3306

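One quirk in the log above: the estimated time remaining balloons at the start of every epoch (e.g. 29m 44s (- 713m 56s) at 4%), because the elapsed timer runs from the very first epoch while the progress fraction resets each epoch. A per-epoch estimate would reset the clock at each LEARNING RATE boundary; a minimal sketch (the epoch_start bookkeeping is hypothetical, not part of multi_train):

import time

def eta_seconds(epoch_start, iters_done, iters_total):
    # Estimate remaining time from this epoch's own elapsed time,
    # rather than the cumulative run time, so the estimate stays sane.
    elapsed = time.time() - epoch_start
    return elapsed / iters_done * (iters_total - iters_done)
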
In [36]:
torch.save(encoder, enc_dec_path+'no_tf_encoder.pth')
torch.save(decoder, enc_dec_path+'no_tf_decoder.pth')


/home/psxca1/anaconda3/lib/python3.6/site-packages/torch/serialization.py:147: UserWarning: Couldn't retrieve source code for container of type EncoderRNN. It won't be checked for correctness upon loading.
  "type " + obj.__name__ + ". It won't be checked "
/home/psxca1/anaconda3/lib/python3.6/site-packages/torch/serialization.py:147: UserWarning: Couldn't retrieve source code for container of type AttnDecoderRNN. It won't be checked for correctness upon loading.
  "type " + obj.__name__ + ". It won't be checked "

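The warnings above appear because torch.save(model, path) pickles the entire module object, which ties the checkpoint to the source code of EncoderRNN and AttnDecoderRNN. Saving only the parameters sidesteps this; a sketch of the alternative (the _state.pth filenames are illustrative):

torch.save(encoder.state_dict(), enc_dec_path+'no_tf_encoder_state.pth')

# To restore, rebuild the module first, then load the weights into it:
encoder = EncoderRNN(art_emb_t, hidden_size, n_layers).cuda()
encoder.load_state_dict(torch.load(enc_dec_path+'no_tf_encoder_state.pth'))
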
In [37]:
with open(list_path+'none_epochs_list.pkl', 'wb') as f:
    pickle.dump(none_epochs_list, f)
with open(list_path+'none_times_list.pkl', 'wb') as f:
    pickle.dump(none_times_list, f)
with open(list_path+'none_avg_loss_list.pkl', 'wb') as f:
    pickle.dump(none_avg_loss_list, f)

In [ ]:
plt.plot(none_times_list, none_avg_loss_list)
plt.xlabel('Training Time (Minutes)')
plt.ylabel('Average Loss')
plt.title('No Teacher Forcing Loss over Time')
plt.savefig(plot_path+'no_tf.png')
plt.show()

In [41]:
test(eval_path+'no_tf')

25% Teacher Forcing


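With a teacher-forcing ratio of 0.25, roughly a quarter of the decoding steps are fed the ground-truth previous token during training, while the rest are fed the model's own previous prediction. The usual implementation is a coin flip per step; a minimal sketch of the idea (the function and argument names are illustrative, not the multi_train/train internals defined earlier):

import random

def next_decoder_input(decoder_output, gold_token, ratio):
    # decoder_output: (1, vocab_size) scores for the current step;
    # gold_token: the ground-truth token id at this position.
    if random.random() < ratio:
        return gold_token                       # teacher forcing: feed the gold token
    return decoder_output.topk(1)[1].detach()   # free running: feed the model's argmax
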
In [55]:
n_layers = 1
hidden_size = 128
p25_epochs_list = []
p25_times_list = []
p25_avg_loss_list = []

encoder = EncoderRNN(art_emb_t, hidden_size, n_layers).cuda()
decoder = AttnDecoderRNN(hdln_emb_t, hidden_size, n_layers).cuda()
multi_train(encoder, decoder, p25_times_list, p25_avg_loss_list, p25_epochs_list, teacher_forcing_type=.25)


LEARNING RATE: 0.003000
1m 11s  (- 28m 32s)	200	4%	2.7340
2m 20s  (- 26m 51s)	400	8%	2.3710
3m 28s  (- 25m 32s)	600	12%	2.2264
4m 37s  (- 24m 17s)	800	16%	2.1342
5m 45s  (- 23m 1s)	1000	20%	2.0253
6m 54s  (- 21m 52s)	1200	24%	1.9593
8m 3s   (- 20m 43s)	1400	28%	1.8742
9m 11s  (- 19m 32s)	1600	32%	1.8099
10m 20s (- 18m 22s)	1800	36%	1.7694
11m 28s (- 17m 12s)	2000	40%	1.7053
12m 36s (- 16m 2s)	2200	44%	1.6628
13m 44s (- 14m 52s)	2400	48%	1.6285
14m 53s (- 13m 44s)	2600	52%	1.5800
16m 1s  (- 12m 35s)	2800	56%	1.5622
17m 10s (- 11m 26s)	3000	60%	1.5218
18m 19s (- 10m 18s)	3200	64%	1.5053
19m 28s (- 9m 9s)	3400	68%	1.4778
20m 37s (- 8m 1s)	3600	72%	1.4420
21m 45s (- 6m 52s)	3800	76%	1.4218
22m 54s (- 5m 43s)	4000	80%	1.3884
24m 2s  (- 4m 34s)	4200	84%	1.3906
25m 11s (- 3m 26s)	4400	88%	1.3452
26m 19s (- 2m 17s)	4600	92%	1.3310
27m 28s (- 1m 8s)	4800	96%	1.3109
LEARNING RATE: 0.003000
29m 44s (- 713m 39s)	200	4%	1.3280
30m 52s (- 355m 5s)	400	8%	1.3037
32m 1s  (- 234m 50s)	600	12%	1.2717
33m 9s  (- 174m 4s)	800	16%	1.2552
34m 17s (- 137m 9s)	1000	20%	1.2316
35m 26s (- 112m 13s)	1200	24%	1.2232
36m 34s (- 94m 3s)	1400	28%	1.2076
37m 43s (- 80m 9s)	1600	32%	1.1927
38m 52s (- 69m 6s)	1800	36%	1.1646
40m 0s  (- 60m 0s)	2000	40%	1.1698
41m 8s  (- 52m 22s)	2200	44%	1.1635
42m 17s (- 45m 48s)	2400	48%	1.1386
43m 24s (- 40m 4s)	2600	52%	1.1312
44m 31s (- 34m 58s)	2800	56%	1.1335
45m 38s (- 30m 25s)	3000	60%	1.1109
46m 44s (- 26m 17s)	3200	64%	1.1039
47m 51s (- 22m 31s)	3400	68%	1.0933
48m 58s (- 19m 2s)	3600	72%	1.0799
50m 4s  (- 15m 48s)	3800	76%	1.0835
51m 11s (- 12m 47s)	4000	80%	1.0613
52m 17s (- 9m 57s)	4200	84%	1.0493
53m 25s (- 7m 17s)	4400	88%	1.0409
54m 34s (- 4m 44s)	4600	92%	1.0489
55m 42s (- 2m 19s)	4800	96%	1.0321
LEARNING RATE: 0.001000
58m 0s  (- 1392m 10s)	200	4%	0.9794
59m 8s  (- 680m 7s)	400	8%	0.9288
60m 17s (- 442m 10s)	600	12%	0.9071
61m 26s (- 322m 35s)	800	16%	0.8815
62m 36s (- 250m 24s)	1000	20%	0.8563
63m 44s (- 201m 50s)	1200	24%	0.8325
64m 52s (- 166m 50s)	1400	28%	0.8176
66m 0s  (- 140m 16s)	1600	32%	0.8033
67m 9s  (- 119m 24s)	1800	36%	0.7854
68m 18s (- 102m 27s)	2000	40%	0.7761
69m 26s (- 88m 23s)	2200	44%	0.7770
70m 35s (- 76m 28s)	2400	48%	0.7678
71m 43s (- 66m 12s)	2600	52%	0.7568
72m 52s (- 57m 15s)	2800	56%	0.7442
74m 1s  (- 49m 21s)	3000	60%	0.7385
75m 9s  (- 42m 16s)	3200	64%	0.7160
76m 17s (- 35m 54s)	3400	68%	0.7271
77m 26s (- 30m 7s)	3600	72%	0.7163
78m 34s (- 24m 48s)	3800	76%	0.7017
79m 43s (- 19m 55s)	4000	80%	0.6973
80m 51s (- 15m 24s)	4200	84%	0.7047
81m 59s (- 11m 10s)	4400	88%	0.6909
83m 8s  (- 7m 13s)	4600	92%	0.6906
84m 16s (- 3m 30s)	4800	96%	0.6895
LEARNING RATE: 0.001000
86m 33s (- 2077m 13s)	200	4%	0.6829
87m 42s (- 1008m 33s)	400	8%	0.6846
88m 50s (- 651m 33s)	600	12%	0.6899
89m 58s (- 472m 23s)	800	16%	0.6705
91m 6s  (- 364m 27s)	1000	20%	0.6692
92m 15s (- 292m 8s)	1200	24%	0.6714
93m 24s (- 240m 11s)	1400	28%	0.6581
94m 32s (- 200m 54s)	1600	32%	0.6556
95m 41s (- 170m 7s)	1800	36%	0.6517
96m 50s (- 145m 15s)	2000	40%	0.6427
97m 59s (- 124m 42s)	2200	44%	0.6367
99m 7s  (- 107m 23s)	2400	48%	0.6296
100m 15s (- 92m 32s)	2600	52%	0.6344
101m 23s (- 79m 40s)	2800	56%	0.6322
102m 32s (- 68m 21s)	3000	60%	0.6220
103m 40s (- 58m 19s)	3200	64%	0.6214
104m 49s (- 49m 19s)	3400	68%	0.6220
105m 57s (- 41m 12s)	3600	72%	0.6131
107m 6s (- 33m 49s)	3800	76%	0.6133
108m 15s (- 27m 3s)	4000	80%	0.6054
109m 24s (- 20m 50s)	4200	84%	0.5954
110m 32s (- 15m 4s)	4400	88%	0.5934
111m 41s (- 9m 42s)	4600	92%	0.5867
112m 49s (- 4m 42s)	4800	96%	0.5860
LEARNING RATE: 0.000300
115m 7s (- 2763m 2s)	200	4%	0.5769
116m 16s (- 1337m 4s)	400	8%	0.5608
117m 25s (- 861m 3s)	600	12%	0.5447
118m 32s (- 622m 22s)	800	16%	0.5299
119m 40s (- 478m 41s)	1000	20%	0.5221
120m 48s (- 382m 33s)	1200	24%	0.5199
121m 57s (- 313m 35s)	1400	28%	0.5139
123m 5s (- 261m 35s)	1600	32%	0.4916
124m 14s (- 220m 53s)	1800	36%	0.5038
125m 23s (- 188m 5s)	2000	40%	0.4962
126m 32s (- 161m 2s)	2200	44%	0.5119
127m 40s (- 138m 18s)	2400	48%	0.4857
128m 48s (- 118m 53s)	2600	52%	0.4853
130m 0s (- 102m 8s)	2800	56%	0.4882
131m 8s (- 87m 25s)	3000	60%	0.4800
132m 16s (- 74m 24s)	3200	64%	0.4796
133m 25s (- 62m 47s)	3400	68%	0.4847
134m 34s (- 52m 20s)	3600	72%	0.4776
135m 42s (- 42m 51s)	3800	76%	0.4689
136m 51s (- 34m 12s)	4000	80%	0.4694
137m 59s (- 26m 16s)	4200	84%	0.4673
139m 7s (- 18m 58s)	4400	88%	0.4643
140m 15s (- 12m 11s)	4600	92%	0.4703
141m 23s (- 5m 53s)	4800	96%	0.4661
LEARNING RATE: 0.000300
143m 41s (- 3448m 24s)	200	4%	0.4638
144m 49s (- 1665m 31s)	400	8%	0.4619
145m 59s (- 1070m 33s)	600	12%	0.4682
147m 8s (- 772m 28s)	800	16%	0.4518
148m 16s (- 593m 5s)	1000	20%	0.4614
149m 25s (- 473m 10s)	1200	24%	0.4510
150m 34s (- 387m 11s)	1400	28%	0.4549
151m 43s (- 322m 25s)	1600	32%	0.4499
152m 51s (- 271m 45s)	1800	36%	0.4484
154m 0s (- 231m 0s)	2000	40%	0.4410
155m 8s (- 197m 27s)	2200	44%	0.4443
156m 17s (- 169m 18s)	2400	48%	0.4355
157m 25s (- 145m 19s)	2600	52%	0.4461
158m 34s (- 124m 35s)	2800	56%	0.4297
159m 42s (- 106m 28s)	3000	60%	0.4285
160m 51s (- 90m 28s)	3200	64%	0.4414
162m 0s (- 76m 14s)	3400	68%	0.4361
163m 8s (- 63m 26s)	3600	72%	0.4302
164m 17s (- 51m 53s)	3800	76%	0.4330
165m 27s (- 41m 21s)	4000	80%	0.4344
166m 36s (- 31m 44s)	4200	84%	0.4208
167m 43s (- 22m 52s)	4400	88%	0.4216
168m 52s (- 14m 41s)	4600	92%	0.4257
170m 0s (- 7m 5s)	4800	96%	0.4165
LEARNING RATE: 0.000100
172m 16s (- 4134m 45s)	200	4%	0.4178
173m 25s (- 1994m 17s)	400	8%	0.4166
174m 33s (- 1280m 5s)	600	12%	0.4132
175m 41s (- 922m 25s)	800	16%	0.4034
176m 51s (- 707m 24s)	1000	20%	0.4041
177m 59s (- 563m 37s)	1200	24%	0.3968
179m 8s (- 460m 37s)	1400	28%	0.4049
180m 16s (- 383m 5s)	1600	32%	0.4012
181m 25s (- 322m 32s)	1800	36%	0.3985
182m 33s (- 273m 49s)	2000	40%	0.3938
183m 41s (- 233m 47s)	2200	44%	0.3982
184m 50s (- 200m 14s)	2400	48%	0.3982
185m 59s (- 171m 41s)	2600	52%	0.3971
187m 7s (- 147m 1s)	2800	56%	0.3992
188m 16s (- 125m 31s)	3000	60%	0.3984
189m 25s (- 106m 32s)	3200	64%	0.3896
190m 32s (- 89m 40s)	3400	68%	0.3926
191m 40s (- 74m 32s)	3600	72%	0.3951
192m 48s (- 60m 53s)	3800	76%	0.3943
193m 57s (- 48m 29s)	4000	80%	0.3931
195m 5s (- 37m 9s)	4200	84%	0.3854
196m 14s (- 26m 45s)	4400	88%	0.3955
197m 23s (- 17m 9s)	4600	92%	0.3915
198m 31s (- 8m 16s)	4800	96%	0.3895
LEARNING RATE: 0.000030
200m 48s (- 4819m 20s)	200	4%	0.3909
201m 55s (- 2322m 13s)	400	8%	0.3841
203m 4s (- 1489m 11s)	600	12%	0.3819
204m 12s (- 1072m 6s)	800	16%	0.3857
205m 20s (- 821m 23s)	1000	20%	0.3876
206m 29s (- 653m 53s)	1200	24%	0.3793
207m 37s (- 533m 53s)	1400	28%	0.3923
208m 45s (- 443m 37s)	1600	32%	0.3826
209m 54s (- 373m 9s)	1800	36%	0.3830
211m 2s (- 316m 34s)	2000	40%	0.3852
212m 11s (- 270m 3s)	2200	44%	0.3846
213m 19s (- 231m 6s)	2400	48%	0.3842
214m 28s (- 197m 58s)	2600	52%	0.3798
215m 36s (- 169m 24s)	2800	56%	0.3668
216m 45s (- 144m 30s)	3000	60%	0.3871
217m 54s (- 122m 34s)	3200	64%	0.3804
219m 2s (- 103m 4s)	3400	68%	0.3747
220m 10s (- 85m 37s)	3600	72%	0.3755
221m 18s (- 69m 53s)	3800	76%	0.3831
222m 27s (- 55m 36s)	4000	80%	0.3762
223m 36s (- 42m 35s)	4200	84%	0.3864
224m 43s (- 30m 38s)	4400	88%	0.3735
225m 52s (- 19m 38s)	4600	92%	0.3737
227m 1s (- 9m 27s)	4800	96%	0.3800

In [56]:
torch.save(encoder, enc_dec_path+'p25_tf_encoder.pth')
torch.save(decoder, enc_dec_path+'p25_tf_decoder.pth')


/home/psxca1/anaconda3/lib/python3.6/site-packages/torch/serialization.py:147: UserWarning: Couldn't retrieve source code for container of type EncoderRNN. It won't be checked for correctness upon loading.
  "type " + obj.__name__ + ". It won't be checked "
/home/psxca1/anaconda3/lib/python3.6/site-packages/torch/serialization.py:147: UserWarning: Couldn't retrieve source code for container of type AttnDecoderRNN. It won't be checked for correctness upon loading.
  "type " + obj.__name__ + ". It won't be checked "

In [57]:
with open(list_path+'p25_epochs_list.pkl', 'wb') as f:
    pickle.dump(p25_epochs_list, f)
with open(list_path+'p25_times_list.pkl', 'wb') as f:
    pickle.dump(p25_times_list, f)
with open(list_path+'p25_avg_loss_list.pkl', 'wb') as f:
    pickle.dump(p25_avg_loss_list, f)

In [58]:
plt.plot(p25_times_list, p25_avg_loss_list)
plt.xlabel('Training Time (Minutes)')
plt.ylabel('Average Loss')
plt.title('25% Teacher Forcing Loss over Time')
plt.savefig(plot_path+'p25_tf.png')
plt.show()



In [71]:
test(eval_path+'p25_tf')

50% Teacher Forcing


In [73]:
n_layers = 1
hidden_size = 128
p50_epochs_list = []
p50_times_list = []
p50_avg_loss_list = []

encoder = EncoderRNN(art_emb_t, hidden_size, n_layers).cuda()
decoder = AttnDecoderRNN(hdln_emb_t, hidden_size, n_layers).cuda()
multi_train(encoder, decoder, p50_times_list, p50_avg_loss_list, p50_epochs_list, teacher_forcing_type=.5)


LEARNING RATE: 0.003000
1m 2s   (- 24m 55s)	200	4%	2.7314
2m 4s   (- 23m 53s)	400	8%	2.3372
3m 7s   (- 22m 58s)	600	12%	2.1844
4m 11s  (- 21m 59s)	800	16%	2.0559
5m 14s  (- 20m 59s)	1000	20%	1.9403
6m 19s  (- 20m 2s)	1200	24%	1.8512
7m 24s  (- 19m 2s)	1400	28%	1.7917
8m 28s  (- 18m 1s)	1600	32%	1.7232
9m 33s  (- 16m 58s)	1800	36%	1.6849
10m 37s (- 15m 56s)	2000	40%	1.6010
11m 41s (- 14m 52s)	2200	44%	1.5700
12m 45s (- 13m 49s)	2400	48%	1.5061
13m 49s (- 12m 45s)	2600	52%	1.4847
14m 53s (- 11m 41s)	2800	56%	1.4569
15m 57s (- 10m 38s)	3000	60%	1.4468
17m 2s  (- 9m 34s)	3200	64%	1.4083
18m 5s  (- 8m 30s)	3400	68%	1.3731
19m 9s  (- 7m 27s)	3600	72%	1.3202
20m 13s (- 6m 23s)	3800	76%	1.3348
21m 16s (- 5m 19s)	4000	80%	1.2739
22m 20s (- 4m 15s)	4200	84%	1.2503
23m 23s (- 3m 11s)	4400	88%	1.2457
24m 26s (- 2m 7s)	4600	92%	1.2263
25m 28s (- 1m 3s)	4800	96%	1.2211
LEARNING RATE: 0.003000
27m 34s (- 661m 44s)	200	4%	1.2285
28m 36s (- 329m 4s)	400	8%	1.2088
29m 39s (- 217m 31s)	600	12%	1.1694
30m 43s (- 161m 15s)	800	16%	1.1245
31m 44s (- 126m 58s)	1000	20%	1.1275
32m 47s (- 103m 51s)	1200	24%	1.1129
33m 50s (- 87m 0s)	1400	28%	1.1052
34m 52s (- 74m 5s)	1600	32%	1.0828
35m 53s (- 63m 48s)	1800	36%	1.0527
36m 55s (- 55m 23s)	2000	40%	1.0467
37m 58s (- 48m 20s)	2200	44%	1.0416
39m 1s  (- 42m 16s)	2400	48%	1.0431
40m 3s  (- 36m 58s)	2600	52%	1.0084
41m 6s  (- 32m 18s)	2800	56%	1.0269
42m 9s  (- 28m 6s)	3000	60%	0.9977
43m 11s (- 24m 17s)	3200	64%	0.9970
44m 15s (- 20m 49s)	3400	68%	0.9896
45m 17s (- 17m 36s)	3600	72%	0.9912
46m 20s (- 14m 37s)	3800	76%	0.9635
47m 22s (- 11m 50s)	4000	80%	0.9479
48m 24s (- 9m 13s)	4200	84%	0.9511
49m 27s (- 6m 44s)	4400	88%	0.9414
50m 29s (- 4m 23s)	4600	92%	0.9121
51m 31s (- 2m 8s)	4800	96%	0.9147
LEARNING RATE: 0.001000
53m 37s (- 1286m 49s)	200	4%	0.8755
54m 39s (- 628m 36s)	400	8%	0.8373
55m 41s (- 408m 26s)	600	12%	0.7966
56m 44s (- 297m 51s)	800	16%	0.7693
57m 48s (- 231m 14s)	1000	20%	0.7316
58m 54s (- 186m 31s)	1200	24%	0.7252
59m 58s (- 154m 14s)	1400	28%	0.7079
61m 4s  (- 129m 47s)	1600	32%	0.7134
62m 9s  (- 110m 30s)	1800	36%	0.6872
63m 15s (- 94m 52s)	2000	40%	0.6738
64m 18s (- 81m 51s)	2200	44%	0.6557
65m 20s (- 70m 47s)	2400	48%	0.6312
66m 22s (- 61m 16s)	2600	52%	0.6299
67m 25s (- 52m 58s)	2800	56%	0.6601
68m 29s (- 45m 39s)	3000	60%	0.6418
69m 32s (- 39m 7s)	3200	64%	0.6269
70m 37s (- 33m 14s)	3400	68%	0.6172
71m 42s (- 27m 53s)	3600	72%	0.6233
72m 47s (- 22m 59s)	3800	76%	0.6029
73m 52s (- 18m 28s)	4000	80%	0.5878
74m 58s (- 14m 16s)	4200	84%	0.6082
76m 3s  (- 10m 22s)	4400	88%	0.6119
77m 8s  (- 6m 42s)	4600	92%	0.5960
78m 13s (- 3m 15s)	4800	96%	0.5694
LEARNING RATE: 0.001000
80m 25s (- 1930m 1s)	200	4%	0.5889
81m 30s (- 937m 18s)	400	8%	0.5655
82m 35s (- 605m 41s)	600	12%	0.5893
83m 41s (- 439m 22s)	800	16%	0.5568
84m 46s (- 339m 6s)	1000	20%	0.5488
85m 52s (- 271m 55s)	1200	24%	0.5783
86m 57s (- 223m 36s)	1400	28%	0.5439
88m 3s  (- 187m 7s)	1600	32%	0.5458
89m 8s  (- 158m 29s)	1800	36%	0.5513
90m 14s (- 135m 22s)	2000	40%	0.5369
91m 20s (- 116m 15s)	2200	44%	0.5297
92m 26s (- 100m 9s)	2400	48%	0.5136
93m 32s (- 86m 20s)	2600	52%	0.5397
94m 38s (- 74m 21s)	2800	56%	0.5112
95m 44s (- 63m 49s)	3000	60%	0.5201
96m 50s (- 54m 28s)	3200	64%	0.5125
97m 57s (- 46m 5s)	3400	68%	0.5049
99m 4s  (- 38m 31s)	3600	72%	0.5043
100m 12s (- 31m 38s)	3800	76%	0.5003
101m 19s (- 25m 19s)	4000	80%	0.4931
102m 26s (- 19m 30s)	4200	84%	0.4864
103m 33s (- 14m 7s)	4400	88%	0.4911
104m 41s (- 9m 6s)	4600	92%	0.4759
105m 49s (- 4m 24s)	4800	96%	0.4952
LEARNING RATE: 0.000300
108m 6s (- 2594m 27s)	200	4%	0.4788
109m 13s (- 1256m 7s)	400	8%	0.4497
110m 21s (- 809m 16s)	600	12%	0.4312
111m 29s (- 585m 20s)	800	16%	0.4402
112m 37s (- 450m 30s)	1000	20%	0.4328
113m 45s (- 360m 13s)	1200	24%	0.4095
114m 53s (- 295m 26s)	1400	28%	0.4134
116m 1s (- 246m 32s)	1600	32%	0.3942
117m 8s (- 208m 15s)	1800	36%	0.3993
118m 16s (- 177m 24s)	2000	40%	0.4006
119m 24s (- 151m 57s)	2200	44%	0.4096
120m 32s (- 130m 34s)	2400	48%	0.3779
121m 38s (- 112m 17s)	2600	52%	0.3882
122m 46s (- 96m 27s)	2800	56%	0.3759
123m 53s (- 82m 35s)	3000	60%	0.3812
125m 0s (- 70m 19s)	3200	64%	0.3729
126m 8s (- 59m 21s)	3400	68%	0.3771
127m 16s (- 49m 29s)	3600	72%	0.3702
128m 24s (- 40m 33s)	3800	76%	0.3779
129m 33s (- 32m 23s)	4000	80%	0.3620
130m 41s (- 24m 53s)	4200	84%	0.3698
131m 49s (- 17m 58s)	4400	88%	0.3656
132m 57s (- 11m 33s)	4600	92%	0.3626
134m 4s (- 5m 35s)	4800	96%	0.3542
LEARNING RATE: 0.000300
136m 19s (- 3271m 45s)	200	4%	0.3689
137m 26s (- 1580m 35s)	400	8%	0.3487
138m 34s (- 1016m 15s)	600	12%	0.3611
139m 42s (- 733m 29s)	800	16%	0.3679
140m 50s (- 563m 21s)	1000	20%	0.3609
141m 57s (- 449m 33s)	1200	24%	0.3575
143m 6s (- 367m 58s)	1400	28%	0.3685
144m 14s (- 306m 29s)	1600	32%	0.3500
145m 21s (- 258m 25s)	1800	36%	0.3486
146m 28s (- 219m 43s)	2000	40%	0.3570
147m 36s (- 187m 51s)	2200	44%	0.3498
148m 43s (- 161m 6s)	2400	48%	0.3328
149m 50s (- 138m 18s)	2600	52%	0.3438
150m 58s (- 118m 37s)	2800	56%	0.3413
152m 5s (- 101m 23s)	3000	60%	0.3318
153m 13s (- 86m 11s)	3200	64%	0.3432
154m 21s (- 72m 38s)	3400	68%	0.3343
155m 29s (- 60m 28s)	3600	72%	0.3518
156m 38s (- 49m 27s)	3800	76%	0.3212
157m 45s (- 39m 26s)	4000	80%	0.3321
158m 53s (- 30m 15s)	4200	84%	0.3431
160m 1s (- 21m 49s)	4400	88%	0.3252
161m 10s (- 14m 0s)	4600	92%	0.3258
162m 18s (- 6m 45s)	4800	96%	0.3206
LEARNING RATE: 0.000100
164m 34s (- 3949m 43s)	200	4%	0.3296
165m 42s (- 1905m 36s)	400	8%	0.3098
166m 49s (- 1223m 25s)	600	12%	0.3317
167m 57s (- 881m 48s)	800	16%	0.3160
169m 5s (- 676m 21s)	1000	20%	0.3189
170m 12s (- 538m 59s)	1200	24%	0.3077
171m 20s (- 440m 34s)	1400	28%	0.3074
172m 27s (- 366m 27s)	1600	32%	0.3035
173m 34s (- 308m 34s)	1800	36%	0.3047
174m 41s (- 262m 2s)	2000	40%	0.3013
175m 49s (- 223m 46s)	2200	44%	0.3055
176m 57s (- 191m 42s)	2400	48%	0.3015
178m 4s (- 164m 23s)	2600	52%	0.2956
179m 12s (- 140m 48s)	2800	56%	0.3067
180m 20s (- 120m 13s)	3000	60%	0.3018
181m 28s (- 102m 4s)	3200	64%	0.2982
182m 36s (- 85m 56s)	3400	68%	0.2917
183m 44s (- 71m 27s)	3600	72%	0.3012
184m 52s (- 58m 22s)	3800	76%	0.2975
186m 0s (- 46m 30s)	4000	80%	0.2986
187m 8s (- 35m 38s)	4200	84%	0.2965
188m 16s (- 25m 40s)	4400	88%	0.2952
189m 24s (- 16m 28s)	4600	92%	0.3091
190m 32s (- 7m 56s)	4800	96%	0.3089
LEARNING RATE: 0.000030
192m 47s (- 4626m 49s)	200	4%	0.2971
193m 54s (- 2230m 0s)	400	8%	0.2866
195m 1s (- 1430m 9s)	600	12%	0.2767
196m 8s (- 1029m 46s)	800	16%	0.2893
197m 16s (- 789m 7s)	1000	20%	0.2932
198m 23s (- 628m 15s)	1200	24%	0.2819
199m 31s (- 513m 5s)	1400	28%	0.2887
200m 40s (- 426m 26s)	1600	32%	0.2948
201m 48s (- 358m 45s)	1800	36%	0.2931
202m 55s (- 304m 23s)	2000	40%	0.2873
204m 2s (- 259m 41s)	2200	44%	0.2876
205m 9s (- 222m 15s)	2400	48%	0.2836
206m 17s (- 190m 25s)	2600	52%	0.2891
207m 25s (- 162m 58s)	2800	56%	0.2892
208m 34s (- 139m 2s)	3000	60%	0.2944
209m 41s (- 117m 57s)	3200	64%	0.2972
210m 48s (- 99m 12s)	3400	68%	0.2899
211m 56s (- 82m 25s)	3600	72%	0.2868
213m 4s (- 67m 17s)	3800	76%	0.2965
214m 12s (- 53m 33s)	4000	80%	0.2838
215m 20s (- 41m 0s)	4200	84%	0.2783
216m 27s (- 29m 31s)	4400	88%	0.2749
217m 35s (- 18m 55s)	4600	92%	0.2860
218m 43s (- 9m 6s)	4800	96%	0.2810

In [74]:
torch.save(encoder, enc_dec_path+'p50_tf_encoder.pth')
torch.save(decoder, enc_dec_path+'p50_tf_decoder.pth')


/home/psxca1/anaconda3/lib/python3.6/site-packages/torch/serialization.py:147: UserWarning: Couldn't retrieve source code for container of type EncoderRNN. It won't be checked for correctness upon loading.
  "type " + obj.__name__ + ". It won't be checked "
/home/psxca1/anaconda3/lib/python3.6/site-packages/torch/serialization.py:147: UserWarning: Couldn't retrieve source code for container of type AttnDecoderRNN. It won't be checked for correctness upon loading.
  "type " + obj.__name__ + ". It won't be checked "

In [75]:
with open(list_path+'p50_epochs_list.pkl', 'wb') as f:
    pickle.dump(p50_epochs_list, f)
with open(list_path+'p50_times_list.pkl', 'wb') as f:
    pickle.dump(p50_times_list, f)
with open(list_path+'p50_avg_loss_list.pkl', 'wb') as f:
    pickle.dump(p50_avg_loss_list, f)

In [76]:
plt.plot(p50_times_list, p50_avg_loss_list)
plt.xlabel('Training Time (Minutes)')
plt.ylabel('Average Loss')
plt.title('50% Teacher Forcing Loss over Time')
plt.savefig(plot_path+'p50_tf.png')
plt.show()



In [77]:
test(eval_path+'p50_tf')

75% Teacher Forcing


In [78]:
n_layers = 1
hidden_size = 128
p75_epochs_list = []
p75_times_list = []
p75_avg_loss_list = []

encoder = EncoderRNN(art_emb_t, hidden_size, n_layers).cuda()
decoder = AttnDecoderRNN(hdln_emb_t, hidden_size, n_layers).cuda()
multi_train(encoder, decoder, p75_times_list, p75_avg_loss_list, p75_epochs_list, teacher_forcing_type=.75)


LEARNING RATE: 0.003000
1m 6s   (- 26m 37s)	200	4%	2.7064
2m 13s  (- 25m 37s)	400	8%	2.3011
3m 20s  (- 24m 32s)	600	12%	2.0718
4m 27s  (- 23m 26s)	800	16%	1.9383
5m 33s  (- 22m 15s)	1000	20%	1.8090
6m 40s  (- 21m 8s)	1200	24%	1.7506
7m 47s  (- 20m 2s)	1400	28%	1.6368
8m 55s  (- 18m 57s)	1600	32%	1.5930
10m 3s  (- 17m 52s)	1800	36%	1.5095
11m 10s (- 16m 45s)	2000	40%	1.5008
12m 17s (- 15m 38s)	2200	44%	1.4491
13m 23s (- 14m 30s)	2400	48%	1.3735
14m 30s (- 13m 23s)	2600	52%	1.3394
15m 37s (- 12m 16s)	2800	56%	1.3021
16m 44s (- 11m 9s)	3000	60%	1.2709
17m 51s (- 10m 2s)	3200	64%	1.2804
18m 58s (- 8m 55s)	3400	68%	1.2457
20m 4s  (- 7m 48s)	3600	72%	1.2085
21m 12s (- 6m 41s)	3800	76%	1.1809
22m 19s (- 5m 34s)	4000	80%	1.1501
23m 26s (- 4m 27s)	4200	84%	1.1220
24m 33s (- 3m 20s)	4400	88%	1.1080
25m 40s (- 2m 13s)	4600	92%	1.0757
26m 48s (- 1m 7s)	4800	96%	1.0952
LEARNING RATE: 0.003000
29m 1s  (- 696m 39s)	200	4%	1.0751
30m 9s  (- 346m 47s)	400	8%	1.0693
31m 16s (- 229m 18s)	600	12%	1.0717
32m 22s (- 170m 0s)	800	16%	1.0119
33m 29s (- 133m 58s)	1000	20%	1.0288
34m 36s (- 109m 36s)	1200	24%	0.9555
35m 43s (- 91m 52s)	1400	28%	0.9486
36m 50s (- 78m 17s)	1600	32%	0.9429
37m 57s (- 67m 29s)	1800	36%	0.9318
39m 4s  (- 58m 37s)	2000	40%	0.9125
40m 11s (- 51m 9s)	2200	44%	0.8634
41m 18s (- 44m 45s)	2400	48%	0.8704
42m 25s (- 39m 9s)	2600	52%	0.8932
43m 31s (- 34m 12s)	2800	56%	0.8462
44m 39s (- 29m 46s)	3000	60%	0.8310
45m 46s (- 25m 44s)	3200	64%	0.8486
46m 53s (- 22m 3s)	3400	68%	0.8454
48m 0s  (- 18m 40s)	3600	72%	0.8032
49m 7s  (- 15m 30s)	3800	76%	0.8626
50m 13s (- 12m 33s)	4000	80%	0.8225
51m 19s (- 9m 46s)	4200	84%	0.8152
52m 26s (- 7m 9s)	4400	88%	0.7744
53m 33s (- 4m 39s)	4600	92%	0.7721
54m 40s (- 2m 16s)	4800	96%	0.7671
LEARNING RATE: 0.001000
56m 53s (- 1365m 25s)	200	4%	0.7350
58m 0s  (- 667m 2s)	400	8%	0.6949
59m 6s  (- 433m 30s)	600	12%	0.6607
60m 14s (- 316m 15s)	800	16%	0.6533
61m 21s (- 245m 25s)	1000	20%	0.6031
62m 28s (- 197m 51s)	1200	24%	0.5930
63m 35s (- 163m 32s)	1400	28%	0.5504
64m 42s (- 137m 29s)	1600	32%	0.5446
65m 48s (- 116m 59s)	1800	36%	0.5747
66m 54s (- 100m 21s)	2000	40%	0.5290
68m 0s  (- 86m 32s)	2200	44%	0.5167
69m 7s  (- 74m 52s)	2400	48%	0.4991
70m 13s (- 64m 49s)	2600	52%	0.4959
71m 18s (- 56m 1s)	2800	56%	0.4894
72m 23s (- 48m 15s)	3000	60%	0.4846
73m 30s (- 41m 20s)	3200	64%	0.4837
74m 36s (- 35m 6s)	3400	68%	0.4771
75m 43s (- 29m 26s)	3600	72%	0.4822
76m 50s (- 24m 15s)	3800	76%	0.4712
77m 58s (- 19m 29s)	4000	80%	0.4640
79m 5s  (- 15m 3s)	4200	84%	0.4449
80m 12s (- 10m 56s)	4400	88%	0.4498
81m 18s (- 7m 4s)	4600	92%	0.4333
82m 25s (- 3m 26s)	4800	96%	0.4444
LEARNING RATE: 0.001000
84m 40s (- 2032m 4s)	200	4%	0.4473
85m 47s (- 986m 30s)	400	8%	0.4431
86m 53s (- 637m 15s)	600	12%	0.4468
88m 1s  (- 462m 5s)	800	16%	0.4325
89m 7s  (- 356m 30s)	1000	20%	0.4387
90m 13s (- 285m 43s)	1200	24%	0.4311
91m 20s (- 234m 53s)	1400	28%	0.4294
92m 28s (- 196m 30s)	1600	32%	0.3978
93m 35s (- 166m 22s)	1800	36%	0.4060
94m 42s (- 142m 3s)	2000	40%	0.3747
95m 48s (- 121m 56s)	2200	44%	0.4045
96m 55s (- 105m 0s)	2400	48%	0.3910
98m 2s  (- 90m 29s)	2600	52%	0.3802
99m 9s  (- 77m 54s)	2800	56%	0.3912
100m 16s (- 66m 51s)	3000	60%	0.3938
101m 24s (- 57m 2s)	3200	64%	0.4055
102m 31s (- 48m 14s)	3400	68%	0.3765
103m 37s (- 40m 18s)	3600	72%	0.3761
104m 44s (- 33m 4s)	3800	76%	0.4016
105m 52s (- 26m 28s)	4000	80%	0.3787
106m 59s (- 20m 22s)	4200	84%	0.3771
108m 5s (- 14m 44s)	4400	88%	0.3762
109m 12s (- 9m 29s)	4600	92%	0.3580
110m 19s (- 4m 35s)	4800	96%	0.3897
LEARNING RATE: 0.000300
112m 33s (- 2701m 25s)	200	4%	0.3267
113m 40s (- 1307m 19s)	400	8%	0.3366
114m 47s (- 841m 51s)	600	12%	0.3486
115m 55s (- 608m 34s)	800	16%	0.3151
117m 2s (- 468m 11s)	1000	20%	0.3052
118m 10s (- 374m 12s)	1200	24%	0.3032
119m 16s (- 306m 42s)	1400	28%	0.2742
120m 24s (- 255m 51s)	1600	32%	0.3012
121m 31s (- 216m 2s)	1800	36%	0.2834
122m 38s (- 183m 57s)	2000	40%	0.2910
123m 45s (- 157m 30s)	2200	44%	0.2658
124m 52s (- 135m 16s)	2400	48%	0.2821
125m 59s (- 116m 17s)	2600	52%	0.2834
127m 6s (- 99m 51s)	2800	56%	0.2723
128m 13s (- 85m 29s)	3000	60%	0.3011
129m 21s (- 72m 45s)	3200	64%	0.2756
130m 28s (- 61m 23s)	3400	68%	0.2714
131m 34s (- 51m 10s)	3600	72%	0.2567
132m 42s (- 41m 54s)	3800	76%	0.2854
133m 48s (- 33m 27s)	4000	80%	0.2688
134m 55s (- 25m 42s)	4200	84%	0.2741
136m 3s (- 18m 33s)	4400	88%	0.2559
137m 11s (- 11m 55s)	4600	92%	0.2590
138m 18s (- 5m 45s)	4800	96%	0.2535
LEARNING RATE: 0.000300
140m 32s (- 3373m 2s)	200	4%	0.2677
141m 39s (- 1629m 4s)	400	8%	0.2741
142m 46s (- 1047m 1s)	600	12%	0.2568
143m 53s (- 755m 28s)	800	16%	0.2481
145m 1s (- 580m 5s)	1000	20%	0.2553
146m 8s (- 462m 47s)	1200	24%	0.2488
147m 16s (- 378m 42s)	1400	28%	0.2483
148m 23s (- 315m 20s)	1600	32%	0.2452
149m 30s (- 265m 47s)	1800	36%	0.2558
150m 37s (- 225m 55s)	2000	40%	0.2391
151m 44s (- 193m 7s)	2200	44%	0.2502
152m 52s (- 165m 36s)	2400	48%	0.2299
153m 58s (- 142m 8s)	2600	52%	0.2400
155m 5s (- 121m 51s)	2800	56%	0.2250
156m 12s (- 104m 8s)	3000	60%	0.2424
157m 19s (- 88m 29s)	3200	64%	0.2523
158m 26s (- 74m 33s)	3400	68%	0.2443
159m 34s (- 62m 3s)	3600	72%	0.2462
160m 41s (- 50m 44s)	3800	76%	0.2302
161m 48s (- 40m 27s)	4000	80%	0.2382
162m 55s (- 31m 2s)	4200	84%	0.2299
164m 2s (- 22m 22s)	4400	88%	0.2191
165m 8s (- 14m 21s)	4600	92%	0.2127
166m 15s (- 6m 55s)	4800	96%	0.2237
LEARNING RATE: 0.000100
168m 31s (- 4044m 25s)	200	4%	0.2096
169m 37s (- 1950m 40s)	400	8%	0.2118
170m 45s (- 1252m 11s)	600	12%	0.2134
171m 51s (- 902m 15s)	800	16%	0.2146
172m 58s (- 691m 53s)	1000	20%	0.2292
174m 6s (- 551m 19s)	1200	24%	0.2113
175m 12s (- 450m 32s)	1400	28%	0.2153
176m 18s (- 374m 39s)	1600	32%	0.2013
177m 26s (- 315m 27s)	1800	36%	0.2261
178m 33s (- 267m 50s)	2000	40%	0.2122
179m 41s (- 228m 41s)	2200	44%	0.1999
180m 48s (- 195m 52s)	2400	48%	0.2166
181m 54s (- 167m 55s)	2600	52%	0.1989
183m 1s (- 143m 48s)	2800	56%	0.2101
184m 8s (- 122m 45s)	3000	60%	0.1938
185m 15s (- 104m 12s)	3200	64%	0.1971
186m 22s (- 87m 42s)	3400	68%	0.1899
187m 30s (- 72m 55s)	3600	72%	0.1998
188m 37s (- 59m 33s)	3800	76%	0.1978
189m 45s (- 47m 26s)	4000	80%	0.1926
190m 52s (- 36m 21s)	4200	84%	0.1962
191m 58s (- 26m 10s)	4400	88%	0.2055
193m 6s (- 16m 47s)	4600	92%	0.1964
194m 12s (- 8m 5s)	4800	96%	0.1992
LEARNING RATE: 0.000030
196m 26s (- 4714m 36s)	200	4%	0.1905
197m 33s (- 2272m 0s)	400	8%	0.1998
198m 41s (- 1457m 7s)	600	12%	0.1975
199m 48s (- 1049m 0s)	800	16%	0.2011
200m 56s (- 803m 46s)	1000	20%	0.2061
202m 3s (- 639m 50s)	1200	24%	0.1921
203m 10s (- 522m 26s)	1400	28%	0.1898
204m 17s (- 434m 6s)	1600	32%	0.1898
205m 24s (- 365m 10s)	1800	36%	0.1984
206m 31s (- 309m 47s)	2000	40%	0.2011
207m 39s (- 264m 17s)	2200	44%	0.1936
208m 47s (- 226m 11s)	2400	48%	0.1901
209m 54s (- 193m 45s)	2600	52%	0.2003
211m 0s (- 165m 47s)	2800	56%	0.1918
212m 7s (- 141m 25s)	3000	60%	0.1915
213m 13s (- 119m 56s)	3200	64%	0.1862
214m 21s (- 100m 52s)	3400	68%	0.1996
215m 27s (- 83m 47s)	3600	72%	0.1889
216m 33s (- 68m 23s)	3800	76%	0.2022
217m 40s (- 54m 25s)	4000	80%	0.1799
218m 47s (- 41m 40s)	4200	84%	0.2112
219m 54s (- 29m 59s)	4400	88%	0.1945
221m 1s (- 19m 13s)	4600	92%	0.1925
222m 8s (- 9m 15s)	4800	96%	0.1929

In [79]:
torch.save(encoder, enc_dec_path+'p75_tf_encoder.pth')
torch.save(decoder, enc_dec_path+'p75_tf_decoder.pth')


/home/psxca1/anaconda3/lib/python3.6/site-packages/torch/serialization.py:147: UserWarning: Couldn't retrieve source code for container of type EncoderRNN. It won't be checked for correctness upon loading.
  "type " + obj.__name__ + ". It won't be checked "
/home/psxca1/anaconda3/lib/python3.6/site-packages/torch/serialization.py:147: UserWarning: Couldn't retrieve source code for container of type AttnDecoderRNN. It won't be checked for correctness upon loading.
  "type " + obj.__name__ + ". It won't be checked "

In [80]:
with open(list_path+'p75_epochs_list.pkl', 'wb') as f:
    pickle.dump(p75_epochs_list, f)
with open(list_path+'p75_times_list.pkl', 'wb') as f:
    pickle.dump(p75_times_list, f)
with open(list_path+'p75_avg_loss_list.pkl', 'wb') as f:
    pickle.dump(p75_avg_loss_list, f)

In [81]:
plt.plot(p75_times_list, p75_avg_loss_list)
plt.xlabel('Training Time (Minutes)')
plt.ylabel('Average Loss')
plt.title('75% Teacher Forcing Loss over Time')
plt.savefig(plot_path+'p75_tf.png')
plt.show()



In [82]:
test(eval_path+'p75_tf')
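
A caveat when comparing the four runs: training losses are not directly comparable across teacher-forcing ratios, because teacher-forced steps predict the next token from the gold history and are therefore easier; the test outputs above are the fairer comparison. It is still useful to overlay the four loss curves on one plot; a sketch, assuming the lists pickled above:

for name, label in [('none', 'No TF'), ('p25', '25% TF'),
                    ('p50', '50% TF'), ('p75', '75% TF')]:
    times = pickle.load(open(list_path+name+'_times_list.pkl', 'rb'))
    losses = pickle.load(open(list_path+name+'_avg_loss_list.pkl', 'rb'))
    plt.plot(times, losses, label=label)
plt.xlabel('Training Time (Minutes)')
plt.ylabel('Average Loss')
plt.legend()
plt.title('Loss over Time by Teacher Forcing Ratio')
plt.show()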