In [1]:
import unicodedata
import string
import re
import random
import time
import datetime
import math
import socket
hostname = socket.gethostname()
import torch
import torch.nn as nn
from torch.autograd import Variable
from torch import optim
import torch.nn.functional as F
from torch.nn.utils.rnn import pad_packed_sequence, pack_padded_sequence#, masked_cross_entropy
# from masked_cross_entropy import *
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
import numpy as np
%matplotlib inline
verbose = True
In [2]:
USE_CUDA = False
In [3]:
PAD_token = 0
SOS_token = 1
EOS_token = 2
class Lang:
def __init__(self, name):
self.name = name
self.trimmed = False
self.word2index = {}
self.word2count = {}
self.index2word = {0: "PAD", 1: "SOS", 2: "EOS"}
self.n_words = 3 # Count default tokens
def index_words(self, sentence):
for word in sentence:
self.index_word(word)
def index_word(self, word):
if word not in self.word2index:
self.word2index[word] = self.n_words
self.word2count[word] = 1
self.index2word[self.n_words] = word
self.n_words += 1
else:
self.word2count[word] += 1
# Remove words below a certain count threshold
def trim(self, min_count):
if self.trimmed: return
self.trimmed = True
keep_words = []
for k, v in self.word2count.items():
if v >= min_count:
keep_words.append(k)
print('keep_words %s / %s = %.4f' % (
len(keep_words), len(self.word2index), len(keep_words) / len(self.word2index)
))
# Reinitialize dictionaries
self.word2index = {}
self.word2count = {}
self.index2word = {0: "PAD", 1: "SOS", 2: "EOS"}
self.n_words = 3 # Count default tokens
for word in keep_words:
self.index_word(word)
In [4]:
from src.python.geneontology import *
from src.python.preprocess import *
onto = get_ontology('F')
from pymongo import MongoClient
client = MongoClient("mongodb://127.0.0.1:27017")
def load_data(db, asp, codes=exp_codes, limit=None):
q = {'Evidence': {'$in': codes}, 'DB': 'UniProtKB'}
c = limit if limit else db.goa_uniprot.count(q)
s = db.goa_uniprot.find(q)
if limit: s = s.limit(limit)
seqid2goid, goid2seqid = GoAnnotationCollectionLoader(s, c, asp).load()
query = {"_id": {"$in": unique(list(seqid2goid.keys())).tolist()}}
num_seq = db.uniprot.count(query)
src_seq = db.uniprot.find(query)
seqid2seq = UniprotCollectionLoader(src_seq, num_seq).load()
return seqid2seq, goid2seqid, seqid2goid
seqid2seq, goid2seqid, seqid2goid = load_data(client['prot2vec'], 'F', limit=None)
In [5]:
# MIN_LENGTH = 3
# MAX_LENGTH = 25
MIN_LENGTH = 20
MAX_LENGTH = 500
def filter_pairs(pairs_gen):
filtered_pairs = []
original_pairs = []
for pair in pairs_gen:
original_pairs.append(pair)
if len(pair[0]) >= MIN_LENGTH and len(pair[0]) <= MAX_LENGTH \
and len(pair[1]) >= MIN_LENGTH and len(pair[1]) <= MAX_LENGTH:
filtered_pairs.append(pair)
return original_pairs, filtered_pairs
In [6]:
def prepare_data():
pairs_gen = ((seqid2seq[seqid], onto.sort(onto.augment(annots)))
for (seqid, annots) in seqid2goid.items())
input_lang = Lang("AA")
output_lang = Lang("GO")
pairs1, pairs2 = filter_pairs(pairs_gen)
print("Filtered %d to %d pairs" % (len(pairs1), len(pairs2)))
print("Indexing words...")
for pair in pairs2:
input_lang.index_words(pair[0])
output_lang.index_words(pair[1])
print('Indexed %d words in input language, %d words in output' % (input_lang.n_words, output_lang.n_words))
return input_lang, output_lang, pairs2
input_lang, output_lang, pairs = prepare_data()
In [7]:
# MIN_COUNT = 5
MIN_COUNT = 2
input_lang.trim(MIN_COUNT)
output_lang.trim(MIN_COUNT)
In [8]:
keep_pairs = []
for i, pair in enumerate(pairs):
n = len(pairs)
if verbose:
sys.stdout.write("\r{0:.0f}%".format(100.0 * i/n))
input_seq = pair[0]
output_annots = pair[1]
keep_input = True
keep_output = True
for word in input_seq:
if word not in input_lang.word2index:
keep_input = False
break
for word in output_annots:
if word not in output_lang.word2index:
keep_output = False
break
# Remove if pair doesn't match input and output conditions
if keep_input and keep_output:
keep_pairs.append(pair)
print("\nTrimmed from %d pairs to %d, %.4f of total" % (len(pairs), len(keep_pairs), len(keep_pairs) / len(pairs)))
pairs = keep_pairs
In [9]:
# Return a list of indexes, one for each word in the sentence, plus EOS
def indexes_from_sequence(lang, seq):
return [lang.word2index[word] for word in seq] + [EOS_token]
In [10]:
# Pad a with the PAD symbol
def pad_seq(seq, max_length):
seq += [PAD_token for i in range(max_length - len(seq))]
return seq
In [11]:
def random_batch(batch_size):
input_seqs = []
target_seqs = []
# Choose random pairs
for i in range(batch_size):
pair = random.choice(pairs)
input_seqs.append(indexes_from_sequence(input_lang, pair[0]))
target_seqs.append(indexes_from_sequence(output_lang, pair[1]))
# Zip into pairs, sort by length (descending), unzip
seq_pairs = sorted(zip(input_seqs, target_seqs), key=lambda p: len(p[0]), reverse=True)
input_seqs, target_seqs = zip(*seq_pairs)
# For input and target sequences, get array of lengths and pad with 0s to max length
input_lengths = [len(s) for s in input_seqs]
input_padded = [pad_seq(s, max(input_lengths)) for s in input_seqs]
target_lengths = [len(s) for s in target_seqs]
target_padded = [pad_seq(s, max(target_lengths)) for s in target_seqs]
# Turn padded arrays into (batch_size x max_len) tensors, transpose into (max_len x batch_size)
input_var = Variable(torch.LongTensor(input_padded)).transpose(0, 1)
target_var = Variable(torch.LongTensor(target_padded)).transpose(0, 1)
if USE_CUDA:
input_var = input_var.cuda()
target_var = target_var.cuda()
return input_var, input_lengths, target_var, target_lengths
In [12]:
random_batch(2)
Out[12]:
In [13]:
from torch.nn import functional
def sequence_mask(sequence_length, max_len=None):
if max_len is None:
max_len = sequence_length.data.max()
batch_size = sequence_length.size(0)
seq_range = torch.arange(0, max_len).long()
seq_range_expand = seq_range.unsqueeze(0).expand(batch_size, max_len)
seq_range_expand = Variable(seq_range_expand)
# if sequence_length.is_cuda:
if USE_CUDA:
seq_range_expand = seq_range_expand.cuda()
seq_length_expand = (sequence_length.unsqueeze(1)
.expand_as(seq_range_expand))
return seq_range_expand < seq_length_expand
def masked_cross_entropy(logits, target, length):
length = Variable(torch.LongTensor(length))
if USE_CUDA:
length = length.cuda()
"""
Args:
logits: A Variable containing a FloatTensor of size
(batch, max_len, num_classes) which contains the
unnormalized probability for each class.
target: A Variable containing a LongTensor of size
(batch, max_len) which contains the index of the true
class for each corresponding step.
length: A Variable containing a LongTensor of size (batch,)
which contains the length of each data in a batch.
Returns:
loss: An average loss value masked by the length.
"""
# logits_flat: (batch * max_len, num_classes)
logits_flat = logits.view(-1, logits.size(-1))
# log_probs_flat: (batch * max_len, num_classes)
log_probs_flat = functional.log_softmax(logits_flat)
# target_flat: (batch * max_len, 1)
target_flat = target.view(-1, 1)
# losses_flat: (batch * max_len, 1)
losses_flat = -torch.gather(log_probs_flat, dim=1, index=target_flat)
# losses: (batch, max_len)
losses = losses_flat.view(*target.size())
# mask: (batch, max_len)
mask = sequence_mask(sequence_length=length, max_len=target.size(1))
losses = losses * mask.float()
loss = losses.sum() / length.float().sum()
return loss
In [14]:
class EncoderRNN(nn.Module):
def __init__(self, input_size, hidden_size, n_layers=1, dropout=0.1):
super(EncoderRNN, self).__init__()
self.input_size = input_size
self.hidden_size = hidden_size
self.n_layers = n_layers
self.dropout = dropout
self.embedding = nn.Embedding(input_size, hidden_size)
self.gru = nn.GRU(hidden_size, hidden_size, n_layers, dropout=self.dropout, bidirectional=True)
def forward(self, input_seqs, input_lengths, hidden=None):
# Note: we run this all at once (over multiple batches of multiple sequences)
embedded = self.embedding(input_seqs)
packed = torch.nn.utils.rnn.pack_padded_sequence(embedded, input_lengths)
outputs, hidden = self.gru(packed, hidden)
outputs, output_lengths = torch.nn.utils.rnn.pad_packed_sequence(outputs) # unpack (back to padded)
outputs = outputs[:, :, :self.hidden_size] + outputs[:, : ,self.hidden_size:] # Sum bidirectional outputs
return outputs, hidden
In [15]:
class Attn(nn.Module):
def __init__(self, method, hidden_size):
super(Attn, self).__init__()
self.method = method
self.hidden_size = hidden_size
if self.method == 'general':
self.attn = nn.Linear(self.hidden_size, hidden_size)
elif self.method == 'concat':
self.attn = nn.Linear(self.hidden_size * 2, hidden_size)
self.v = nn.Parameter(torch.FloatTensor(1, hidden_size))
def forward(self, hidden, encoder_outputs):
max_len = encoder_outputs.size(0)
this_batch_size = encoder_outputs.size(1)
# Create variable to store attention energies
attn_energies = Variable(torch.zeros(this_batch_size, max_len)) # B x S
if USE_CUDA:
attn_energies = attn_energies.cuda()
# For each batch of encoder outputs
for b in range(this_batch_size):
# Calculate energy for each encoder output
for i in range(max_len):
attn_energies[b, i] = self.score(hidden[:, b], encoder_outputs[i, b].unsqueeze(0))
# Normalize energies to weights in range 0 to 1, resize to 1 x B x S
return F.softmax(attn_energies).unsqueeze(1)
def score(self, hidden, encoder_output):
if self.method == 'dot':
energy = torch.dot(hidden.view(-1), encoder_output.view(-1))
return energy
elif self.method == 'general':
energy = self.attn(encoder_output)
energy = torch.dot(hidden.view(-1), energy.view(-1))
return energy
elif self.method == 'concat':
energy = self.attn(torch.cat((hidden, encoder_output), 1))
energy = self.v.dot(energy)
return energy
In [16]:
class BahdanauAttnDecoderRNN(nn.Module):
def __init__(self, hidden_size, output_size, n_layers=1, dropout_p=0.1):
super(BahdanauAttnDecoderRNN, self).__init__()
# Define parameters
self.hidden_size = hidden_size
self.output_size = output_size
self.n_layers = n_layers
self.dropout_p = dropout_p
self.max_length = max_length
# Define layers
self.embedding = nn.Embedding(output_size, hidden_size)
self.dropout = nn.Dropout(dropout_p)
self.attn = Attn('concat', hidden_size)
self.gru = nn.GRU(hidden_size, hidden_size, n_layers, dropout=dropout_p)
self.out = nn.Linear(hidden_size, output_size)
def forward(self, word_input, last_hidden, encoder_outputs):
# Note: we run this one step at a time
# TODO: FIX BATCHING
# Get the embedding of the current input word (last output word)
word_embedded = self.embedding(word_input).view(1, 1, -1) # S=1 x B x N
word_embedded = self.dropout(word_embedded)
# Calculate attention weights and apply to encoder outputs
attn_weights = self.attn(last_hidden[-1], encoder_outputs)
context = attn_weights.bmm(encoder_outputs.transpose(0, 1)) # B x 1 x N
context = context.transpose(0, 1) # 1 x B x N
# Combine embedded input word and attended context, run through RNN
rnn_input = torch.cat((word_embedded, context), 2)
output, hidden = self.gru(rnn_input, last_hidden)
# Final output layer
output = output.squeeze(0) # B x N
output = F.log_softmax(self.out(torch.cat((output, context), 1)))
# Return final output, hidden state, and attention weights (for visualization)
return output, hidden, attn_weights
In [17]:
class LuongAttnDecoderRNN(nn.Module):
def __init__(self, attn_model, hidden_size, output_size, n_layers=1, dropout=0.1):
super(LuongAttnDecoderRNN, self).__init__()
# Keep for reference
self.attn_model = attn_model
self.hidden_size = hidden_size
self.output_size = output_size
self.n_layers = n_layers
self.dropout = dropout
# Define layers
self.embedding = nn.Embedding(output_size, hidden_size)
self.embedding_dropout = nn.Dropout(dropout)
self.gru = nn.GRU(hidden_size, hidden_size, n_layers, dropout=dropout)
self.concat = nn.Linear(hidden_size * 2, hidden_size)
self.out = nn.Linear(hidden_size, output_size)
# Choose attention model
if attn_model != 'none':
self.attn = Attn(attn_model, hidden_size)
def forward(self, input_seq, last_hidden, encoder_outputs):
# Note: we run this one step at a time
# Get the embedding of the current input word (last output word)
batch_size = input_seq.size(0)
embedded = self.embedding(input_seq)
embedded = self.embedding_dropout(embedded)
embedded = embedded.view(1, batch_size, self.hidden_size) # S=1 x B x N
# Get current hidden state from input word and last hidden state
rnn_output, hidden = self.gru(embedded, last_hidden)
# Calculate attention from current RNN state and all encoder outputs;
# apply to encoder outputs to get weighted average
attn_weights = self.attn(rnn_output, encoder_outputs)
context = attn_weights.bmm(encoder_outputs.transpose(0, 1)) # B x S=1 x N
# Attentional vector using the RNN hidden state and context vector
# concatenated together (Luong eq. 5)
rnn_output = rnn_output.squeeze(0) # S=1 x B x N -> B x N
context = context.squeeze(1) # B x S=1 x N -> B x N
concat_input = torch.cat((rnn_output, context), 1)
concat_output = F.tanh(self.concat(concat_input))
# Finally predict next token (Luong eq. 6, without softmax)
output = self.out(concat_output)
# Return final output, hidden state, and attention weights (for visualization)
return output, hidden, attn_weights
In [18]:
small_batch_size = 3
input_batches, input_lengths, target_batches, target_lengths = random_batch(small_batch_size)
print('input_batches', input_batches.size()) # (max_len x batch_size)
print('target_batches', target_batches.size()) # (max_len x batch_size)
In [19]:
small_hidden_size = 8
small_n_layers = 2
encoder_test = EncoderRNN(input_lang.n_words, small_hidden_size, small_n_layers)
decoder_test = LuongAttnDecoderRNN('general', small_hidden_size, output_lang.n_words, small_n_layers)
if USE_CUDA:
encoder_test.cuda()
decoder_test.cuda()
In [20]:
encoder_outputs, encoder_hidden = encoder_test(input_batches, input_lengths, None)
print('encoder_outputs', encoder_outputs.size()) # max_len x batch_size x hidden_size
print('encoder_hidden', encoder_hidden.size()) # n_layers * 2 x batch_size x hidden_size
In [21]:
max_target_length = max(target_lengths)
# Prepare decoder input and outputs
decoder_input = Variable(torch.LongTensor([SOS_token] * small_batch_size))
decoder_hidden = encoder_hidden[:decoder_test.n_layers] # Use last (forward) hidden state from encoder
all_decoder_outputs = Variable(torch.zeros(max_target_length, small_batch_size, decoder_test.output_size))
if USE_CUDA:
all_decoder_outputs = all_decoder_outputs.cuda()
decoder_input = decoder_input.cuda()
# Run through decoder one time step at a time
for t in range(max_target_length):
decoder_output, decoder_hidden, decoder_attn = decoder_test(
decoder_input, decoder_hidden, encoder_outputs
)
all_decoder_outputs[t] = decoder_output # Store this step's outputs
decoder_input = target_batches[t] # Next input is current target
# Test masked cross entropy loss
loss = masked_cross_entropy(
all_decoder_outputs.transpose(0, 1).contiguous(),
target_batches.transpose(0, 1).contiguous(),
target_lengths
)
print('loss', loss.data[0])
In [22]:
# Configure models
attn_model = 'dot'
hidden_size = 500
n_layers = 2
dropout = 0.1
batch_size = 100
batch_size = 50
# Configure training/optimization
clip = 50.0
teacher_forcing_ratio = 0.5
learning_rate = 0.0001
decoder_learning_ratio = 5.0
n_epochs = 50000
epoch = 0
plot_every = 20
print_every = 20
evaluate_every = 1000
# Initialize models
encoder = EncoderRNN(input_lang.n_words, hidden_size, n_layers, dropout=dropout)
decoder = LuongAttnDecoderRNN(attn_model, hidden_size, output_lang.n_words, n_layers, dropout=dropout)
# Initialize optimizers and criterion
encoder_optimizer = optim.Adam(encoder.parameters(), lr=learning_rate)
decoder_optimizer = optim.Adam(decoder.parameters(), lr=learning_rate * decoder_learning_ratio)
criterion = nn.CrossEntropyLoss()
# Move models to GPU
if USE_CUDA:
encoder.cuda()
decoder.cuda()
import sconce
job = sconce.Job('seq2seq-translate', {
'attn_model': attn_model,
'n_layers': n_layers,
'dropout': dropout,
'hidden_size': hidden_size,
'learning_rate': learning_rate,
'clip': clip,
'teacher_forcing_ratio': teacher_forcing_ratio,
'decoder_learning_ratio': decoder_learning_ratio,
})
job.plot_every = plot_every
job.log_every = print_every
# Keep track of time elapsed and running averages
start = time.time()
plot_losses = []
print_loss_total = 0 # Reset every print_every
plot_loss_total = 0 # Reset every plot_every
In [23]:
def train(input_batches, input_lengths, target_batches, target_lengths, encoder, decoder, encoder_optimizer, decoder_optimizer, criterion, max_length=MAX_LENGTH):
# Zero gradients of both optimizers
encoder_optimizer.zero_grad()
decoder_optimizer.zero_grad()
loss = 0 # Added onto for each word
# Run words through encoder
encoder_outputs, encoder_hidden = encoder(input_batches, input_lengths, None)
# Prepare input and output variables
decoder_input = Variable(torch.LongTensor([SOS_token] * batch_size))
decoder_hidden = encoder_hidden[:decoder.n_layers] # Use last (forward) hidden state from encoder
max_target_length = max(target_lengths)
all_decoder_outputs = Variable(torch.zeros(max_target_length, batch_size, decoder.output_size))
# Move new Variables to CUDA
if USE_CUDA:
decoder_input = decoder_input.cuda()
all_decoder_outputs = all_decoder_outputs.cuda()
# Run through decoder one time step at a time
for t in range(max_target_length):
decoder_output, decoder_hidden, decoder_attn = decoder(
decoder_input, decoder_hidden, encoder_outputs
)
all_decoder_outputs[t] = decoder_output
decoder_input = target_batches[t] # Next input is current target
# Loss calculation and backpropagation
loss = masked_cross_entropy(
all_decoder_outputs.transpose(0, 1).contiguous(), # -> batch x seq
target_batches.transpose(0, 1).contiguous(), # -> batch x seq
target_lengths
)
loss.backward()
# Clip gradient norms
ec = torch.nn.utils.clip_grad_norm(encoder.parameters(), clip)
dc = torch.nn.utils.clip_grad_norm(decoder.parameters(), clip)
# Update parameters with optimizers
encoder_optimizer.step()
decoder_optimizer.step()
return loss.data[0], ec, dc
In [24]:
def as_minutes(s):
m = math.floor(s / 60)
s -= m * 60
return '%dm %ds' % (m, s)
def time_since(since, percent):
now = time.time()
s = now - since
es = s / (percent)
rs = es - s
return '%s (- %s)' % (as_minutes(s), as_minutes(rs))
In [25]:
def evaluate(input_seq, max_length=MAX_LENGTH):
input_lengths = [len(input_seq)]
input_seqs = [indexes_from_sequence(input_lang, input_seq)]
input_batches = Variable(torch.LongTensor(input_seqs), volatile=True).transpose(0, 1)
if USE_CUDA:
input_batches = input_batches.cuda()
# Set to not-training mode to disable dropout
encoder.train(False)
decoder.train(False)
# Run through encoder
encoder_outputs, encoder_hidden = encoder(input_batches, input_lengths, None)
# Create starting vectors for decoder
decoder_input = Variable(torch.LongTensor([SOS_token]), volatile=True) # SOS
decoder_hidden = encoder_hidden[:decoder.n_layers] # Use last (forward) hidden state from encoder
if USE_CUDA:
decoder_input = decoder_input.cuda()
# Store output words and attention states
decoded_words = []
decoder_attentions = torch.zeros(max_length + 1, max_length + 1)
# Run through decoder
for di in range(max_length):
decoder_output, decoder_hidden, decoder_attention = decoder(
decoder_input, decoder_hidden, encoder_outputs
)
decoder_attentions[di,:decoder_attention.size(2)] += decoder_attention.squeeze(0).squeeze(0).cpu().data
# Choose top word from output
topv, topi = decoder_output.data.topk(1)
ni = topi[0][0]
if ni == EOS_token:
decoded_words.append('<EOS>')
break
else:
decoded_words.append(output_lang.index2word[ni])
# Next input is chosen word
decoder_input = Variable(torch.LongTensor([ni]))
if USE_CUDA: decoder_input = decoder_input.cuda()
# Set back to training mode
encoder.train(True)
decoder.train(True)
return decoded_words, decoder_attentions[:di+1, :len(encoder_outputs)]
In [26]:
def evaluate_randomly():
[input_sentence, target_sentence] = random.choice(pairs)
evaluate_and_show_attention(input_sentence, target_sentence)
In [27]:
import io
import torchvision
from PIL import Image
import visdom
vis = visdom.Visdom()
def show_plot_visdom():
buf = io.BytesIO()
plt.savefig(buf)
buf.seek(0)
attn_win = 'attention (%s)' % hostname
im = Image.open(buf).convert("RGB")
vis.image(torchvision.transforms.ToTensor()(im), win=attn_win, opts={'title': attn_win})
In [28]:
def show_attention(input_sentence, output_words, attentions):
# Set up figure with colorbar
fig = plt.figure()
ax = fig.add_subplot(111)
cax = ax.matshow(attentions.numpy(), cmap='bone')
fig.colorbar(cax)
# Set up axes
ax.set_xticklabels([''] + input_sentence.split(' ') + ['<EOS>'], rotation=90)
ax.set_yticklabels([''] + output_words)
# Show label at every tick
ax.xaxis.set_major_locator(ticker.MultipleLocator(1))
ax.yaxis.set_major_locator(ticker.MultipleLocator(1))
show_plot_visdom()
plt.show()
plt.close()
In [ ]:
def evaluate_and_show_attention(input_sentence, target_sentence=None):
output_words, attentions = evaluate(input_sentence)
output_sentence = ' '.join(output_words)
print('>', input_sentence)
if target_sentence is not None:
print('=', target_sentence)
print('<', output_sentence)
show_attention(input_sentence, output_words, attentions)
# Show input, target, output text in visdom
win = 'evaluted (%s)' % hostname
text = '<p>> %s</p><p>= %s</p><p>< %s</p>' % (input_sentence, target_sentence, output_sentence)
vis.text(text, win=win, opts={'title': win})
In [ ]:
# Begin!
ecs = []
dcs = []
eca = 0
dca = 0
while epoch < n_epochs:
epoch += 1
# Get training data for this cycle
input_batches, input_lengths, target_batches, target_lengths = random_batch(batch_size)
# Run the train function
loss, ec, dc = train(
input_batches, input_lengths, target_batches, target_lengths,
encoder, decoder,
encoder_optimizer, decoder_optimizer, criterion
)
# Keep track of loss
print_loss_total += loss
plot_loss_total += loss
eca += ec
dca += dc
job.record(epoch, loss)
if epoch % print_every == 0:
print_loss_avg = print_loss_total / print_every
print_loss_total = 0
print_summary = '%s (%d %d%%) %.4f' % (time_since(start, epoch / n_epochs), epoch, epoch / n_epochs * 100, print_loss_avg)
print(print_summary)
if epoch % evaluate_every == 0:
evaluate_randomly()
if epoch % plot_every == 0:
plot_loss_avg = plot_loss_total / plot_every
plot_losses.append(plot_loss_avg)
plot_loss_total = 0
# TODO: Running average helper
ecs.append(eca / plot_every)
dcs.append(dca / plot_every)
ecs_win = 'encoder grad (%s)' % hostname
dcs_win = 'decoder grad (%s)' % hostname
vis.line(np.array(ecs), win=ecs_win, opts={'title': ecs_win})
vis.line(np.array(dcs), win=dcs_win, opts={'title': dcs_win})
eca = 0
dca = 0
In [ ]:
def show_plot(points):
plt.figure()
fig, ax = plt.subplots()
loc = ticker.MultipleLocator(base=0.2) # put ticks at regular intervals
ax.yaxis.set_major_locator(loc)
plt.plot(points)
show_plot(plot_losses)