In [2]:
import argparse
import os
import shutil
import string
import gzip
from collections import OrderedDict
import numpy as np
import nltk
from nltk.corpus import stopwords
import torch
import torch.nn as nn
from torch.autograd import Variable
from torch.utils.data import Dataset, DataLoader
from dpp_nets.utils.io import make_embd, make_tensor_dataset
from dpp_nets.my_torch.utilities import pad_tensor
In [3]:
data_path = '/Users/Max/data/beer_reviews/reviews.all.train.chunks.txt.gz'
word_path = '/Users/Max/data/beer_reviews/reviews.all.train.words.txt.gz'
embd_path = '/Users/Max/data/beer_reviews/review+wiki.filtered.200.txt.gz'
In [3]:
class Vocabulary:
def __init__(self):
# Basic Indexing
self.word2index = {}
self.index2word = {}
# Keeping track of vocabulary
self.vocab_size = 0
self.word2count = {}
# Vector Dictionaries
self.pretrained = {}
self.random = {}
self.word2vec = {}
self.index2vec = {}
# Set of Stop Words
self.stop_words = set()
self.Embedding = None
self.EmbeddingBag = None
def setStops(self):
self.stop_words = set(nltk.corpus.stopwords.words('english'))
make_stops = set(string.punctuation + '\n' + '\t' + '...')
unmake_stops = set(('no', 'not'))
self.stop_words = self.stop_words.union(make_stops)
self.stop_words = self.stop_words.difference(unmake_stops)
def loadPretrained(self, embd_path):
self.pretrained = {}
with gzip.open(embd_path, 'rt') as f:
for line in f:
line = line.strip()
if line:
word, *embd = line.split()
vec = torch.FloatTensor([float(dim) for dim in embd])
self.pretrained[word] = vec
    def loadCorpus(self, word_path):
        # Build counts, indices and vectors from the (gzipped) training corpus
        with gzip.open(word_path, 'rt') as f:
            for line in f:
                _, review = line.split('\D')
                review = tuple(tuple(chunk.split('\W')) for chunk in review.split('\T'))
                for words in review:
                    self.addWords(words)
def addWords(self, words):
"""
        words: sequence containing a variable number of words
"""
for word in words:
self.addWord(word)
def addWord(self, word):
if word not in self.word2index:
# Keeping track of vocabulary
self.vocab_size += 1
self.word2count[word] = 1
# Basic Indexing
self.word2index[word] = self.vocab_size
self.index2word[self.vocab_size] = word
# Add word vector
if word in self.pretrained:
vec = self.pretrained[word]
self.word2vec[word] = vec
self.index2vec[self.vocab_size] = vec
else:
vec = torch.randn(200)
self.random[word] = vec
self.word2vec[word] = vec
self.index2vec[self.vocab_size] = vec
else:
self.word2count[word] += 1
def updateEmbedding(self):
vocab_size = len(self.index2vec) + 1
EMBD_DIM = 200
self.Embedding = nn.Embedding(vocab_size, EMBD_DIM, padding_idx=0)
self.EmbeddingBag = nn.EmbeddingBag(vocab_size, EMBD_DIM)
embd_matrix = torch.zeros(vocab_size, EMBD_DIM)
        for ix, vec in self.index2vec.items():
embd_matrix[ix] = vec
embd_dict = OrderedDict([('weight', embd_matrix)])
self.Embedding.load_state_dict(embd_dict)
self.EmbeddingBag.load_state_dict(embd_dict)
    def checkWord(self, word, min_count):
        # Keep a word only if it is not a stop word, is in the vocabulary,
        # and occurs more than min_count times in the corpus
        if word not in self.stop_words and word in self.word2index and self.word2count[word] > min_count:
            return word
def filterReview(self, review):
"""
        review: tuple of chunks, each chunk a tuple of words (the data set format)
"""
f_review = []
seen = set()
for tup in review:
f_tuple = []
for word in tup:
word = self.checkWord(word, 10)
if word:
f_tuple.append(word)
f_tuple = tuple(f_tuple)
if f_tuple and f_tuple not in seen:
seen.add(f_tuple)
f_review.append(f_tuple)
return f_review
def mapIndicesBatch(self, reviews):
f_review = []
offset = []
i = 0
for review in reviews:
seen = set()
for tup in review:
f_tuple = []
for word in tup:
                    word = self.checkWord(word, 10)
if word:
f_tuple.append(word)
f_tuple = tuple(f_tuple)
if f_tuple and f_tuple not in seen:
seen.add(f_tuple)
                    f_review.extend([self.word2index[word] for word in f_tuple])
offset.append(i)
i += len(f_tuple)
f_review, offset = torch.LongTensor(f_review), torch.LongTensor(offset)
return f_review, offset
def mapIndices(self, review):
f_review = []
offset = []
seen = set()
i = 0
for tup in review:
f_tuple = []
for word in tup:
                word = self.checkWord(word, 10)
if word:
f_tuple.append(word)
f_tuple = tuple(f_tuple)
if f_tuple and f_tuple not in seen:
seen.add(f_tuple)
                f_review.extend([self.word2index[word] for word in f_tuple])
offset.append(i)
i += len(f_tuple)
f_review, offset = torch.LongTensor(f_review), torch.LongTensor(offset)
return f_review, offset
def returnEmbds(self, review):
f_review = []
offset = []
seen = set()
i = 0
for tup in review:
f_tuple = []
for word in tup:
                word = self.checkWord(word, 10)
if word:
f_tuple.append(word)
f_tuple = tuple(f_tuple)
if f_tuple and f_tuple not in seen:
seen.add(f_tuple)
                f_review.extend([self.word2index[word] for word in f_tuple])
offset.append(i)
i += len(f_tuple)
f_review, offset = Variable(torch.LongTensor(f_review)), Variable(torch.LongTensor(offset))
embd = self.EmbeddingBag(f_review, offset)
return embd
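In [ ]:
# Illustrative sketch (not part of the original pipeline): how mapIndices feeds
# nn.EmbeddingBag. Assumes `vocab` has been built as in In [5] below; the review
# literal is made up, and words that are stop words, out of vocabulary, or too rare
# are simply dropped by checkWord. mapIndices returns one flat index list plus the
# start offset of every kept chunk; EmbeddingBag (default mode='mean') then averages
# each chunk's word vectors into a single 200-dim chunk embedding.
review = (('pours', 'a', 'hazy', 'golden'), ('great', 'citrus', 'aroma'))
ixs, offsets = vocab.mapIndices(review)
chunk_embds = vocab.EmbeddingBag(Variable(ixs), Variable(offsets))
print(chunk_embds.size())   # (number of kept chunks, 200)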
In [4]:
class BeerDataset(Dataset):
"""BeerDataset."""
    def __init__(self, data_path, vocab, aspect='all'):
        self.aspect = aspect
        self.vocab = vocab
        # Read all (gzipped) review lines into memory
        with gzip.open(data_path, 'rt') as f:
            self.lines = f.readlines()
def __len__(self):
return len(self.lines)
def __getitem__(self, idx):
# Decode
target, review = self.lines[idx].split('\D')
# Target
target = torch.FloatTensor([float(t) for t in target.split()[:3]])
# Review
review = tuple(tuple(chunk.split('\W')) for chunk in review.split('\T'))
#ixs, offset = self.vocab.mapIndices(review)
#sample = {'ixs': ixs, 'offset': offset, 'target': target}
sample = {'review': review, 'target': target}
return sample
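In [ ]:
# Illustrative sketch (fabricated line with made-up scores): the on-disk format that
# __getitem__ above assumes. Each gzipped line is "<scores>\D<review>"; the review is
# split into chunks on the literal token '\T' and each chunk into words on '\W'.
line = r'0.8 0.6 0.7 0.9 0.5\Dpours\Wa\Whazy\Wgolden\Tgreat\Wcitrus\Waroma'
target, review = line.split('\D')
target = torch.FloatTensor([float(t) for t in target.split()[:3]])   # first three aspect scores
review = tuple(tuple(chunk.split('\W')) for chunk in review.split('\T'))
print(target, review)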
In [5]:
vocab = Vocabulary()
vocab.loadPretrained(embd_path)
vocab.setStops()
vocab.loadCorpus(word_path)
vocab.updateEmbedding()
ds = BeerDataset(data_path, vocab)
In [6]:
a = 'abc'
b = 'def'
os.path.join(a,b)
Out[6]:
In [8]:
# Solution 2: my_collate2 + a new kernel network (KernelVar)
from dpp_nets.layers.layers import MarginalSampler, PredNet
def my_collate2(batch, vocab=vocab):
# Create indices
s_ix, e_ix, i = [], [], 0
for l in [len(vocab.filterReview(d['review'])) for d in batch]:
s_ix.append(i)
i += l
e_ix.append(i)
# Map to Embeddings
batch_review = [review['review'] for review in batch]
ixs, offsets = vocab.mapIndicesBatch(batch_review)
embd = vocab.EmbeddingBag(Variable(ixs), Variable(offsets))
# Create target vector
target_tensor = Variable(torch.stack([d['target'] for d in batch]))
return embd, target_tensor, s_ix, e_ix
class KernelVar(nn.Module):
def __init__(self, embd_dim, hidden_dim, kernel_dim):
"""
        Creates a 2-hidden-layer network
        with Tanh non-linearities.
"""
super(KernelVar, self).__init__()
self.embd_dim = embd_dim
self.hidden_dim = hidden_dim
self.kernel_dim = kernel_dim
self.layer1 = nn.Linear(2 * embd_dim, hidden_dim)
self.layer2 = nn.Linear(hidden_dim, hidden_dim)
self.layer3 = nn.Linear(hidden_dim, kernel_dim)
self.net = nn.Sequential(self.layer1, nn.Tanh(), self.layer2, nn.Tanh(), self.layer3)
self.s_ix = None
self.e_ix = None
def forward(self, embd):
"""
Given words, returns batch_kernel of dimension
[-1, kernel_dim]
"""
# Create context
context = []
for s, e in zip(self.s_ix, self.e_ix):
text = embd[s:e].sum(0, keepdim=True).expand_as(embd[s:e])
context.append(text)
context = torch.cat(context, dim=0)
batch_x = torch.cat([embd, context], dim=1)
batch_kernel = self.net(batch_x)
return batch_kernel , embd
from timeit import default_timer
start = default_timer()
dl = DataLoader(ds, batch_size=500, collate_fn=my_collate2)
for batch in dl:
break
embd, target, s_ix, e_ix = batch
embd_dim = 200
hidden_dim = 500
kernel_dim = 200
enc_dim = 200
target_dim = 3
kernel_net = KernelVar(embd_dim, hidden_dim, kernel_dim)
kernel_net.s_ix, kernel_net.e_ix = s_ix, e_ix
sampler = MarginalSampler()
pred_net = PredNet(embd_dim, hidden_dim, enc_dim, target_dim)
criterion = nn.MSELoss()
activation = nn.Sigmoid()
pred = None
pred_loss = None
reg_loss = None
loss = None
reg = 10
reg_mean = 0.1
kernel, words = kernel_net(embd) # returned words are masked now!
sampler.s_ix = kernel_net.s_ix
sampler.e_ix = kernel_net.e_ix
weighted_words = sampler(kernel, words)
pred_net.s_ix = sampler.s_ix
pred_net.e_ix = sampler.e_ix
pred = pred_net(weighted_words)
target = batch[1]
if activation:
pred = activation(pred)
pred_loss = criterion(pred, target)
if reg:
reg_loss = reg * (torch.stack(sampler.exp_sizes) - reg_mean).pow(2).mean()
loss = pred_loss + reg_loss
else:
loss = pred_loss
loss.backward()
duration = default_timer() - start
print(duration)
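In [ ]:
# Sketch of the bookkeeping used by my_collate2 above: s_ix/e_ix are the start/end row
# offsets of each review inside the flattened (total_chunks x embd_dim) batch, so
# embd[s_ix[k]:e_ix[k]] recovers the chunk embeddings of review k.
chunk_counts = [3, 1, 2]       # made-up chunk counts for a batch of three reviews
s_ix, e_ix, i = [], [], 0
for l in chunk_counts:
    s_ix.append(i)
    i += l
    e_ix.append(i)
print(s_ix, e_ix)              # [0, 3, 4] [3, 4, 6]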
In [56]:
def my_collate(batch, vocab=vocab):
# Count sizes
max_no_chunks = 0
for d in batch:
max_no_chunks = max(max_no_chunks, len(vocab.filterReview(d['review'])))
# Map to Embeddings
reps = []
for d in batch:
rep = vocab.returnEmbds(d['review'])
rep = torch.cat([rep, Variable(torch.zeros(max_no_chunks + 1 - rep.size(0), rep.size(1)))], dim=0)
reps.append(rep)
data_tensor = torch.stack(reps)
# Create target vector
target_tensor = Variable(torch.stack([d['target'] for d in batch]))
return data_tensor, target_tensor
# Solution 1 using my_collate
from timeit import default_timer
from dpp_nets.layers.layers import KernelVar, MarginalSampler, PredNet
start = default_timer()
dl = DataLoader(ds, batch_size=500, collate_fn=my_collate)
for batch in dl:
break
words = batch[0]
embd_dim = 200
hidden_dim = 500
kernel_dim = 200
enc_dim = 200
target_dim = 3
kernel_net = KernelVar(embd_dim, hidden_dim, kernel_dim)
sampler = MarginalSampler()
pred_net = PredNet(embd_dim, hidden_dim, enc_dim, target_dim)
criterion = nn.MSELoss()
activation = nn.Sigmoid()
pred = None
pred_loss = None
reg_loss = None
loss = None
reg = 10
reg_mean = 0.1
kernel, words = kernel_net(words) # returned words are masked now!
sampler.s_ix = kernel_net.s_ix
sampler.e_ix = kernel_net.e_ix
weighted_words = sampler(kernel, words)
pred_net.s_ix = sampler.s_ix
pred_net.e_ix = sampler.e_ix
pred = pred_net(weighted_words)
target = batch[1]
if activation:
pred = activation(pred)
pred_loss = criterion(pred, target)
if reg:
reg_loss = reg * (torch.stack(sampler.exp_sizes) - reg_mean).pow(2).mean()
loss = pred_loss + reg_loss
else:
loss = pred_loss
loss.backward()
duration = default_timer() - start
print(duration)
In [10]:
torch.utils.backcompat.broadcast_warning.enabled = True
torch.utils.backcompat.keepdim_warning.enabled = True
words = Variable(torch.FloatTensor([[[1,2,3,4],[3,4,5,6],[0,0,0,0]],[[1,2,3,4],[0,0,0,0],[0,0,0,0]]]))
In [11]:
for batch in dl:
break
In [12]:
vocab.EmbeddingBag.parameters()
Out[12]:
In [13]:
vocab.EmbeddingBag.weight[3,3]
Out[13]:
In [14]:
vocab.EmbeddingBag.double()
Out[14]:
In [15]:
my_collate2()
In [ ]:
A = torch.randn(5,5)
L = A.mm(A.t())
In [ ]:
import numpy as np
A = torch.randn(5,5)
L = A.mm(A.t())
A = A.numpy()
L = L.numpy()
In [ ]:
n = L.shape[0]
no_choice = list(range(n))
choice = []
# Conditional L-kernel given the items in `choice`:
# L^A = ([(L + I_notA)^-1]_notA)^-1 - I, where I_notA is diagonal on the unchosen items
identity = np.zeros(n)
identity[no_choice] = np.ones(len(no_choice))
inverse = np.linalg.inv(L + np.diag(identity))
inverse_select = inverse[np.ix_(no_choice, no_choice)]
LA = np.linalg.inv(inverse_select) - np.identity(len(no_choice))
# Marginal kernel of the (unconditioned) DPP
K = L.dot(np.linalg.inv(L + np.eye(n)))
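In [ ]:
# Sketch (standard L-ensemble identity, added for reference): the marginal kernel
# satisfies K = L (L + I)^-1 = I - (L + I)^-1, and K[i, i] is the probability that
# item i appears in a sample from the DPP defined by L. Uses the numpy L from above.
n = L.shape[0]
K = L.dot(np.linalg.inv(L + np.eye(n)))
K_alt = np.eye(n) - np.linalg.inv(L + np.eye(n))
print(np.allclose(K, K_alt))   # the two expressions agree
print(np.diag(K))              # per-item inclusion probabilities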
In [ ]:
def computeMAP(L):
# initialization
n = L.shape[0]
no_choice = list(range(n))
choice = []
best_p = 0
while True:
candidates = [choice + [j] for j in no_choice]
submats = [L[np.ix_(cand, cand)] for cand in candidates]
probs = [np.linalg.det(submat) - best_p for submat in submats]
if all(p <= 0 for p in probs):
return choice
else:
which = np.argmax(np.array(probs))
choice = candidates[which]
which_elem = choice[-1]
no_choice.remove(which_elem)
best_p += probs[which]
In [ ]:
from itertools import chain, combinations
def exactMAP(L):
n = L.shape[0]
# Generate powerset
s = list(range(n))
powerset = list(chain.from_iterable(combinations(s, r) for r in range(len(s)+1)))
# Compute Probabilities
probs = np.array([np.linalg.det(L[np.ix_(choice, choice)]) for choice in powerset])
which = np.argmax(probs)
MAP = powerset[which], probs[which]
return MAP
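In [ ]:
# Sketch (usage of the two functions above): on a 5x5 kernel the exhaustive search is
# cheap, so it can be used to sanity-check the greedy computeMAP selection.
greedy = computeMAP(L)
exact_choice, exact_prob = exactMAP(L)
print(sorted(greedy), np.linalg.det(L[np.ix_(greedy, greedy)]))
print(sorted(exact_choice), exact_prob)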
In [ ]:
choice = computeMAP(L)
print(choice)
print(len(choice))
In [ ]:
import itertools
from itertools import chain, combinations
def powerset(iterable):
"powerset([1,2,3]) --> () (1,) (2,) (3,) (1,2) (1,3) (2,3) (1,2,3)"
s = list(iterable)
return chain.from_iterable(combinations(s, r) for r in range(len(s)+1))
In [ ]:
[(choice, np.linalg.det(L[np.ix_(choice, choice)])) for choice in list(powerset(range(6)))]
In [16]:
batch
Out[16]:
In [49]:
words, target = batch
batch_size, max_set_size, embd_dim = words.size()
word_sums = words.sum(1)
lengths = Variable(words.data.sum(2, keepdim=True).abs().sign().sum(1).expand_as(word_sums))
word_means = word_sums / lengths
word_means
Out[49]:
In [59]:
batch
Out[59]:
In [60]:
# Note: this cell appears to step through a module's forward pass (e.g. a PredNet-style
# encoder), so `self.enc_net`, `self.pred_net` and `enc_dim` are that module's attributes
# and the cell will not run standalone at the notebook level.
batch_size, max_set_size, embd_dim = words.size()
# Unpacking to send through encoder network
# Register indices of individual instances in batch for reconstruction
lengths = words.data.sum(2, keepdim=True).abs().sign().sum(1, keepdim=True)
s_ix = list(lengths.squeeze().cumsum(0).long() - lengths.squeeze().long())
e_ix = list(lengths.squeeze().cumsum(0).long())
# Filter out zero words
mask = words.data.sum(2, keepdim=True).abs().sign().expand_as(words).byte()
words = words.masked_select(Variable(mask)).view(-1, embd_dim)
# Send through encoder network
enc_words = self.enc_net(words)
# Compilation of encoded words for each instance in sample
# Produce summed representation (code) for each instance in batch using encoded words:
codes = []
for i, (s, e) in enumerate(zip(s_ix, e_ix)):
code = enc_words[s:e].mean(0, keepdim=True)
codes.append(code)
codes = torch.stack(codes).squeeze(1)
assert batch_size == codes.size(0)
assert enc_dim == codes.size(1)
# Produce predictions using codes
pred = self.pred_net(codes)
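In [ ]:
# Toy check of the masking trick above: sum(2).abs().sign() is 1 for real word vectors
# and 0 for zero padding (ignoring the unlikely case of a real vector summing to 0),
# so summing it over the set dimension recovers each instance's true length.
toy = Variable(torch.FloatTensor([[[1, 2, 3], [2, 2, 2], [0, 0, 0]],
                                  [[1, 2, 3], [0, 0, 0], [0, 0, 0]]]))
toy_lengths = toy.data.sum(2, keepdim=True).abs().sign().sum(1, keepdim=True)
print(toy_lengths.squeeze())                              # 2 and 1 non-padding rows
toy_mask = toy.data.sum(2, keepdim=True).abs().sign().expand_as(toy.data).byte()
print(toy.masked_select(Variable(toy_mask)).view(-1, 3))  # only the real word vectors remain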
In [165]:
class AttentionBaseline(nn.Module):
"""
Works with different set sizes, i.e. it does masking!
"""
def __init__(self, embd_dim, hidden_dim, target_dim):
super(AttentionBaseline, self).__init__()
self.embd_dim = embd_dim
self.hidden_dim = hidden_dim
self.target_dim = target_dim
# Attention Network
self.attention_layer = nn.Sequential(nn.Linear(2 * embd_dim, hidden_dim), nn.Tanh())
self.v = nn.Parameter(torch.randn(hidden_dim, 1))
# Uses the sum of the encoded vectors to make a final prediction
        self.pred_layer1 = nn.Linear(embd_dim, hidden_dim)
self.pred_layer2 = nn.Linear(hidden_dim, hidden_dim)
self.pred_layer3 = nn.Linear(hidden_dim, target_dim)
self.pred_net = nn.Sequential(self.pred_layer1, nn.ReLU(), self.pred_layer2, nn.ReLU(), self.pred_layer3)
self.s_ix = []
self.e_ix = []
self.attention_unnorm = None
self.attention = None
self.words = None
self.weighted_words = None
def forward(self, words):
"""
words is a 3D tensor with dimension: batch_size x max_set_size x embd_dim
"""
embd_dim = self.embd_dim
hidden_dim = self.hidden_dim
target_dim = self.target_dim
batch_size, max_set_size, embd_dim = words.size()
# Create context
lengths = words.sum(2, keepdim=True).abs().sign().sum(1, keepdim=True)
context = (words.sum(1, keepdim=True) / lengths.expand_as(words.sum(1, keepdim=True))).expand_as(words)
# Filter out zero words
mask = words.data.sum(2, keepdim=True).abs().sign().expand_as(words).byte()
self.words = words.masked_select(Variable(mask)).view(-1, embd_dim)
context = context.masked_select(Variable(mask)).view(-1, embd_dim)
# Concatenate and compute attention
batch_x = torch.cat([self.words, context], dim=1)
print('batch_x', batch_x)
self.attention_unnorm = self.attention_layer(batch_x).mm(self.v)
print('attention_unnorm', self.attention_unnorm)
self.s_ix = list(lengths.squeeze().cumsum(0).long().data - lengths.squeeze().long().data)
self.e_ix = list(lengths.squeeze().cumsum(0).long().data)
# Apply attention
reps = []
for i, (s, e) in enumerate(zip(self.s_ix, self.e_ix)):
self.attention = (nn.Softmax()(self.attention_unnorm[s:e].t())).t()
rep = (self.attention * self.words[s:e]).sum(0)
reps.append(rep)
self.weighted_words = torch.stack(reps)
assert self.weighted_words.size(0) == batch_size
print('weighted_words', self.weighted_words)
pred = self.pred_net(self.weighted_words)
return pred
torch.manual_seed(0)
words = Variable(torch.FloatTensor([[[1,2,3],[2,2,2],[0,0,0],[0,0,0]],[[1,2,3],[0,0,0],[0,0,0],[0,0,0]]]))
net = AttentionBaseline(3, 10, 2)
net(words)
Out[165]:
In [167]:
reps = []
for s, e in zip(net.s_ix, net.e_ix):
attention = (nn.Softmax()(net.attention_unnorm[s:e].t())).t().expand_as(net.words[s:e])
print(attention)
rep = (attention * net.words[s:e]).sum(0)
reps.append(rep)
torch.stack(reps)
Out[167]:
In [138]:
attention
Out[138]:
In [168]:
class AttentionBaseline(nn.Module):
"""
Works with different set sizes, i.e. it does masking!
"""
def __init__(self, embd_dim, hidden_dim, target_dim):
super(AttentionBaseline, self).__init__()
self.embd_dim = embd_dim
self.hidden_dim = hidden_dim
self.target_dim = target_dim
# Attention Network
self.attention_layer = nn.Sequential(nn.Linear(2 * embd_dim, hidden_dim), nn.Tanh())
self.v = nn.Parameter(torch.randn(hidden_dim, 1))
# Uses the sum of the encoded vectors to make a final prediction
        self.pred_layer1 = nn.Linear(embd_dim, hidden_dim)
self.pred_layer2 = nn.Linear(hidden_dim, hidden_dim)
self.pred_layer3 = nn.Linear(hidden_dim, target_dim)
self.pred_net = nn.Sequential(self.pred_layer1, nn.ReLU(), self.pred_layer2, nn.ReLU(), self.pred_layer3)
self.s_ix = []
self.e_ix = []
self.attention = []
def forward(self, words):
"""
words is a 3D tensor with dimension: batch_size x max_set_size x embd_dim
"""
embd_dim = self.embd_dim
hidden_dim = self.hidden_dim
target_dim = self.target_dim
batch_size, max_set_size, embd_dim = words.size()
# Create context
lengths = words.sum(2, keepdim=True).abs().sign().sum(1, keepdim=True)
context = (words.sum(1, keepdim=True) / lengths.expand_as(words.sum(1, keepdim=True))).expand_as(words)
# Filter out zero words
mask = words.data.sum(2, keepdim=True).abs().sign().expand_as(words).byte()
words = words.masked_select(Variable(mask)).view(-1, embd_dim)
context = context.masked_select(Variable(mask)).view(-1, embd_dim)
# Concatenate and compute attention
batch_x = torch.cat([words, context], dim=1)
attention_unnorm = self.attention_layer(batch_x).mm(self.v)
self.s_ix = list(lengths.squeeze().cumsum(0).long().data - lengths.squeeze().long().data)
self.e_ix = list(lengths.squeeze().cumsum(0).long().data)
# Apply attention
reps = []
for i, (s, e) in enumerate(zip(self.s_ix, self.e_ix)):
attention = (nn.Softmax()(attention_unnorm[s:e].t())).t()
self.attention.append(attention.data)
rep = (attention * words[s:e]).sum(0)
reps.append(rep)
weighted_words = torch.stack(reps)
assert weighted_words.size(0) == batch_size
pred = self.pred_net(weighted_words)
return pred
torch.manual_seed(0)
words = Variable(torch.FloatTensor([[[1,2,3],[2,2,2],[0,0,0],[0,0,0]],[[1,2,3],[0,0,0],[0,0,0],[0,0,0]]]))
net = AttentionBaseline(3, 10, 2)
net(words)
Out[168]:
In [61]:
# trained models
root_path = '/Users/Max/checkpoints/beer_reviews/'
model = 'allchunksreg0.01reg_mean10.0lr0.001marginal_best_ckp.pth.tar'
name = root_path + model
my_d = torch.load(name, map_location=lambda storage, loc: storage)
In [17]:
list(my_d.keys())
Out[17]:
In [62]:
from dpp_nets.utils.language import Vocabulary, BeerDataset, custom_collate
from dpp_nets.layers.layers import ChunkTrainer
train_path = '/Users/Max/data/beer_reviews/' + 'reviews.' + 'all' + '.train.' + 'chunks' + '.txt.gz'
val_path = '/Users/Max/data/beer_reviews/' + 'reviews.' + 'all' + '.heldout.' + 'chunks' + '.txt.gz'
embd_path = '/Users/Max/data/beer_reviews/' + 'review+wiki.filtered.200.txt.gz'
word_path = '/Users/Max/data/beer_reviews/' + 'reviews.' + 'all' + '.train.' + 'words.txt.gz'
In [23]:
vocab = Vocabulary()
vocab.loadPretrained(embd_path)
vocab.setStops()
vocab.loadCorpus(word_path)
vocab.updateEmbedding()
vocab.setCuda(False)
In [63]:
trainer = ChunkTrainer(200, 500, 200, 200, 3)
trainer.activation = nn.Sigmoid()
trainer.reg = 0.1
trainer.reg_mean = 10
trainer.load_state_dict(my_d['state_dict'])
In [44]:
my_collate = custom_collate(vocab, False)
In [50]:
val_set = BeerDataset(val_path)
val_loader = torch.utils.data.DataLoader(val_set, collate_fn=my_collate, batch_size=10)
train_set = BeerDataset(train_path)
train_loader = torch.utils.data.DataLoader(train_set, collate_fn=my_collate, batch_size=10)
In [46]:
def validate(loader, trainer):
trainer.eval()
total_loss = 0.0
total_pred_loss = 0.0
total_reg_loss = 0.0
for i, batch in enumerate(loader, 1):
review, target = batch
trainer(review, target)
loss = trainer.loss.data[0]
pred_loss = trainer.pred_loss.data[0]
reg_loss = trainer.reg_loss.data[0]
delta = loss - total_loss
total_loss += (delta / i)
delta = pred_loss - total_pred_loss
total_pred_loss += (delta / i)
delta = reg_loss - total_reg_loss
total_reg_loss += (delta / i)
# print("validated one batch")
return total_loss, total_pred_loss, total_reg_loss
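In [ ]:
# Illustration of the running-mean update used in validate above: the accumulator
# total += (x_i - total) / i is an incremental mean, so after the loop it equals the
# plain average of the per-batch losses (made-up numbers below).
xs = [0.3, 0.5, 0.1, 0.7]
total = 0.0
for i, x in enumerate(xs, 1):
    total += (x - total) / i
print(total, sum(xs) / len(xs))   # both ~0.4 (up to float rounding)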
In [64]:
validate(val_loader, trainer)
Out[64]:
In [67]:
trainer.kernel_net(batch[0])
Out[67]:
In [68]:
batch[0]
Out[68]:
In [88]:
kernel, words = trainer.kernel_net(batch[0])
trainer.sampler.s_ix = trainer.kernel_net.s_ix
trainer.sampler.e_ix = trainer.kernel_net.e_ix
weighted_words = trainer.sampler(kernel, words)
In [89]:
(weighted_words / words)[:,0]
Out[89]:
In [93]:
((words[2] * words[6]).sum() / (torch.sqrt((words[2]**2).sum()) * torch.sqrt((words[6]**2).sum())))
Out[93]:
In [87]:
for batch in train_loader:
break
In [91]:
torch.sqrt(words[2]**2)
Out[91]:
In [1]:
np.argsort(train_set[3])
In [3]:
import numpy as np
np.argsort(np.array([0,1,2]))
Out[3]: