In [2]:
import argparse
import os
import shutil

import torch
import torch.nn as nn
from torch.autograd import Variable
from torch.utils.data.dataloader import DataLoader

from dpp_nets.utils.io import make_embd, make_tensor_dataset
from dpp_nets.my_torch.utilities import pad_tensor

from torch.utils.data import DataLoader


import string
import nltk
import string
import numpy as np
import torch
import nltk

from nltk.corpus import stopwords
import torch
import torch.nn as nn
from collections import OrderedDict

import gzip
from torch.utils.data import Dataset

In [3]:
data_path = '/Users/Max/data/beer_reviews/reviews.all.train.chunks.txt.gz'
word_path = '/Users/Max/data/beer_reviews/reviews.all.train.words.txt.gz'
embd_path = '/Users/Max/data/beer_reviews/review+wiki.filtered.200.txt.gz'

In [3]:
class Vocabulary:
    
    def __init__(self):
        
        # Basic Indexing
        self.word2index = {}
        self.index2word = {}
        
        # Keeping track of vocabulary
        self.vocab_size = 0 
        self.word2count = {}
        
        # Vector Dictionaries
        self.pretrained = {}
        self.random = {}
        self.word2vec = {}
        self.index2vec = {}

        # Set of Stop Words
        self.stop_words = set()
        
        self.Embedding = None
        self.EmbeddingBag = None
    
    def setStops(self):
        
        self.stop_words = set(nltk.corpus.stopwords.words('english'))
        make_stops = set(string.punctuation + '\n' + '\t' + '...')
        unmake_stops = set(('no', 'not'))

        self.stop_words = self.stop_words.union(make_stops)
        self.stop_words = self.stop_words.difference(unmake_stops)      
        
    def loadPretrained(self, embd_path):
        
        self.pretrained = {}
        with gzip.open(embd_path, 'rt') as f:
            for line in f:
                line = line.strip()
                if line:
                    word, *embd = line.split()
                    vec = torch.FloatTensor([float(dim) for dim in embd])            
                    self.pretrained[word]  = vec
                    
    def loadCorpus(self, word_path):
        
        with gzip.open(data_path, 'rt') as f:

            for line in f:
                _, review = line.split('\D')
                review = tuple(tuple(chunk.split('\W')) for chunk in review.split('\T'))

                for words in review:
                    vocab.addWords(words)
            
    def addWords(self, words):
        """
        words: seq containing variable no of words
        """
        for word in words:
            self.addWord(word)

    def addWord(self, word):

        if word not in self.word2index:
            
            # Keeping track of vocabulary
            self.vocab_size += 1
            self.word2count[word] = 1
            
            # Basic Indexing
            self.word2index[word] = self.vocab_size
            self.index2word[self.vocab_size] = word
            
            # Add word vector
            if word in self.pretrained:
                vec = self.pretrained[word]
                self.word2vec[word] = vec
                self.index2vec[self.vocab_size] = vec
                
            else:
                vec = torch.randn(200)
                self.random[word] = vec
                self.word2vec[word] = vec
                self.index2vec[self.vocab_size] = vec
        else:
            self.word2count[word] += 1
            
    def updateEmbedding(self):
        
        vocab_size = len(self.index2vec) + 1
        EMBD_DIM = 200
        
        self.Embedding = nn.Embedding(vocab_size, EMBD_DIM, padding_idx=0)
        self.EmbeddingBag = nn.EmbeddingBag(vocab_size, EMBD_DIM)
        embd_matrix = torch.zeros(vocab_size, EMBD_DIM)
        
        for ix, vec in vocab.index2vec.items():
            embd_matrix[ix] = vec
        
        embd_dict = OrderedDict([('weight', embd_matrix)])
        self.Embedding.load_state_dict(embd_dict)
        self.EmbeddingBag.load_state_dict(embd_dict)
    
    def checkWord(self, word, min_count):
        if word not in vocab.stop_words and word in vocab.word2index and vocab.word2index[word] > min_count:
            return word
            
    def filterReview(self, review):
        """
        review should be like our data set
        """
        f_review = []
        seen = set()
        
        for tup in review:
            f_tuple = []
            
            for word in tup:
                word = self.checkWord(word, 10)
                if word:
                    f_tuple.append(word)
            
            f_tuple = tuple(f_tuple)    
            
            if f_tuple and f_tuple not in seen:
                seen.add(f_tuple)
                f_review.append(f_tuple)
                
        return f_review
    
    def mapIndicesBatch(self, reviews):
        
        f_review = []
        offset = []
        i = 0

        for review in reviews:
            seen = set()
            
            for tup in review: 
                f_tuple = []
                
                for word in tup:
                    word = vocab.checkWord(word, 10)
                    if word:
                        f_tuple.append(word)

                f_tuple = tuple(f_tuple)    

                if f_tuple and f_tuple not in seen:
                    seen.add(f_tuple)
                    f_review.extend([vocab.word2index[word] for word in f_tuple])
                    offset.append(i)
                    i += len(f_tuple)
            
        f_review, offset = torch.LongTensor(f_review), torch.LongTensor(offset)   
        return f_review, offset
    
    def mapIndices(self, review):
        
        f_review = []
        offset = []
        seen = set()
        i = 0

        for tup in review:
            f_tuple = []

            for word in tup:
                word = vocab.checkWord(word, 10)
                if word:
                    f_tuple.append(word)

            f_tuple = tuple(f_tuple)    

            if f_tuple and f_tuple not in seen:
                seen.add(f_tuple)
                f_review.extend([vocab.word2index[word] for word in f_tuple])
                offset.append(i)
                i += len(f_tuple)

        f_review, offset = torch.LongTensor(f_review), torch.LongTensor(offset)   
        return f_review, offset
    
    def returnEmbds(self, review):
        
        f_review = []
        offset = []
        seen = set()
        i = 0

        for tup in review:
            f_tuple = []

            for word in tup:
                word = vocab.checkWord(word, 10)
                if word:
                    f_tuple.append(word)

            f_tuple = tuple(f_tuple)    

            if f_tuple and f_tuple not in seen:
                seen.add(f_tuple)
                f_review.extend([vocab.word2index[word] for word in f_tuple])
                offset.append(i)
                i += len(f_tuple)

        f_review, offset = Variable(torch.LongTensor(f_review)), Variable(torch.LongTensor(offset))
        embd = self.EmbeddingBag(f_review, offset)

        return embd

In [4]:
class BeerDataset(Dataset):
    """BeerDataset."""

    def __init__(self, data_path, aspect='all'):
        
        # Compute size of the data set      
        self.aspect = aspect
        self.vocab = vocab
        
        with gzip.open(data_path, 'rt') as f:
            self.lines = f.readlines()

    def __len__(self):
        return len(self.lines)

    def __getitem__(self, idx):
        
        # Decode
        target, review = self.lines[idx].split('\D')
        
        # Target
        target = torch.FloatTensor([float(t) for t in target.split()[:3]])
        
        # Review
        review = tuple(tuple(chunk.split('\W')) for chunk in review.split('\T'))
        #ixs, offset = self.vocab.mapIndices(review)
        
        #sample = {'ixs': ixs, 'offset': offset, 'target': target}
        sample = {'review': review, 'target': target}
        return sample

In [5]:
vocab = Vocabulary()
vocab.loadPretrained(embd_path)
vocab.setStops()
vocab.loadCorpus(word_path)
vocab.updateEmbedding()

ds = BeerDataset(data_path, vocab)

In [6]:
a = 'abc'
b = 'def'
os.path.join(a,b)


Out[6]:
'abc/def'

In [8]:
# Solution 2 - using mycollate2 + new KernelNetwork
from dpp_nets.layers.layers import MarginalSampler, PredNet

def my_collate2(batch, vocab=vocab):

    # Create indices
    s_ix, e_ix, i = [], [], 0

    for l in [len(vocab.filterReview(d['review'])) for d in batch]:
        s_ix.append(i)
        i += l
        e_ix.append(i)
    
    # Map to Embeddings
    batch_review = [review['review'] for review in batch]
    ixs, offsets =  vocab.mapIndicesBatch(batch_review)
    embd = vocab.EmbeddingBag(Variable(ixs), Variable(offsets))

    # Create target vector
    target_tensor = Variable(torch.stack([d['target'] for d in batch]))
    
    return embd, target_tensor, s_ix, e_ix

class KernelVar(nn.Module):

    def __init__(self, embd_dim, hidden_dim, kernel_dim):
        """
        Currently, this creates a 2-hidden-layer network 
        with ELU non-linearities.

        """
        super(KernelVar, self).__init__()
        self.embd_dim = embd_dim
        self.hidden_dim = hidden_dim
        self.kernel_dim = kernel_dim

        self.layer1 = nn.Linear(2 * embd_dim, hidden_dim)
        self.layer2 = nn.Linear(hidden_dim, hidden_dim)
        self.layer3 = nn.Linear(hidden_dim, kernel_dim)

        self.net = nn.Sequential(self.layer1, nn.Tanh(), self.layer2, nn.Tanh(), self.layer3)

        self.s_ix = None
        self.e_ix = None


    def forward(self, embd):
        """
        Given words, returns batch_kernel of dimension
        [-1, kernel_dim]
        """
        
        # Create context
        context = []
        for s, e in zip(self.s_ix, self.e_ix):
            text = embd[s:e].sum(0, keepdim=True).expand_as(embd[s:e])
            context.append(text)
        context = torch.cat(context, dim=0)
        batch_x = torch.cat([embd, context], dim=1)
        
        batch_kernel = self.net(batch_x)

        return batch_kernel , embd 

from timeit import default_timer
start = default_timer()

dl = DataLoader(ds, batch_size=500, collate_fn=my_collate2)
for batch in dl:
    break

embd, target, s_ix, e_ix = batch

embd_dim = 200
hidden_dim = 500
kernel_dim = 200
enc_dim = 200
target_dim = 3

kernel_net = KernelVar(embd_dim, hidden_dim, kernel_dim)
kernel_net.s_ix, kernel_net.e_ix = s_ix, e_ix

sampler = MarginalSampler()
pred_net = PredNet(embd_dim, hidden_dim, enc_dim, target_dim)

criterion = nn.MSELoss()
activation = nn.Sigmoid()

pred = None

pred_loss = None 
reg_loss = None
loss = None

reg = 10
reg_mean = 0.1

kernel, words = kernel_net(embd) # returned words are masked now!

sampler.s_ix = kernel_net.s_ix
sampler.e_ix = kernel_net.e_ix

weighted_words = sampler(kernel, words) 

pred_net.s_ix = sampler.s_ix
pred_net.e_ix = sampler.e_ix

pred = pred_net(weighted_words)

target = batch[1]

if activation:
    pred = activation(pred)

pred_loss = criterion(pred, target)

if reg:
    reg_loss = reg * (torch.stack(sampler.exp_sizes) - reg_mean).pow(2).mean()
    loss = pred_loss + reg_loss
else:
    loss = pred_loss

loss.backward()
duration = default_timer() - start
print(duration)


65.42647176500031

In [56]:
def my_collate(batch, vocab=vocab):

    # Count sizes
    max_no_chunks = 0
    for d in batch:
        max_no_chunks = max(max_no_chunks, len(vocab.filterReview(d['review'])))
    
    # Map to Embeddings
    reps = []
    for d in batch:
        rep = vocab.returnEmbds(d['review'])
        rep = torch.cat([rep, Variable(torch.zeros(max_no_chunks + 1 - rep.size(0), rep.size(1)))], dim=0)
        reps.append(rep)
    
    data_tensor = torch.stack(reps) 
    
    # Create target vector
    # target_tensor = Variable(torch.stack([d['target'] for d in batch]))
    target_tensor = Variable(torch.stack([d['target'] for d in batch]))
    
    return data_tensor, target_tensor

# Solution 1 using my_collate
from timeit import default_timer
from dpp_nets.layers.layers import KernelVar, MarginalSampler, PredNet


start = default_timer()

dl = DataLoader(ds, batch_size=500, collate_fn=my_collate)
for batch in dl:
    break
words = batch[0]

kernel_net = KernelVar(200,500,200)

embd_dim = 200
hidden_dim = 500
kernel_dim = 200
enc_dim = 200
target_dim = 3

kernel_net = KernelVar(embd_dim, hidden_dim, kernel_dim)
sampler = MarginalSampler()
pred_net = PredNet(embd_dim, hidden_dim, enc_dim, target_dim)

criterion = nn.MSELoss()
activation = nn.Sigmoid()

pred = None

pred_loss = None 
reg_loss = None
loss = None

reg = 10
reg_mean = 0.1

kernel, words = kernel_net(words) # returned words are masked now!

sampler.s_ix = kernel_net.s_ix
sampler.e_ix = kernel_net.e_ix

weighted_words = sampler(kernel, words) 

pred_net.s_ix = sampler.s_ix
pred_net.e_ix = sampler.e_ix

pred = pred_net(weighted_words)

target = batch[1]

if activation:
    pred = activation(pred)

pred_loss = criterion(pred, target)

if reg:
    reg_loss = reg * (torch.stack(sampler.exp_sizes) - reg_mean).pow(2).mean()
    loss = pred_loss + reg_loss
else:
    loss = pred_loss


loss.backward()
duration = default_timer() - start
print(duration)


---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-56-d98e5ffca5ab> in <module>()
     29 
     30 dl = DataLoader(ds, batch_size=500, collate_fn=my_collate)
---> 31 for batch in dl:
     32     break
     33 words = batch[0]

~/Coding/anaconda2/envs/torch2/lib/python3.6/site-packages/torch/utils/data/dataloader.py in __next__(self)
    177         if self.num_workers == 0:  # same-process loading
    178             indices = next(self.sample_iter)  # may raise StopIteration
--> 179             batch = self.collate_fn([self.dataset[i] for i in indices])
    180             if self.pin_memory:
    181                 batch = pin_memory_batch(batch)

<ipython-input-56-d98e5ffca5ab> in my_collate(batch, vocab)
     10     for d in batch:
     11         rep = vocab.returnEmbds(d['review'])
---> 12         rep = torch.cat([rep, Variable(torch.zeros(max_no_chunks + 1 - rep.size(0), rep.size(1)))], dim=0)
     13         reps.append(rep)
     14 

~/Coding/anaconda2/envs/torch2/lib/python3.6/site-packages/torch/autograd/variable.py in cat(iterable, dim)
    895         @staticmethod
    896         def cat(iterable, dim=0):
--> 897             return Concat.apply(dim, *iterable)
    898 
    899         @staticmethod

~/Coding/anaconda2/envs/torch2/lib/python3.6/site-packages/torch/autograd/_functions/tensor.py in forward(ctx, dim, *inputs)
    315         ctx.dim = dim
    316         ctx.input_sizes = [i.size(dim) for i in inputs]
--> 317         return torch.cat(inputs, dim)
    318 
    319     @staticmethod

TypeError: cat received an invalid combination of arguments - got (tuple, int), but expected one of:
 * (sequence[torch.DoubleTensor] seq)
 * (sequence[torch.DoubleTensor] seq, int dim)
      didn't match because some of the arguments have invalid types: (tuple, int)

In [10]:
torch.utils.backcompat.broadcast_warning.enabled = True
torch.utils.backcompat.keepdim_warning.enabled = True
words = Variable(torch.FloatTensor([[[1,2,3,4],[3,4,5,6],[0,0,0,0]],[[1,2,3,4],[0,0,0,0],[0,0,0,0]]]))

In [11]:
for batch in dl:
    break

In [12]:
vocab.EmbeddingBag.parameters()


Out[12]:
<generator object Module.parameters at 0x12bee79e8>

In [13]:
vocab.EmbeddingBag.weight[3,3]


Out[13]:
Variable containing:
1.00000e-02 *
 -2.3152
[torch.FloatTensor of size 1]

In [14]:
vocab.EmbeddingBag.double()


Out[14]:
EmbeddingBag(112232, 200, mode=mean)

In [15]:
my_collate2()


---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-15-7a2948b49dad> in <module>()
----> 1 my_collate2()

TypeError: my_collate2() missing 1 required positional argument: 'batch'

In [ ]:
A = torch.randn(5,5)
L = A.mm(A.t())

In [ ]:
import numpy as np


A = torch.randn(5,5)
L = A.mm(A.t())

A = A.numpy()
L = L.numpy()

In [ ]:
n = L.shape[0]
no_choice = list(range(n))
choice = []

# update L
identity = np.zeros(n)
identity[no_choice] = np.ones(len(no_choice))
inverse = np.linalg.inv(L + identity)
inverse_select = inverse[np.ix_(no_choice, no_choice)]
LA = np.linalg.inverse(inverse_select) - np.identity(len(no_choice))


K = L.dot(np.linalg.inv(L+np.eye(n)))
K_index

In [ ]:


In [ ]:
def computeMAP(L):

    # initialization
    n = L.shape[0]
    no_choice = list(range(n))
    choice = []
    best_p = 0

    while True:

        candidates = [choice + [j] for j in no_choice]
        submats = [L[np.ix_(cand, cand)] for cand in candidates]
        probs = [np.linalg.det(submat) - best_p for submat in submats]

        if all(p <= 0 for p in probs):
            return choice
        else:
            which = np.argmax(np.array(probs))
            choice = candidates[which]
            which_elem = choice[-1]
            no_choice.remove(which_elem)
            best_p += probs[which]

In [ ]:
from itertools import chain, combinations

def exactMAP(L):

    n = L.shape[0]
    
    # Generate powerset
    s = list(range(n))
    powerset = list(chain.from_iterable(combinations(s, r) for r in range(len(s)+1)))
    
    # Compute Probabilities 
    probs = np.array([np.linalg.det(L[np.ix_(choice, choice)]) for choice in powerset])
    which = np.argmax(probs)
    MAP = powerset[which], probs[which]
    
    return MAP

In [ ]:
choice = computeMAP(L)
print(choice)
print(len(choice))

In [ ]:
import itertools
from itertools import chain, combinations
def powerset(iterable):
    "powerset([1,2,3]) --> () (1,) (2,) (3,) (1,2) (1,3) (2,3) (1,2,3)"
    s = list(iterable)
    return chain.from_iterable(combinations(s, r) for r in range(len(s)+1))

In [ ]:
[(choice, np.linalg.det(L[np.ix_(choice, choice)])) for choice in list(powerset(range(6)))]

In [ ]:


In [16]:
batch


Out[16]:
(Variable containing:
 ( 0 ,.,.) = 
   0.0382 -0.1186 -0.0850  ...  -0.0401 -0.1082 -0.0871
  -0.1019 -0.0845 -0.0379  ...  -0.0387 -0.0539  0.0405
  -0.0130  0.0048  0.0105  ...  -0.0123 -0.0478 -0.0229
            ...             ⋱             ...          
   0.0000  0.0000  0.0000  ...   0.0000  0.0000  0.0000
   0.0000  0.0000  0.0000  ...   0.0000  0.0000  0.0000
   0.0000  0.0000  0.0000  ...   0.0000  0.0000  0.0000
 
 ( 1 ,.,.) = 
  -0.0259  0.0692  0.0024  ...  -0.0073 -0.0043 -0.0124
  -0.0828 -0.0015 -0.0338  ...   0.0101  0.0463 -0.0250
   0.0057 -0.0006 -0.0263  ...  -0.0249 -0.0389 -0.0218
            ...             ⋱             ...          
   0.0000  0.0000  0.0000  ...   0.0000  0.0000  0.0000
   0.0000  0.0000  0.0000  ...   0.0000  0.0000  0.0000
   0.0000  0.0000  0.0000  ...   0.0000  0.0000  0.0000
 
 ( 2 ,.,.) = 
   0.0195 -0.0219 -0.0247  ...  -0.0158 -0.0756 -0.0718
  -0.1637  0.0951 -0.0571  ...  -0.0486 -0.0456 -0.0826
   0.0421 -0.0581 -0.0280  ...   0.0181 -0.1355 -0.1369
            ...             ⋱             ...          
   0.0000  0.0000  0.0000  ...   0.0000  0.0000  0.0000
   0.0000  0.0000  0.0000  ...   0.0000  0.0000  0.0000
   0.0000  0.0000  0.0000  ...   0.0000  0.0000  0.0000
 ... 
 
 (497,.,.) = 
   0.0108  0.0406  0.0744  ...   0.0114 -0.0296 -0.0225
  -0.0145  0.0345  0.1035  ...   0.0576 -0.0295 -0.0046
   0.0439 -0.0228  0.1279  ...  -0.0327 -0.0286  0.0267
            ...             ⋱             ...          
   0.0000  0.0000  0.0000  ...   0.0000  0.0000  0.0000
   0.0000  0.0000  0.0000  ...   0.0000  0.0000  0.0000
   0.0000  0.0000  0.0000  ...   0.0000  0.0000  0.0000
 
 (498,.,.) = 
  -0.0209  0.0224  0.0771  ...   0.0186 -0.0303 -0.0095
  -0.0032 -0.0297  0.0874  ...   0.1341 -0.0426  0.0885
   0.0511  0.1347  0.0643  ...   0.0043 -0.0220 -0.0243
            ...             ⋱             ...          
   0.0000  0.0000  0.0000  ...   0.0000  0.0000  0.0000
   0.0000  0.0000  0.0000  ...   0.0000  0.0000  0.0000
   0.0000  0.0000  0.0000  ...   0.0000  0.0000  0.0000
 
 (499,.,.) = 
  -0.0774  0.0184  0.1064  ...   0.0228 -0.0134 -0.0485
  -0.0084  0.0071 -0.0297  ...  -0.0925 -0.0551  0.0825
  -0.0384  0.0011  0.0173  ...  -0.0163 -0.0419  0.0363
            ...             ⋱             ...          
   0.0000  0.0000  0.0000  ...   0.0000  0.0000  0.0000
   0.0000  0.0000  0.0000  ...   0.0000  0.0000  0.0000
   0.0000  0.0000  0.0000  ...   0.0000  0.0000  0.0000
 [torch.FloatTensor of size 500x297x200], Variable containing:
  0.8000  1.0000  0.8000
  0.5000  0.2000  0.4000
  1.0000  0.6000  0.9000
            ⋮            
  0.6000  0.5000  0.7000
  0.8000  0.9000  0.8000
  0.7000  0.5000  0.7000
 [torch.FloatTensor of size 500x3])

In [49]:
words, target = batch
batch_size, max_set_size, embd_dim = words.size()
word_sums = words.sum(1) 
lengths = Variable(words.data.sum(2, keepdim=True).abs().sign().sum(1).expand_as(word_sums))
word_means = word_sums / lengths
word_means


/Users/Max/Coding/anaconda2/envs/torch2/lib/python3.6/site-packages/torch/autograd/_functions/reduce.py:21: UserWarning: backwards compatibility: call to "sum" uses default value for keepdim which has changed default to False.  Consider passing as kwarg.
  return input.sum(dim)
/Users/Max/Coding/anaconda2/envs/torch2/lib/python3.6/site-packages/ipykernel_launcher.py:4: UserWarning: backwards compatibility: call to "sum" uses default value for keepdim which has changed default to False.  Consider passing as kwarg.
  after removing the cwd from sys.path.
Out[49]:
Variable containing:
-2.3880e-02 -1.4313e-02  1.9481e-02  ...   1.3393e-02 -1.9901e-02 -4.3429e-02
-1.8973e-02  1.8574e-02  4.9568e-02  ...   8.4320e-03 -1.8898e-02 -5.9302e-02
-1.0988e-02 -1.6745e-02  1.5342e-02  ...   2.7094e-02 -2.7952e-02 -4.0182e-02
                ...                   ⋱                   ...                
-2.8851e-02  1.2475e-03  2.1513e-02  ...   1.9931e-02 -1.2227e-02 -2.9908e-02
-4.4926e-02  1.3410e-02  2.7187e-02  ...   2.0730e-02 -1.7321e-02 -4.2113e-02
-5.9178e-02 -2.5057e-02  4.0354e-02  ...   1.4352e-02 -1.9933e-02 -2.8511e-02
[torch.FloatTensor of size 500x200]

In [59]:
batch


Out[59]:
(Variable containing:
 ( 0 ,.,.) = 
   0.0382 -0.1186 -0.0850  ...  -0.0401 -0.1082 -0.0871
  -0.1019 -0.0845 -0.0379  ...  -0.0387 -0.0539  0.0405
  -0.0130  0.0048  0.0105  ...  -0.0123 -0.0478 -0.0229
            ...             ⋱             ...          
   0.0000  0.0000  0.0000  ...   0.0000  0.0000  0.0000
   0.0000  0.0000  0.0000  ...   0.0000  0.0000  0.0000
   0.0000  0.0000  0.0000  ...   0.0000  0.0000  0.0000
 
 ( 1 ,.,.) = 
  -0.0259  0.0692  0.0024  ...  -0.0073 -0.0043 -0.0124
  -0.0828 -0.0015 -0.0338  ...   0.0101  0.0463 -0.0250
   0.0057 -0.0006 -0.0263  ...  -0.0249 -0.0389 -0.0218
            ...             ⋱             ...          
   0.0000  0.0000  0.0000  ...   0.0000  0.0000  0.0000
   0.0000  0.0000  0.0000  ...   0.0000  0.0000  0.0000
   0.0000  0.0000  0.0000  ...   0.0000  0.0000  0.0000
 
 ( 2 ,.,.) = 
   0.0195 -0.0219 -0.0247  ...  -0.0158 -0.0756 -0.0718
  -0.1637  0.0951 -0.0571  ...  -0.0486 -0.0456 -0.0826
   0.0421 -0.0581 -0.0280  ...   0.0181 -0.1355 -0.1369
            ...             ⋱             ...          
   0.0000  0.0000  0.0000  ...   0.0000  0.0000  0.0000
   0.0000  0.0000  0.0000  ...   0.0000  0.0000  0.0000
   0.0000  0.0000  0.0000  ...   0.0000  0.0000  0.0000
 ... 
 
 (497,.,.) = 
   0.0108  0.0406  0.0744  ...   0.0114 -0.0296 -0.0225
  -0.0145  0.0345  0.1035  ...   0.0576 -0.0295 -0.0046
   0.0439 -0.0228  0.1279  ...  -0.0327 -0.0286  0.0267
            ...             ⋱             ...          
   0.0000  0.0000  0.0000  ...   0.0000  0.0000  0.0000
   0.0000  0.0000  0.0000  ...   0.0000  0.0000  0.0000
   0.0000  0.0000  0.0000  ...   0.0000  0.0000  0.0000
 
 (498,.,.) = 
  -0.0209  0.0224  0.0771  ...   0.0186 -0.0303 -0.0095
  -0.0032 -0.0297  0.0874  ...   0.1341 -0.0426  0.0885
   0.0511  0.1347  0.0643  ...   0.0043 -0.0220 -0.0243
            ...             ⋱             ...          
   0.0000  0.0000  0.0000  ...   0.0000  0.0000  0.0000
   0.0000  0.0000  0.0000  ...   0.0000  0.0000  0.0000
   0.0000  0.0000  0.0000  ...   0.0000  0.0000  0.0000
 
 (499,.,.) = 
  -0.0774  0.0184  0.1064  ...   0.0228 -0.0134 -0.0485
  -0.0084  0.0071 -0.0297  ...  -0.0925 -0.0551  0.0825
  -0.0384  0.0011  0.0173  ...  -0.0163 -0.0419  0.0363
            ...             ⋱             ...          
   0.0000  0.0000  0.0000  ...   0.0000  0.0000  0.0000
   0.0000  0.0000  0.0000  ...   0.0000  0.0000  0.0000
   0.0000  0.0000  0.0000  ...   0.0000  0.0000  0.0000
 [torch.FloatTensor of size 500x297x200], Variable containing:
  0.8000  1.0000  0.8000
  0.5000  0.2000  0.4000
  1.0000  0.6000  0.9000
            ⋮            
  0.6000  0.5000  0.7000
  0.8000  0.9000  0.8000
  0.7000  0.5000  0.7000
 [torch.FloatTensor of size 500x3])

In [60]:
batch_size, max_set_size, embd_dim = words.size()

# Unpacking to send through encoder network
# Register indices of individual instances in batch for reconstruction
lengths = words.data.sum(2, keepdim=True).abs().sign().sum(1, keepdim=True)
s_ix = list(lengths.squeeze().cumsum(0).long() - lengths.squeeze().long())
e_ix = list(lengths.squeeze().cumsum(0).long())

# Filter out zero words 
mask = words.data.sum(2, keepdim=True).abs().sign().expand_as(words).byte()
words = words.masked_select(Variable(mask)).view(-1, embd_dim)

# Send through encoder network
enc_words = self.enc_net(words)

# Compilation of encoded words for each instance in sample
# Produce summed representation (code) for each instance in batch using encoded words:
codes = []

for i, (s, e) in enumerate(zip(s_ix, e_ix)):
    code = enc_words[s:e].mean(0, keepdim=True)
    codes.append(code)

codes = torch.stack(codes).squeeze(1)
assert batch_size == codes.size(0)
assert enc_dim == codes.size(1)

# Produce predictions using codes
pred = self.pred_net(codes)


---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-60-4d31b308f4c6> in <module>()
     12 
     13 # Send through encoder network
---> 14 enc_words = self.enc_net(words)
     15 
     16 # Compilation of encoded words for each instance in sample

NameError: name 'self' is not defined

In [165]:
class AttentionBaseline(nn.Module):
    """
    Works with different set sizes, i.e. it does masking!
    """

    def __init__(self, embd_dim, hidden_dim, target_dim):

        super(AttentionBaseline, self).__init__()

        self.embd_dim = embd_dim
        self.hidden_dim = hidden_dim
        self.target_dim = target_dim

        # Attention Network 
        self.attention_layer = nn.Sequential(nn.Linear(2 * embd_dim, hidden_dim), nn.Tanh())
        self.v = nn.Parameter(torch.randn(hidden_dim, 1))

        # Uses the sum of the encoded vectors to make a final prediction
        self.pred_layer1 = nn.Linear(embd_dim ,hidden_dim)
        self.pred_layer2 = nn.Linear(hidden_dim, hidden_dim)
        self.pred_layer3 = nn.Linear(hidden_dim, target_dim)
        self.pred_net = nn.Sequential(self.pred_layer1, nn.ReLU(), self.pred_layer2, nn.ReLU(), self.pred_layer3)

        self.s_ix = []
        self.e_ix = []
        
        self.attention_unnorm = None
        self.attention = None
        self.words = None
        self.weighted_words = None

    def forward(self, words):
        """
        words is a 3D tensor with dimension: batch_size x max_set_size x embd_dim

        """
        embd_dim = self.embd_dim
        hidden_dim = self.hidden_dim
        target_dim = self.target_dim

        batch_size, max_set_size, embd_dim = words.size()

        # Create context
        lengths = words.sum(2, keepdim=True).abs().sign().sum(1, keepdim=True)
        context = (words.sum(1, keepdim=True) / lengths.expand_as(words.sum(1, keepdim=True))).expand_as(words)

        # Filter out zero words 
        mask = words.data.sum(2, keepdim=True).abs().sign().expand_as(words).byte()
        self.words = words.masked_select(Variable(mask)).view(-1, embd_dim)
        context = context.masked_select(Variable(mask)).view(-1, embd_dim)

        # Concatenate and compute attention
        batch_x = torch.cat([self.words, context], dim=1)
        print('batch_x', batch_x)
        self.attention_unnorm = self.attention_layer(batch_x).mm(self.v)
        print('attention_unnorm', self.attention_unnorm)

        self.s_ix = list(lengths.squeeze().cumsum(0).long().data - lengths.squeeze().long().data)
        self.e_ix = list(lengths.squeeze().cumsum(0).long().data)

        # Apply attention
        reps = []
        for i, (s, e) in enumerate(zip(self.s_ix, self.e_ix)):
            self.attention = (nn.Softmax()(self.attention_unnorm[s:e].t())).t()
            rep = (self.attention * self.words[s:e]).sum(0)
            reps.append(rep)
            
        self.weighted_words = torch.stack(reps)
        
        assert self.weighted_words.size(0) == batch_size
        print('weighted_words', self.weighted_words)

        pred = self.pred_net(self.weighted_words)

        return pred 
torch.manual_seed(0)
words = Variable(torch.FloatTensor([[[1,2,3],[2,2,2],[0,0,0],[0,0,0]],[[1,2,3],[0,0,0],[0,0,0],[0,0,0]]]))
net = AttentionBaseline(3, 10, 2)
net(words)


batch_x Variable containing:
 1.0000  2.0000  3.0000  1.5000  2.0000  2.5000
 2.0000  2.0000  2.0000  1.5000  2.0000  2.5000
 1.0000  2.0000  3.0000  1.0000  2.0000  3.0000
[torch.FloatTensor of size 3x6]

attention_unnorm Variable containing:
 0.2140
 0.3386
 0.0283
[torch.FloatTensor of size 3x1]

weighted_words Variable containing:
 1.5311  2.0000  2.4689
 1.0000  2.0000  3.0000
[torch.FloatTensor of size 2x3]

/Users/Max/Coding/anaconda2/envs/torch2/lib/python3.6/site-packages/torch/autograd/_functions/reduce.py:21: UserWarning: backwards compatibility: call to "sum" uses default value for keepdim which has changed default to False.  Consider passing as kwarg.
  return input.sum(dim)
Out[165]:
Variable containing:
 0.1759  0.0761
 0.1767  0.0303
[torch.FloatTensor of size 2x2]

In [167]:
reps = []
for s, e in zip(net.s_ix, net.e_ix):
    attention = (nn.Softmax()(net.attention_unnorm[s:e].t())).t().expand_as(net.words[s:e])
    print(attention)
    rep = (attention * net.words[s:e]).sum(0)
    reps.append(rep)
torch.stack(reps)


Variable containing:
 0.4689  0.4689  0.4689
 0.5311  0.5311  0.5311
[torch.FloatTensor of size 2x3]

Variable containing:
 1  1  1
[torch.FloatTensor of size 1x3]

/Users/Max/Coding/anaconda2/envs/torch2/lib/python3.6/site-packages/torch/autograd/_functions/reduce.py:21: UserWarning: backwards compatibility: call to "sum" uses default value for keepdim which has changed default to False.  Consider passing as kwarg.
  return input.sum(dim)
Out[167]:
Variable containing:
 1.5311  2.0000  2.4689
 1.0000  2.0000  3.0000
[torch.FloatTensor of size 2x3]

In [138]:
attention


Out[138]:
Variable containing:
 1
[torch.FloatTensor of size 1x1]

In [168]:
class AttentionBaseline(nn.Module):
    """
    Works with different set sizes, i.e. it does masking!
    """

    def __init__(self, embd_dim, hidden_dim, target_dim):

        super(AttentionBaseline, self).__init__()

        self.embd_dim = embd_dim
        self.hidden_dim = hidden_dim
        self.target_dim = target_dim

        # Attention Network 
        self.attention_layer = nn.Sequential(nn.Linear(2 * embd_dim, hidden_dim), nn.Tanh())
        self.v = nn.Parameter(torch.randn(hidden_dim, 1))

        # Uses the sum of the encoded vectors to make a final prediction
        self.pred_layer1 = nn.Linear(embd_dim ,hidden_dim)
        self.pred_layer2 = nn.Linear(hidden_dim, hidden_dim)
        self.pred_layer3 = nn.Linear(hidden_dim, target_dim)
        self.pred_net = nn.Sequential(self.pred_layer1, nn.ReLU(), self.pred_layer2, nn.ReLU(), self.pred_layer3)

        self.s_ix = []
        self.e_ix = []

        self.attention = []

    def forward(self, words):
        """
        words is a 3D tensor with dimension: batch_size x max_set_size x embd_dim

        """
        embd_dim = self.embd_dim
        hidden_dim = self.hidden_dim
        target_dim = self.target_dim

        batch_size, max_set_size, embd_dim = words.size()

        # Create context
        lengths = words.sum(2, keepdim=True).abs().sign().sum(1, keepdim=True)
        context = (words.sum(1, keepdim=True) / lengths.expand_as(words.sum(1, keepdim=True))).expand_as(words)

        # Filter out zero words 
        mask = words.data.sum(2, keepdim=True).abs().sign().expand_as(words).byte()
        words = words.masked_select(Variable(mask)).view(-1, embd_dim)
        context = context.masked_select(Variable(mask)).view(-1, embd_dim)

        # Concatenate and compute attention
        batch_x = torch.cat([words, context], dim=1)
        attention_unnorm = self.attention_layer(batch_x).mm(self.v)

        self.s_ix = list(lengths.squeeze().cumsum(0).long().data - lengths.squeeze().long().data)
        self.e_ix = list(lengths.squeeze().cumsum(0).long().data)

        # Apply attention
        reps = []
        for i, (s, e) in enumerate(zip(self.s_ix, self.e_ix)):
            attention = (nn.Softmax()(attention_unnorm[s:e].t())).t()
            self.attention.append(attention.data)
            rep = (attention * words[s:e]).sum(0)
            reps.append(rep)

        weighted_words = torch.stack(reps)
        assert weighted_words.size(0) == batch_size


        pred = self.pred_net(weighted_words)

        return pred 
torch.manual_seed(0)
words = Variable(torch.FloatTensor([[[1,2,3],[2,2,2],[0,0,0],[0,0,0]],[[1,2,3],[0,0,0],[0,0,0],[0,0,0]]]))
net = AttentionBaseline(3, 10, 2)
net(words)


/Users/Max/Coding/anaconda2/envs/torch2/lib/python3.6/site-packages/torch/autograd/_functions/reduce.py:21: UserWarning: backwards compatibility: call to "sum" uses default value for keepdim which has changed default to False.  Consider passing as kwarg.
  return input.sum(dim)
Out[168]:
Variable containing:
 0.1759  0.0761
 0.1767  0.0303
[torch.FloatTensor of size 2x2]

In [61]:
# trained models
root_path = '/Users/Max/checkpoints/beer_reviews/'
model = 'allchunksreg0.01reg_mean10.0lr0.001marginal_best_ckp.pth.tar'
name = root_path + model
my_d = torch.load(name, map_location=lambda storage, loc: storage)

In [17]:
list(my_d.keys())


Out[17]:
['epoch:', 'model', 'state_dict', 'lowest_loss', 'optimizer']

In [62]:
from dpp_nets.utils.language import Vocabulary, BeerDataset, custom_collate
from dpp_nets.layers.layers import ChunkTrainer

train_path = '/Users/Max/data/beer_reviews/' + 'reviews.' + 'all' + '.train.' + 'chunks' + '.txt.gz'
val_path = '/Users/Max/data/beer_reviews/' + 'reviews.' + 'all' + '.heldout.' + 'chunks' + '.txt.gz'
embd_path = '/Users/Max/data/beer_reviews/' + 'review+wiki.filtered.200.txt.gz'
word_path = '/Users/Max/data/beer_reviews/' + 'reviews.' + 'all' + '.train.' + 'words.txt.gz'

In [23]:
vocab = Vocabulary()
vocab.loadPretrained(embd_path)
vocab.setStops()
vocab.loadCorpus(word_path)
vocab.updateEmbedding()
vocab.setCuda(False)

In [63]:
trainer = ChunkTrainer(200, 500, 200, 200, 3)
trainer.activation = nn.Sigmoid()
trainer.reg = 0.1
trainer.reg_mean = 10
trainer.load_state_dict(my_d['state_dict'])

In [44]:
my_collate = custom_collate(vocab, False)

In [50]:
val_set = BeerDataset(val_path)
val_loader = torch.utils.data.DataLoader(val_set, collate_fn=my_collate, batch_size=10)
train_set = BeerDataset(train_path)
train_loader = torch.utils.data.DataLoader(train_set, collate_fn=my_collate, batch_size=10)

In [46]:
def validate(loader, trainer):

    trainer.eval()

    total_loss = 0.0
    total_pred_loss = 0.0
    total_reg_loss = 0.0

    for i, batch in enumerate(loader, 1):

        review, target = batch

        trainer(review, target)

        loss = trainer.loss.data[0]
        pred_loss = trainer.pred_loss.data[0]
        reg_loss = trainer.reg_loss.data[0]

        delta = loss - total_loss
        total_loss += (delta / i)
        delta = pred_loss - total_pred_loss 
        total_pred_loss += (delta / i)
        delta = reg_loss - total_reg_loss
        total_reg_loss += (delta / i)

        # print("validated one batch")

    return total_loss, total_pred_loss, total_reg_loss

In [64]:
validate(val_loader, trainer)


Out[64]:
(1.282385425224899, 0.029764174521279788, 1.2526212519432145)

In [67]:
trainer.kernel_net(batch[0])


Out[67]:
(Variable containing:
 -1.9418 -1.6623  1.7433  ...   0.8431  2.7226  0.3328
 -2.4954 -1.5230  4.1865  ...   0.5212  2.6975  0.1213
 -0.8185  1.6773 -2.3425  ...  -1.6988  3.2755  3.0228
           ...             ⋱             ...          
 -1.1800 -2.2931  3.2514  ...   0.5663  2.9770 -0.8179
 -1.0904  0.0158 -0.0062  ...  -0.1544  5.2335  2.5472
 -2.0173 -2.8696  3.4641  ...  -0.1246  4.9017  0.8438
 [torch.FloatTensor of size 14x200], Variable containing:
  4.1406e-02 -2.2289e-03  3.9951e-02  ...   1.3364e-04 -5.6739e-02 -5.9640e-02
  3.2634e-03  2.3399e-02 -9.9318e-03  ...  -2.2967e-02 -6.6968e-02  1.9021e-02
 -2.2690e-02  9.4700e-04  2.5908e-02  ...   2.0340e-02 -1.7681e-02 -3.6136e-02
                 ...                   ⋱                   ...                
 -2.1147e-03 -5.0989e-02 -2.1682e-02  ...  -1.7501e-02 -5.7269e-02 -2.1876e-02
  1.0210e-02 -1.3399e-02  1.0572e-02  ...   4.6790e-03 -5.3796e-02 -5.2150e-02
 -1.9828e-02 -9.2585e-02 -1.4802e-02  ...  -5.5002e-06 -2.9345e-02 -5.4205e-02
 [torch.FloatTensor of size 14x200])

In [68]:
batch[0]


Out[68]:
Variable containing:
( 0 ,.,.) = 
  0.0414 -0.0022  0.0400  ...   0.0001 -0.0567 -0.0596
  0.0033  0.0234 -0.0099  ...  -0.0230 -0.0670  0.0190
 -0.0227  0.0009  0.0259  ...   0.0203 -0.0177 -0.0361
           ...             ⋱             ...          
  0.0102 -0.0134  0.0106  ...   0.0047 -0.0538 -0.0521
 -0.0198 -0.0926 -0.0148  ...  -0.0000 -0.0293 -0.0542
  0.0000  0.0000  0.0000  ...   0.0000  0.0000  0.0000
[torch.FloatTensor of size 1x15x200]

In [72]:



---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-72-62bd740f6949> in <module>()
----> 1 self.pred_net.s_ix = self.sampler.s_ix
      2 self.pred_net.e_ix = self.sampler.e_ix

NameError: name 'self' is not defined

In [88]:
kernel, words = trainer.kernel_net(batch[0])
trainer.sampler.s_ix = trainer.kernel_net.s_ix
trainer.sampler.e_ix = trainer.kernel_net.e_ix
weighted_words = trainer.sampler(kernel, words)

In [89]:
(weighted_words / words)[:,0]


Out[89]:
Variable containing:
 0.6517
 0.2896
 0.4587
 0.5311
 0.4152
 0.5253
 0.3540
 0.4208
 0.5471
 0.5934
 0.4787
 0.8125
 0.2178
 0.4362
 0.5165
 0.7257
 0.4486
 0.4068
 0.2543
 0.4597
 0.3139
 0.9362
 0.9884
 0.9587
 0.9937
 0.9953
 0.9290
 0.9754
 0.5675
 0.1470
 0.4389
 0.3450
 0.4050
 0.2112
 0.4281
 0.4106
 0.9373
 0.5862
 0.3574
 0.2361
 0.5830
 0.2821
 0.4981
 0.2088
 0.5895
 0.3613
 0.5043
 0.3466
 0.2690
 0.2637
 0.4807
 0.1704
 0.9723
 0.9496
 0.7807
 0.9368
 0.8544
 0.3600
 0.7408
 0.6496
 0.9412
 0.7422
 0.7238
 0.6654
 0.5697
 0.8817
 0.7893
 0.8870
 0.7256
 0.8695
 0.6865
 0.9968
 0.9765
 0.9909
 0.9930
 0.9775
 0.9729
 0.9324
 0.9735
 0.9904
 0.9939
 0.9564
 0.9524
 0.9023
 0.9391
 0.9214
 0.9384
 0.9418
 0.5597
 0.8184
 0.9087
 0.9697
 0.9596
 0.7505
 0.9877
 0.9936
 0.9862
 0.9914
 0.9850
 0.9916
 0.9930
[torch.FloatTensor of size 101]

In [93]:
((words[2] * words[6]).sum() / (torch.sqrt((words[2]**2).sum()) * torch.sqrt((words[6]**2).sum())))


Out[93]:
Variable containing:
 0.5391
[torch.FloatTensor of size 1]

In [87]:
for batch in train_loader: 
    break

In [91]:
torch.sqrt(words[2]**2)


Out[91]:
Variable containing:
 0.0017
 0.0280
 0.0339
 0.0081
 0.0189
 0.0482
 0.0113
 0.0130
 0.0256
 0.0174
 0.0547
 0.0059
 0.0558
 0.0288
 0.0121
 0.0304
 0.0218
 0.0111
 0.0352
 0.0508
 0.0381
 0.0025
 0.0018
 0.0236
 0.0111
 0.0416
 0.0189
 0.0027
 0.0460
 0.0260
 0.0209
 0.0513
 0.0213
 0.0217
 0.0007
 0.0588
 0.0466
 0.0174
 0.0123
 0.0366
 0.0300
 0.0045
 0.0324
 0.0077
 0.0339
 0.0058
 0.0380
 0.0437
 0.0330
 0.0209
 0.0135
 0.0344
 0.0564
 0.0169
 0.1091
 0.0729
 0.0191
 0.0220
 0.0248
 0.0215
 0.0379
 0.0013
 0.0058
 0.0347
 0.0047
 0.0005
 0.0274
 0.0246
 0.0165
 0.0141
 0.0107
 0.0019
 0.0202
 0.0820
 0.0742
 0.0311
 0.1113
 0.0288
 0.0007
 0.0160
 0.0025
 0.0401
 0.0538
 0.0325
 0.0025
 0.0458
 0.0371
 0.0667
 0.0090
 0.0260
 0.0019
 0.0163
 0.0038
 0.0292
 0.0227
 0.0208
 0.0627
 0.0180
 0.0156
 0.0461
 0.0085
 0.0127
 0.0106
 0.0603
 0.0396
 0.0799
 0.0123
 0.0591
 0.0127
 0.0137
 0.0208
 0.0902
 0.0504
 0.0085
 0.0462
 0.0800
 0.0221
 0.0137
 0.0465
 0.0067
 0.0318
 0.0812
 0.0096
 0.0412
 0.0167
 0.0352
 0.0052
 0.0139
 0.0318
 0.0197
 0.0097
 0.0201
 0.1010
 0.0307
 0.0120
 0.0070
 0.0101
 0.0321
 0.0336
 0.0161
 0.0040
 0.0202
 0.0474
 0.0384
 0.0055
 0.0321
 0.0514
 0.0948
 0.0243
 0.0959
 0.0495
 0.0279
 0.0075
 0.0106
 0.0353
 0.0300
 0.0153
 0.0195
 0.0327
 0.0519
 0.0340
 0.0081
 0.0208
 0.0401
 0.0188
 0.0357
 0.0612
 0.0225
 0.0409
 0.0270
 0.0092
 0.0301
 0.0162
 0.0511
 0.0252
 0.0461
 0.0313
 0.0120
 0.0261
 0.0303
 0.0050
 0.0043
 0.0805
 0.0418
 0.0208
 0.0544
 0.0058
 0.0145
 0.0017
 0.0026
 0.0161
 0.0393
 0.0251
 0.0145
 0.0052
 0.0324
 0.0067
 0.0240
 0.0207
 0.0311
[torch.FloatTensor of size 200]

In [1]:
np.argsort(train_set[3])


---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-1-e81bfcdbc38b> in <module>()
----> 1 np.argsort(train_set[3])

NameError: name 'np' is not defined

In [3]:
import numpy as np
np.argsort(np.array([0,1,2]))


Out[3]:
array([0, 1, 2])

In [ ]: