In [1]:
import torch

from dpp_nets.my_torch.simulator import SimKDPPDeepSet
from dpp_nets.my_torch.simulator import SimFilter
from dpp_nets.helper.plotting import plot_floats, plot_dict

import torch.nn as nn
import torch.optim as optim
from torch.autograd import Variable

from dpp_nets.my_torch.linalg import custom_decomp
from dpp_nets.my_torch.DPP import DPP
from dpp_nets.my_torch.utilities import compute_baseline
from dpp_nets.my_torch.utilities import pad_tensor

import dpp_nets.my_torch

import numpy as np
from collections import defaultdict


Traceback (most recent call last):

  File "/Users/Max/Coding/anaconda2/envs/torch2/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2862, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)

  File "<ipython-input-1-6aad6a0613bd>", line 3, in <module>
    from dpp_nets.my_torch.simulator import SimKDPPDeepSet

  File "/Users/Max/git/dpp_nets/dpp_nets/my_torch/simulator.py", line 59
    def generate(self, batch_size):
                                   ^
IndentationError: unindent does not match any outer indentation level

In [ ]:
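# The layer classes used below (KernelFixed, DeepSetPred, SampleFixed) and
# custom_backprop presumably come from this star import.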
from dpp_nets.layers.layers import *

In [ ]:
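# Inline redefinition of SimKDPPDeepSet: the module import failed above with
# an IndentationError in simulator.py, so the class is defined here instead.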
class SimKDPPDeepSet(object):

    def __init__(self, network_params, dtype):

        # Customizable parameters
        self.set_size = set_size = network_params['set_size']        # e.g. 40
        self.n_clusters = n_clusters = network_params['n_clusters']  # e.g. 15
        self.dtype = dtype

        # Fixed parameters
        self.kernel_in = kernel_in = 100
        self.kernel_h = kernel_h = 500
        self.kernel_out = kernel_out = 100

        self.pred_in = pred_in = 50 # kernel_in / 2
        self.pred_h = pred_h = 500
        self.pred_out = pred_out = 100
        
        # 2-hidden-layer networks; note the kernel net takes pred_in (the word
        # embedding dim) as its input size, not kernel_in.
        self.kernel_net = KernelFixed(pred_in, kernel_h, kernel_out)
        self.kernel_net.type(self.dtype)

        self.pred_net = DeepSetPred(pred_in, pred_h, pred_out, pred_in)
        self.pred_net.type(self.dtype)

        # Data
        np.random.seed(0)
        self.means = dtype(np.random.randint(-10,10,[n_clusters, int(pred_in)]).astype("float"))
        self.saved_subsets = None
        self.saved_losses = None
        self.saved_baselines = None

        # Loss criterion
        self.criterion = nn.MSELoss()

        # Record loss
        self.loss_dict = defaultdict(list)
        self.prec_dict = defaultdict(list)
        self.rec_dict = defaultdict(list)
        self.ssize_dict = defaultdict(list)

        # Useful intermediate variables 
        self.embedding = None
        self.subset = None
        self.pick = None
        self.pred = None


    def generate(self, batch_size):
        """sdf"
        Arguments:
        means: Probs best to make this an attribute of the class, 
        so that repeated training works with the same data distribution.


        """
        batch_size = batch_size
        n_clusters = self.n_clusters
        set_size = self.set_size
        embd_dim = self.pred_in
        dtype = self.dtype
        means = self.means

        # Build the cluster-id index: the first n_clusters columns hold ids
        # 0..n_clusters-1 (so every cluster appears at least once per example),
        # the remaining slots are drawn uniformly with replacement, and then
        # all columns are shuffled with one shared permutation.
        index = torch.cat([torch.arange(0, float(n_clusters)).expand(batch_size, n_clusters).long(),
                           torch.multinomial(torch.ones(batch_size, n_clusters), set_size - n_clusters, replacement=True)],
                          dim=1)
        index = index.t()[torch.randperm(set_size)].t().contiguous()

        # Generate words (Gaussian draws centred on the selected cluster means),
        # the context (sum over the set, broadcast back) and the regression
        # target (elementwise sine of the mean word embedding).
        words = dtype(torch.normal(means.index_select(0,index.view(index.numel()))).view(batch_size, set_size, embd_dim))
        context = dtype(words.sum(1).expand_as(words))

        target = torch.sin(words.mean(1)).squeeze()

        return words, context, target, index 

    def train(self, train_iter, batch_size, lr, alpha_iter=1, baseline=True, reg=0, reg_mean=0):
        """
        Training the model. 
        Doesn't use the forward pass as want to sample repeatedly!

        """
        set_size = self.set_size
        n_clusters = self.n_clusters
        kernel_in = self.kernel_in
        kernel_out = self.kernel_out
        embd_dim = self.pred_in
        dtype = self.dtype

        loss_log = 100
        params = list(self.kernel_net.parameters()) + list(self.pred_net.parameters())
        optimizer = optim.Adam(params, lr=lr)

        self.loss_dict.clear()
        self.ssize_dict.clear()
      
        sample = SampleFixed(alpha_iter)

        for t in range(train_iter):
            
            words, context, target, index = self.generate(batch_size)
            words = Variable(words)
            target = Variable(target)
            
            kernel = self.kernel_net(words)
            word_picks = sample(kernel, words)   # draws alpha_iter subsets per example
            pred = self.pred_net(word_picks)
            action_list = sample.saved_subsets   # indicator vectors of the sampled subsets
            exp_sizes = sample.exp_sizes

            # custom_backprop presumably computes the loss and populates the
            # gradients itself; there is no separate loss.backward() call.
            loss = custom_backprop(reg, reg_mean)(pred, target, action_list, exp_sizes)
            
            # Average realized subset size across all samples in the batch
            ssizes = [s.data.sum() for example in action_list for s in example]
            ssize = sum(ssizes) / len(ssizes)

            optimizer.step()
            optimizer.zero_grad()

            self.loss_dict[t].append(loss.data[0])
            self.ssize_dict[t].append(ssize)

            if not ((t + 1) % loss_log):
                print("Loss at it ", t+1, " is: ", loss.data[0])

    def evaluate(self, test_iter, batch_size=10, alpha_iter=4):
        
        set_size = self.set_size
        n_clusters = self.n_clusters
        kernel_in = self.kernel_in
        kernel_out = self.kernel_out
        embd_dim = self.pred_in
        dtype = self.dtype

        sample = SampleFixed(alpha_iter)
        
        cum_loss = 0. 
        cum_ssize = 0.

        for t in range(test_iter):
            words, context, target, index = self.generate(batch_size)
            words = Variable(words, volatile=True)
            target = Variable(target, volatile=True)
            kernel = self.kernel_net(words)
            word_picks = sample(kernel, words)
            pred = self.pred_net(word_picks)
            target = target.unsqueeze(1).expand_as(pred)
            action_list = sample.saved_subsets
            exp_sizes = sample.exp_sizes
            
            loss = nn.MSELoss()(pred, target)
            cum_loss += loss.data[0]

            ssizes = [s.data.sum() for example in action_list for s in example]
            ssize = sum(ssizes) / len(ssizes)
            cum_ssize += ssize
            
        print("Average Loss is:", cum_loss / test_iter)
        print("Average Set Size is:", cum_ssize / test_iter)

In [ ]:
network_params = {'set_size': 20, 'n_clusters': 15}
dtype = torch.DoubleTensor
train_iter = 300
batch_size = 10
lr = 1e-4
alpha_iter = 4
baseline = True
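
A quick sanity check of the generator under these parameters. The cell below is
an added sketch, not part of the original run: it only uses the class defined
above and asserts that every cluster id appears at least once per example,
which the index construction in `generate` guarantees.

In [ ]:
torch.manual_seed(0)
sim_check = SimKDPPDeepSet(network_params, dtype)
words, context, target, index = sim_check.generate(batch_size)

# Before shuffling, the first n_clusters columns were exactly 0..n_clusters-1,
# so every row of `index` must contain each cluster id at least once.
for row in index:
    assert set(range(sim_check.n_clusters)) <= set(row.tolist())
print(words.size(), target.size())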

In [ ]:
# Simulation with reg_mean = 20
reg=0.1
reg_mean=20
torch.manual_seed(10)
sim20 = SimKDPPDeepSet(network_params, dtype)
sim20.train(1000, batch_size, lr, alpha_iter, baseline, reg, reg_mean)
sim20.evaluate(500)

In [ ]:
sim20.train(1000, batch_size, lr, alpha_iter, baseline, reg, reg_mean)
sim20.evaluate(500)
plot_floats(sim20.loss_dict,50)
plot_floats(sim20.ssize_dict,50)

In [ ]:
# Simulation with reg_mean = 15
reg=0.1
reg_mean=15
torch.manual_seed(10)
sim15 = SimKDPPDeepSet(network_params, dtype)
sim15.train(1000, batch_size, lr, alpha_iter, baseline, reg, reg_mean)
sim15.evaluate(500)

In [ ]:
# Simulation with reg_mean = 10
reg=0.1
reg_mean=10
torch.manual_seed(10)
sim10 = SimKDPPDeepSet(network_params, dtype)
sim10.train(1000, batch_size, lr, alpha_iter, baseline, reg, reg_mean)
sim10.evaluate(500)

In [ ]:
# Simulation with reg_mean = 5 (much stronger regularization, reg = 100)
reg=100
reg_mean=5
torch.manual_seed(10)
sim5 = SimKDPPDeepSet(network_params, dtype)
sim5.train(train_iter, batch_size, lr, alpha_iter, baseline, reg, reg_mean)
sim5.evaluate(500)

In [ ]:
sim5.train(train_iter, batch_size, lr, alpha_iter, baseline, 10000, 5)

In [ ]:
sim5.evaluate(100)
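
To summarize the recorded training curve, a small helper cell (added here for
illustration): `loss_dict` maps each iteration to a one-element list of losses,
so the mean over the last iterations can be computed directly.

In [ ]:
# Average the losses recorded for the last 100 training iterations.
last = sorted(sim5.loss_dict.items())[-100:]
last_losses = [v for t, vals in last for v in vals]
print("Mean loss over last 100 iterations:", sum(last_losses) / len(last_losses))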

In [ ]:
sim5

In [ ]:
sim5.loss_dict
