In [ ]:
In [1]:
import torch
from torch.autograd import Variable
from torch.autograd import Function
import numpy as np
In [ ]:
# PIPELINE
# feed in a tensor of size batch_size * max_set_size * embd_dim
# could possibly be a packed sequence which includes length information for each batch element
# 0) Assume we have created sequences in DataSet (they are already padded)
# 1) Collapse to 2D to feed through kernel embedding
# 2) Reconstruct using for-loop the kernel matrix L, do eigendecomposition and sample from DPP
# 3) New kernel should be batch_size * alpha_iter * embd_dim (contains summed selection for each batch + iteration)
# 4) Collapse to 2D and feed through prediction network
# 5) Get something of size (batch_size x alpha_iter) * target_dim, make target compatible with this
# 6) Backpropagate the loss
mask = data.abs().sum(2).sign().squeeze()
lengths = mask.sum(1)
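In [ ]:
# Hedged sketch of pipeline steps 0)-2): build the padding mask and per-set lengths, then
# collapse the padded 3D batch to 2D before the kernel embedding. All names here (toy, embed,
# toy_mask, ...) are illustrative only; the masking idiom follows the lines above.
import torch
import torch.nn as nn
from torch.autograd import Variable
toy = torch.zeros(2, 4, 3)                      # batch_size=2, max_set_size=4, embd_dim=3
toy[0, :2] = torch.randn(2, 3)                  # first set has 2 real elements
toy[1, :3] = torch.randn(3, 3)                  # second set has 3 real elements
toy = Variable(toy)
toy_mask = toy.abs().sum(2).sign()              # 1 for real rows, 0 for padding
toy_lengths = toy_mask.sum(1).squeeze()         # per-set lengths: 2 and 3
embed = nn.Linear(3, 5)                         # stand-in for the kernel embedding
flat = toy.masked_select(toy_mask.byte().expand_as(toy)).view(-1, 3)  # (2+3) x 3
emb = embed(flat)                               # (2+3) x 5: padded batch collapsed to 2D
print(toy_lengths.data, emb.size())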
In [88]:
samples
Out[88]:
In [69]:
[torch.zeros(alpha_iter,i) for i in (max_set_size - length.data)]
Out[69]:
In [66]:
torch.cat([torch.zeros(0),torch.zeros(0)])
Out[66]:
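In [ ]:
# `pad_with_zeros` is used further down but never defined in this notebook; a hypothetical
# version consistent with the zero-padding experiments above could look like this. It
# right-pads a 1D selection Variable (length = true set size) with zeros up to max_set_size.
import torch
from torch.autograd import Variable
def pad_with_zeros(subset, max_set_size):
    # assumes subset is a 1D Variable; .new() keeps the padding in the same tensor type
    n = subset.size(0)
    if n == max_set_size:
        return subset
    pad = Variable(subset.data.new(max_set_size - n).zero_())
    return torch.cat([subset, pad])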
In [269]:
def my_hook(i, j):
    def my_print(module, grad_in, grad_out):
        print(i, j, loss_list[i][j])
    return my_print
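In [ ]:
# Usage sketch for the hook factory above (illustrative only): a module backward hook expects
# a callable with the (module, grad_input, grad_output) signature that my_print provides.
# The next cell attaches it to DPPLayer; here it is shown on a plain nn.Linear. Note that
# loss_list[i][j] must already exist by the time backward() is actually called.
import torch.nn as nn
demo = nn.Linear(3, 2)
handle = demo.register_backward_hook(my_hook(0, 0))  # would print (0, 0, loss_list[0][0]) on backward
# ... forward / backward would go here ...
handle.remove()  # detach the hook again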
In [270]:
import torch
import torch.nn as nn
# Set up data
batch_size = 5
max_set_size = 6
feat_dim = 7
target_dim = 3
hidden_dim = 10
alpha_iter = 2
kernel = nn.Linear(feat_dim, hidden_dim)
predictor = nn.Linear(feat_dim, target_dim)
data = torch.zeros(batch_size, max_set_size, feat_dim)
data[0,:4] = torch.randn(4,feat_dim)
data[1,:3] = torch.randn(3,feat_dim)
data[2,:6] = torch.randn(6,feat_dim)
data[3,:4] = torch.randn(4,feat_dim)
data[4,:5] = torch.randn(5,feat_dim)
data = Variable(data)
target = Variable(torch.randn(batch_size, target_dim))
criterion = nn.MSELoss()
# Forward pass
mask = data.abs().sum(2).sign()
length = mask.sum(1).squeeze()
mask = mask.byte()
batch_kernel = kernel(data.masked_select(mask.expand_as(data)).view(-1, feat_dim))
#batch_kernel.sum().backward()
s = 0
samples = [[] for i in range(batch_size)]
for i, e in enumerate(length.data.cumsum(0).long()):
    A = batch_kernel[s:e]
    L = A.mm(A.t())
    vals, vecs = custom_eig()(L)
    for j in range(alpha_iter):
        dpp = DPPLayer()
        dpp.register_backward_hook(my_hook(i, j))  # attach the hook to this instance (assumes DPPLayer exposes the Module-style hook API)
        subset = dpp(vals, vecs)
        sample = pad_with_zeros(subset, max_set_size)
        samples[i].append(sample)
    s = e
samples = [torch.stack(i) for i in samples]
reps = [samples[i].mm(data[i]) for i in range(batch_size)]
big = torch.cat(reps)
predictions = predictor(big).view(batch_size, alpha_iter, target_dim)
target = target.unsqueeze(1).expand(batch_size, alpha_iter, target_dim)
loss = criterion(predictions, target)
# per-sample, per-iteration squared errors (used by the backward hooks above)
loss_list = list(((predictions - target)**2).mean(2).data)
loss_list = [list(i.view(-1)) for i in loss_list]
loss.backward()
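In [ ]:
# Quick consistency check (sketch): nn.MSELoss averages the squared error over every element,
# so the scalar loss above should equal the mean of the per-(sample, iteration) entries in loss_list.
flat_losses = [x for row in loss_list for x in row]
print(loss.data[0], sum(flat_losses) / len(flat_losses))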
In [217]:
loss_list = list(((predictions - target)**2).mean(2).data)
loss_list = [list(i.view(-1)) for i in loss_list]
In [248]:
samples
Out[248]:
In [42]:
kernel = med[start:end]
L = kernel.mm(kernel.t())
e, v = custom_eig()(L)
for j in range(3):
    subset = DPP()(e, v)
    my_list[i].append(subset)
start = end
new_list = [torch.stack(l) for l in my_list]
In [30]:
In [ ]:
In [1]:
# Set-up
# THIS COULD BE IT!!
# THIS IS IT!
# Let's do it!
N = 3
A = torch.randn(N,N).double()
A = A.mm(A.t())
#A = torch.Tensor(A).float()
e, v = torch.eig(A, eigenvectors=True)
e = e[:,0]
# Random perturbation for forward (kept in double to match A and its eigendecomposition)
I = torch.eye(N).double()
dA = torch.randn(N, N).double()
E = e.expand(N,N) - e.expand(N,N).t()
F = 1 / (E + I) - I
P = v.inverse().mm(dA).mm(v)
de = I * P
dv = v.mm(F * P)
# random perturbation for backward
be = torch.randn(N).double().diag()
bv = torch.randn(N, N).double()
#be = torch.ones(N).double().diag()
#bv = torch.ones(N, N).double()
med = be + F * (v.t().mm(bv))
bA = v.t().inverse().mm(med).mm(v.t())
print('adj error: ',torch.sum(dA*bA)-torch.sum(de*be)-torch.sum(dv*bv))
bA
# Check the forward pass using an analytic function of complex matrices (complex-step) - see the numpy cell below
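In [ ]:
# Minimal reminder (sketch) of the complex-step trick used below: for a real-analytic f,
# f(x + i*eps*dx) ≈ f(x) + i*eps*f'(x)*dx, so Im(f(x + i*eps*dx)) / eps recovers the
# directional derivative essentially without truncation error.
import numpy as np
eps = 1e-20
x, dx = 0.7, 1.0
approx = np.imag(np.sin(x + 1j * eps * dx)) / eps
print(approx, np.cos(x))   # both ≈ cos(0.7)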
In [80]:
# Checking SVD IN NUMPY (via the eigendecomposition of A.dot(A.T)) with complex-step perturbations!!
import numpy as np
# General Set-up
N = 4
A = 0.1 * np.random.randn(N, N) + np.diag(np.arange(1, N+1))
B = np.random.randn(N, N)
I = np.eye(N)
dA = np.random.randn(N, N)
dB = np.random.randn(N, N)
bC = np.random.randn(N, N)
eps = 1e-20
epsi = 1 / eps
Ae = A + 1j*eps*dA
Be = B + 1j*eps*dB
# SVD
u, s, vT = np.linalg.svd(A)
s = np.diag(s)
De, Ue = np.linalg.eig(Ae.dot(Ae.T))
D = np.real(De)
U = np.real(Ue)
# rescale the complex eigenvectors so the perturbation dU has zero diagonal component
Ue = Ue.dot(np.diag(1 / np.diag(np.linalg.inv(U).dot(Ue))))
E = np.outer(np.ones(N), D) - np.outer(D, np.ones(N))
F = 1 / (E + np.eye(N)) - np.eye(N)
P = np.linalg.inv(U).dot(dA.dot(U))
dD = np.eye(N) * P
dU = U.dot(F*P)
bD = np.diag(np.random.randn(N))
bU = np.random.randn(N,N)
med = bD + F * (U.T.dot(bU))
bA = np.linalg.inv(U.T).dot(med.dot(U.T))
print('eigenvalues and eigenvectors')
print('CVT error: ', np.linalg.norm(np.diag(dD)-epsi*np.imag(De)))
print('CVT error: ', np.linalg.norm(dU-epsi*np.imag(Ue)))
print('adj error: ',np.sum(dA*bA)-np.sum(dD*bD)-np.sum(dU*bU))
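In [ ]:
# Sanity check (sketch) linking the two decompositions used above: the eigenvalues of A.dot(A.T)
# should equal the squared singular values of A (up to ordering).
print(np.sort(np.linalg.eigvalsh(A.dot(A.T)))[::-1])
print(np.linalg.svd(A, compute_uv=False)**2)   # singular values are returned sorted descending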
In [77]:
De, Ue = np.linalg.eig(Ae.dot(Ae.T))
In [78]:
De
Out[78]:
In [40]:
# Let's do the above thing for SVD!!
# First just do it theoretically, then try with my auto_grad
from dpp_nets.my_torch.linalg import custom_svd
M = 4
N = 5
eps = 1e-20
epsi = 1 / eps
dA = torch.randn(M, N).double()
A = torch.randn(M,N).double()
vecs, vals, v = torch.svd(A, some=False) # M x M, M, N x N
# Random perturbation for forward pass
utdAv = vecs.t().mm(dA).mm(v) #M x N
dP1 = utdAv[:,:M] # M x M
dP2 = utdAv[:,M:] # M x (N - M)
dS = utdAv.diag() # M
E = vals.expand(M,M) - vals.expand(M,M).t() # mask
F = 1 / (E + torch.eye(M).double()) - torch.eye(M).double()
dC = F * (dP1.mm(vals.diag()) + vals.diag().mm(dP1.t()))
dU = vecs.mm(dC)
dvals = dS
dvecs = dU
# Backward PASS
bvals = torch.randn(M).diag().double()
bvecs = torch.randn(M, M).double()
bP1 = (vecs.t() * F).mm(bvecs).mm(vals.diag()) + bvecs.t().mm(vecs * F.t()).mm(vals.diag())
med = bvals + bP1
bA = vecs.mm(med).mm(v[:,:M].t())
# Now check it
print('adj error: ',torch.sum(dA*bA)-torch.sum(dvals*bvals.diag())-torch.sum(dvecs*bvecs))
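In [ ]:
# Sketch: the backward expressions derived above, packaged as a plain helper so they could be
# reused (e.g. inside a custom autograd Function). It simply re-uses the cell's formulas and
# assumes double tensors, as above; it is not an independently verified reference implementation.
def svd_backward(vecs, vals, v, bvals, bvecs):
    # vecs: M x M left singular vectors, vals: length-M singular values, v: N x N right
    # singular vectors, bvals: M x M (diagonal) grad wrt singular values, bvecs: M x M grad
    # wrt left singular vectors; returns bA of size M x N
    M = vals.size(0)
    eye = torch.eye(M).double()
    E = vals.expand(M, M) - vals.expand(M, M).t()
    F = 1 / (E + eye) - eye
    bP1 = (vecs.t() * F).mm(bvecs).mm(vals.diag()) + bvecs.t().mm(vecs * F.t()).mm(vals.diag())
    med = bvals + bP1
    return vecs.mm(med).mm(v[:, :M].t())

# should reproduce the bA computed in the cell above
print((svd_backward(vecs, vals, v, bvals, bvecs) - bA).abs().max())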
In [39]:
dS
Out[39]:
In [20]:
vecs.mm(bvals)
Out[20]:
In [14]:
bvecs
Out[14]:
In [ ]:
A_var = Variable(A, requires_grad=True)
e_var, v_var = custom_eig()(A_var)
# the backward gradients must match the dtype of the outputs (double here, since A is double)
be_var = be.diag()
bv_var = bv
e_var.backward(be_var, retain_variables=True)
v_var.backward(bv_var)
bA = A_var.grad.data
bA
# artificial forward pass - simply re-use the tensors from the other cell
# in fact by showing that the backward gradients agree, we have already established proof of concept
print('adj error: ',torch.sum(dA*bA)-torch.sum(de*be)-torch.sum(dv*bv))
bA
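In [ ]:
# One more check (sketch) that avoids random adjoint directions: the gradient of the sum of
# eigenvalues (i.e. the trace) with respect to A should be the identity matrix, assuming
# custom_eig implements the adjoint formula derived above.
A_chk = Variable(A, requires_grad=True)
e_chk, v_chk = custom_eig()(A_chk)
e_chk.sum().backward()
print((A_chk.grad.data - torch.eye(N).double()).abs().max())   # expect ~0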
In [ ]:
# Scalability - Flexible batch_size
import torch
import torch.nn as nn
torch.manual_seed(10)
batch_size = 5
max_set_size = 6
feat_dim = 4
hidden_dim = 300
data = torch.randn(batch_size, max_set_size, feat_dim)
model = nn.Linear(feat_dim, hidden_dim)
In [ ]:
# now make it tensor-ready
mask, _ = data.abs().max(dim=2)
length = mask.sign().sum(dim=1).squeeze()
mask = mask.sign().expand_as(data).byte()
my_input = Variable(data, requires_grad=True)
compressed = my_input.masked_select(Variable(mask)).view(-1,feat_dim)
med = model(compressed)
# now do the eigendecomposition (for this need to re-assemble the tensor again)
# this probably needs a for-loop :(((((
# for i in range(batch_size):
start = 0
my_list = [[] for i in range(batch_size)]
for i, end in enumerate(length.cumsum(0).long()):
    kernel = med[start:end]
    L = kernel.mm(kernel.t())
    e, v = custom_eig()(L)
    for j in range(3):
        subset = DPP()(e, v)
        my_list[i].append(subset)
    start = end
new_list = [torch.stack(l) for l in my_list]
#loss = torch.stack(my_list)
#final = loss.sum()
#final.backward()
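In [ ]:
# Sketch of how the commented-out backward above could be completed once the subsets are
# stacked (assumes DPP produces differentiable Variables; names as in the cell above):
dummy_loss = new_list[0].sum()
for s in new_list[1:]:
    dummy_loss = dummy_loss + s.sum()
dummy_loss.backward()
print(my_input.grad)   # gradients should flow all the way back to the padded input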
In [ ]: