In [ ]:
# The old notebook became too confusing!

# Trying to check Singular Values and Singular Vectors in Numpy
 
import numpy as np

# General Set-up
N = 2
A = 0.1 * np.random.randn(N, N) + np.diag(np.arange(1, N+1))
B = np.random.randn(N, N)
I = np.eye(N)
dA = np.random.randn(N, N)
dB = np.random.randn(N, N)
bC = np.random.randn(N, N)
eps = 1e-20
epsi = 1 / eps
Ae = A + 1j*eps*dA
Be = B + 1j*eps*dB # not actually needed: this cell only perturbs the single input A

# SVD Set-up
u, s, vT = np.linalg.svd(A)

# Complex part
De, Du = np.linalg.eig(Ae.dot(Ae.T))
idx = De.argsort()[::-1]   # eig does not sort, so order eigenpairs by descending eigenvalue
De = De[idx]
Du = Du[:, idx]
Du = Du.dot(np.diag(1 / np.diag(u.T.dot(Du)))) # gauge-fix: same rescaling as in the eig cell below
D = np.real(De) # real part recovers the squared singular values

# forward propagation: directional derivatives of the SVD along dA
dS = np.diag(I * u.T.dot(dA).dot(vT.T)) # dS_k = u_k^T dA v_k

E = np.outer(np.ones(N), s**2) - np.outer(s**2, np.ones(N)) # eigenvalue differences of A A^T
F = 1 / (E + np.eye(N)) - np.eye(N)                         # F_ij = 1/(s_j**2 - s_i**2), zero diagonal
dU = u.dot(F * (u.T.dot(dA).dot(vT.T).dot(np.diag(s)) + np.diag(s).dot(vT).dot(dA.T).dot(u)))

# backward gradients
bS = np.random.randn(N) # gradients wrt singular values
bA = u.dot(np.diag(bS)).dot(vT) # backpropagated gradient wrt matrix A

print('singular value')
print('svd error: ', (np.linalg.norm(s-np.sqrt(D))))

# Forward Check based on complex matrices
print('CVT error (vals): ', (np.linalg.norm(2*s*dS - epsi*np.imag(De))))
print('CVT error (vecs): ', (np.linalg.norm(dU - epsi*np.imag(Du))))

# Backward Check, these are essentially two traces!!
print('adj error: ',np.sum(dA*bA)-np.sum(dS*bS))

# I should be able to use the same identity for my testing purposes!!!
# trace(bC.T * dC) = trace(bA.T * dA) + trace(bB.T * dB)
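
In [ ]:
# My sketch of the two-input trace identity mentioned above, on the simplest possible
# test function C = A.dot(B) (a hypothetical example, not part of the DPP code).
# Forward mode: dC = dA.dot(B) + A.dot(dB); reverse mode: bA = bC.dot(B.T), bB = A.T.dot(bC).
# If both modes are consistent, trace(bC.T dC) = trace(bA.T dA) + trace(bB.T dB).
C = A.dot(B)
dC = dA.dot(B) + A.dot(dB)
bA_prod = bC.dot(B.T)
bB_prod = A.T.dot(bC)
print('two-input adj error: ', np.sum(bC*dC) - np.sum(bA_prod*dA) - np.sum(bB_prod*dB))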

In [ ]:


In [ ]:
import numpy as np

# Checking eigenvectors and eigenvalues in Numpy
np.random.seed(10)
# General Set-up
N = 5
A = 0.1 * np.random.randn(N, N) + np.diag(np.arange(1, N+1))
#A = A.dot(A.T)
B = np.random.randn(N, N)
I = np.eye(N)
dA = np.random.randn(N, N)
dB = np.random.randn(N, N)
bC = np.random.randn(N, N)
eps = 1e-20
epsi = 1 / eps
Ae = A + 1j*eps*dA
Be = B + 1j*eps*dB

# EIGEN Set-up
De, Ue = np.linalg.eig(Ae)
D = np.real(De)
U = np.real(Ue)

# gauge-fix the complex-step eigenvectors: rescale each column of Ue so that
# inv(U).dot(Ue) has unit diagonal, i.e. dU has no component along the matching
# unperturbed eigenvector (this matches the zero diagonal of F below)
Ue = Ue.dot(np.diag(1 / np.diag(np.linalg.inv(U).dot(Ue))))
E = np.outer(np.ones(N), D) - np.outer(D, np.ones(N))
F = 1 / (E + np.eye(N)) - np.eye(N)
P = np.linalg.inv(U).dot(dA.dot(U))
dD = np.eye(N) * P
dU = U.dot(F*P)
#dD, dU = forward(A, dA)

bD = np.diag(np.random.randn(N)) # random gradient wrt eigenvalues
bU = np.random.randn(N,N) # random gradient wrt eigenvectors
med = bD + F * (U.T.dot(bU)) # intermediate (don't overwrite bD, it is reused in the check below)
bA = np.linalg.inv(U.T).dot(med.dot(U.T)) # backpropagated gradient wrt A
print('eigenvalues and eigenvectors')
print('CVT error (vals): ', np.linalg.norm(np.diag(dD) - epsi*np.imag(De)))
print('CVT error (vecs): ', np.linalg.norm(dU - epsi*np.imag(Ue)))
print('adj error (backward): ', np.sum(dA*bA)-np.sum(dD*bD)-np.sum(dU*bU))
#print('CVT error (vecs): ', np.linalg.norm(np.abs(dU) - np.abs(epsi*np.imag(Ue))))
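
In [ ]:
# My minimal demo of the complex-variable trick (CVT) used in the checks above:
# for an analytic f, np.imag(f(x + 1j*eps*dx)) / eps approximates the directional
# derivative f'(x)*dx without subtractive cancellation, even for eps = 1e-20.
x, dx = 1.3, 0.7
eps = 1e-20
cvt = np.imag(np.exp(x + 1j*eps*dx)) / eps # complex-step derivative of exp at x along dx
print('CVT demo error: ', cvt - np.exp(x)*dx)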

In [ ]:
dU

In [ ]:
# essentials for eig in PyTorch
import torch

N = 5
A = torch.randn(N,N).double()
L = A.mm(A.t())
I = torch.eye(N).double()
dA = torch.randn(N, N).double() 
dL = dA.mm(A.t()) + A.mm(dA.t())

vals, vecs = torch.eig(L, eigenvectors=True)
vals = vals[:,0]
E = vals.expand(N,N) - vals.expand(N,N).t()
F = 1 / (E + I) - I
P = torch.inverse(vecs).mm(dL.mm(vecs))
dvals = I * P
dvecs = vecs.mm(F*P)

# backward pass
bvals = torch.randn(N).diag().double()
bvecs = torch.randn(N,N).double()
med = bvals + F * (vecs.t().mm(bvecs))
bL = torch.inverse(vecs.t()).mm(med.mm(vecs.t()))
bA = bL.mm(A) + bL.t().mm(A) # chain rule through L = A.mm(A.t()): bA = (bL + bL.t()).mm(A)

# check
torch.sum(bA * dA) - torch.sum(dvals * bvals) - torch.sum(dvecs * bvecs)
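
In [ ]:
# My finite-difference sanity check for the eigenvalue derivatives computed above
# (eigenvectors are skipped because of their sign/scale ambiguity). It assumes no
# eigenvalue crossings, so the eigenvalues of L + h*dL and L - h*dL can be matched by sorting.
h = 1e-6
vals_p = torch.eig(L + h * dL)[0][:, 0]
vals_m = torch.eig(L - h * dL)[0][:, 0]
fd = (torch.sort(vals_p)[0] - torch.sort(vals_m)[0]) / (2 * h) # central difference
_, idx = torch.sort(vals)
print(fd - dvals.diag().index_select(0, idx)) # should be close to zero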

In [5]:
# essentials for svd in PyTorch

import torch

M = 6
N = 5
A = torch.randn(M,N).double()
dA = torch.randn(M, N).double() 
dL = dA.mm(A.t()) + A.mm(dA.t())

vecs, vals, v = torch.svd(A)
vals = vals**2 # squared singular values = eigenvalues of L = A.mm(A.t())
s = vals.size(0)
I = torch.eye(s).double()
E = vals.expand(s,s) - vals.expand(s,s).t()
F = 1 / (E + I) - I
P = vecs.t().mm(dL.mm(vecs))
dvals = I * P
dvecs = vecs.mm(F*P)

# backward pass
bvals = torch.randn(s).diag().double()
bvecs = torch.randn(vecs.size()).double()
med = bvals + F * (vecs.t().mm(bvecs))
bL = vecs.mm(med.mm(vecs.t()))
bA = bL.mm(A) + bL.t().mm(A)

# check
torch.sum(bA * dA) - torch.sum(dvals * bvals) - torch.sum(dvecs * bvecs)


Out[5]:
-3.552713678800501e-15

In [6]:
from torch.autograd import Variable
from dpp_nets.my_torch.linalg import custom_decomp

A = Variable(A, requires_grad=True)
vals, vecs = custom_decomp()(A)
torch.autograd.backward([vals, vecs],[bvals.diag(), bvecs])
torch.sum(A.grad.data == bA) # equals 30 = M*N, i.e. every entry of the autograd gradient matches bA


Out[6]:
30

In [ ]:
# This is an important comparison of my three gradient methods
# DON'T DELETE!!
# 1) Numpy function (Low-rank DPP-Factorization)
# 2) Allinone = Numpy gradient in Pytorch
# 3) Back-propagating through singular value decomposition

import numpy as np
from dpp_nets.dpp.score_dpp import score_dpp
import torch
from torch.autograd import Variable

# 1) NUMPY GRADIENT (THIS IS CONFIRMED TO BE THE SAME AS LOW-RANK PAPER)
embd = np.random.randn(10,10)
subset = np.array([1,1,1,0,0,1,0,1,0,0], dtype=float)
grad_numpy = score_dpp(embd, subset)
print(grad_numpy)

# 2) IN PYTORCH
embd = torch.DoubleTensor(embd)
vecs, vals, _ = torch.svd(embd)
vals.pow_(2)
subset = torch.DoubleTensor(subset)

def backward(kernel, subset):
    dtype = kernel.type()
    n, kernel_dim = kernel.size()
    subset_sum = subset.long().sum()   
    grad_kernel = torch.zeros(kernel.size()).type(dtype)
    
    if subset_sum:
        # auxiliary
        P = torch.eye(n).masked_select(subset.expand(n,n).t().byte()).view(subset_sum, -1).type(dtype)
        subembd = P.mm(kernel)
        submatrix = subembd.mm(subembd.t())
        submatinv = torch.inverse(submatrix)
        subgrad = 2 * submatinv.mm(subembd)
        subgrad = P.t().mm(subgrad)
        grad_kernel.add_(subgrad)

    # Gradient from whole L matrix (applies whether or not the subset is empty)
    K = kernel.t().mm(kernel) # not L!
    I_k = torch.eye(kernel_dim).type(dtype)
    I = torch.eye(n).type(dtype)
    inv = torch.inverse(I_k + K)
    B = I - kernel.mm(inv).mm(kernel.t())
    grad_from_full = 2 * B.mm(kernel)
    grad_kernel.sub_(grad_from_full)

    return grad_kernel
    
grad_pytorch = backward(embd, subset)
print(grad_pytorch)
grad_numpy - grad_pytorch.numpy()

# 3) GRADIENT WITH SVD BACKPROPAGATION
def backward_1(vals, vecs, subset):
    # Set-up
    dtype = vals.type()
    n = vecs.size(0)
    n_vals = vals.size(0)
    subset_sum = subset.long().sum()

    grad_vals = 1 / vals
    grad_vecs = torch.zeros(n, n_vals).type(dtype)

    if subset_sum:
        # auxiliary
        matrix = vecs.mm(vals.diag()).mm(vecs.t())
        P = torch.eye(n).masked_select(subset.expand(n,n).t().byte()).view(subset_sum, -1).type(dtype)
        submatrix = P.mm(matrix).mm(P.t())
        subinv = torch.inverse(submatrix)
        Pvecs = P.mm(vecs)

        grad_vals += Pvecs.t().mm(subinv).mm(Pvecs).diag()
        grad_vecs += P.t().mm(subinv).mm(Pvecs).mm(vals.diag())    

    return grad_vals, grad_vecs

def backward_2(grad_vals, grad_vecs, mat, vals, vecs):
    
    # unpack
    N = grad_vals.size(0)

    # auxiliary
    I = grad_vecs.new(N,N).copy_(torch.eye(N))
    F = vals.expand(N,N) - vals.expand(N,N).t()
    F.add_(I).reciprocal_().sub_(I)

    # gradient
    grad_mat = grad_vals.diag() + F * (vecs.t().mm(grad_vecs)) # intermediate variable 
    grad_mat = vecs.mm(grad_mat.mm(vecs.t())) # this is the gradient wrt L
    grad_mat = grad_mat.mm(mat) + grad_mat.t().mm(mat) # this is the grad wrt A

    return grad_mat

grad_vals, grad_vecs = backward_1(vals, vecs, subset)
backward_2(grad_vals, grad_vecs, embd, vals, vecs)
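
In [ ]:
# My quick cross-check of the cell above: if method 3 (SVD backpropagation) is consistent
# with method 2 (the PyTorch port of the numpy gradient), the entries printed below should
# be numerically zero. This only prints the deviation, it does not assert anything.
grad_svd = backward_2(grad_vals, grad_vecs, embd, vals, vecs)
print(grad_svd - grad_pytorch)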

In [ ]:


In [ ]:
from dpp_nets.my_torch.DPP import AllInOne
import torch
from torch.autograd import Variable

A = Variable(torch.randn(10,10).double(), requires_grad=True)
transform = AllInOne()
subset = transform(A)
subset.reinforce(1)
subset.sum().backward()
print(A.grad.data)
print(backward(A.data, subset.data)) # yes it works correctly

In [1]:
## Marginal Trainer
# Implement Gradient for Inverse
# in order to check if my gradient is working correctly 
import torch
from torch.autograd import Variable, Function
from dpp_nets.my_torch.linalg import custom_decomp

class var_inverse(Function):
    
    def forward(self, A):
        C = torch.inverse(A)
        self.save_for_backward(C)
        return C
    
    def backward(self, grad_C):
        C, = self.saved_tensors
        grad_A = -C.t().mm(grad_C).mm(C.t())
        return grad_A
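
In [ ]:
# My quick sanity check of var_inverse (a minimal sketch): compare its analytic gradient
# with a central finite-difference estimate of d sum(G * inverse(X)) / dX along a random
# direction dX. The test matrix is shifted by 4*I to keep it well conditioned.
X = torch.randn(4, 4).double() + 4 * torch.eye(4).double()
G = torch.randn(4, 4).double()  # upstream gradient seed
dX = torch.randn(4, 4).double() # random direction
Xv = Variable(X, requires_grad=True)
C = var_inverse()(Xv)
C.backward(G)
h = 1e-6
fd = (torch.sum(G * torch.inverse(X + h * dX)) - torch.sum(G * torch.inverse(X - h * dX))) / (2 * h)
print(fd - torch.sum(Xv.grad.data * dX)) # should be close to zero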

In [76]:
# Set-up 
# backpropagation through singular values works!

M = 4
N = 6
data = torch.randn(M, N).double()
identity = Variable(torch.eye(M).double())

# short-cut (using SVD)
Vs = Variable(data, requires_grad=True)
vals, vecs = custom_decomp()(Vs)

Ltemp = vecs.mm(vals.diag()).mm(vecs.t())
K = identity - vecs.mm((1 / (vals +1)).diag()).mm(vecs.t())
print("K is :", K)
#K.diag().backward(torch.arange(0, M).double()) # seed must have M entries to match K.diag()
#print("Grad is: ", Vs.grad.data)

# conventional (long-way)
Vl = Variable(data, requires_grad=True)
L = Vl.mm(Vl.t())
K = identity - var_inverse()((L + identity))
print("K is: ", K)
#K.diag().backward(torch.arange(0, M).double()) # seed must have M entries to match K.diag()
#print("Grad is: ", Vl.grad.data)


K is : Variable containing:
 0.6022 -0.2172  0.0162 -0.1169
-0.2172  0.5719 -0.0465 -0.0628
 0.0162 -0.0465  0.8163  0.0278
-0.1169 -0.0628  0.0278  0.7891
[torch.DoubleTensor of size 4x4]

K is:  Variable containing:
 0.6022 -0.2172  0.0162 -0.1169
-0.2172  0.5719 -0.0465 -0.0628
 0.0162 -0.0465  0.8163  0.0278
-0.1169 -0.0628  0.0278  0.7891
[torch.DoubleTensor of size 4x4]


In [47]:
# even better short-cut (when only interested in marginals)
Vss = Variable(data, requires_grad=True)
vals, vecs = custom_decomp()(Vss)
K = vecs.mm((1 / (vals +1)).diag()).mm(vecs.t()) # not really K
marginals = (1 - K.diag()).diag()
print("Marginals are: ", marginals)
marginals.backward(torch.arange(0, M).diag().double()) # seed must be M x M to match marginals
print("Grad is:", Vss.grad.data)

# Deviation from the conventional gradient (requires Vl.grad from the cell above,
# i.e. with its backward calls uncommented)
(Vss.grad.data - Vl.grad.data).abs().sum() / (M * N) # mean absolute deviation per entry

In [7]:
from torch.autograd import Variable, Function
from dpp_nets.my_torch.linalg import custom_decomp


class new_custom_decomp(Function):
    """
    This is a "strange" but useful decomposition. 
    It takes in a matrix A of size m x n. 
    It will return the eigenvectors (vecs) 
    and eigenvalues (vals) of the matrix 
    L = A.mm(A.t()), but without actually
    computing L. 
    
    Any square or rectangular shapes are allowed
    for A. :-)
    """
    def __init__(self, some=False):
        self.some = some
        
    def forward(self, mat):
        vecs, vals, _ = torch.svd(mat, self.some)
        vals.pow_(2)
        self.save_for_backward(mat, vals, vecs)
        
        return vals, vecs
    
    def backward(self, grad_vals, grad_vecs):
        # unpack
        mat, vals, vecs = self.saved_tensors 
        N = vals.size(0)
        
        # auxiliary
        I = grad_vecs.new(N,N).copy_(torch.eye(N))
        F = vals.expand(N,N) - vals.expand(N,N).t()
        F.add_(I).reciprocal_().sub_(I)

        # gradient
        grad_mat = grad_vals.diag() + F * (vecs.t().mm(grad_vecs)) # intermediate variable 
        grad_mat = vecs.mm(grad_mat.mm(vecs.t())) # this is the gradient wrt L
        grad_mat = grad_mat.mm(mat) + grad_mat.t().mm(mat) # this is the grad wrt A
        
        return grad_mat
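
In [ ]:
# My minimal usage sketch for new_custom_decomp on a rectangular input with some=True.
# (With the default some=False and a non-square A, the backward pass would run into the
# same kind of size mismatch as the reconstruction in the cell below.) This only
# exercises the shapes; the values were checked for custom_decomp further up.
M, N = 6, 4
A = Variable(torch.randn(M, N).double(), requires_grad=True)
vals, vecs = new_custom_decomp(some=True)(A)
bvals = torch.randn(N).double()    # random gradient wrt eigenvalues
bvecs = torch.randn(M, N).double() # random gradient wrt eigenvectors
torch.autograd.backward([vals, vecs], [bvals, bvecs])
print(A.grad.data.size()) # should be M x N, matching A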

In [17]:
M = 6
N = 4
A = torch.randn(M, N)
L = A.mm(A.t())
vecs, vals, _ = torch.svd(A, some=False)
print(vecs.size(), vals.size())
vals = vals**2


torch.Size([6, 6]) torch.Size([4])

In [18]:
print(L)
print(vecs.mm(vals.diag()).mm(vecs.t()))


 10.1708  -5.4160   0.0345   2.8779   1.4180   0.3497
 -5.4160   7.3209   3.2153  -4.6769  -1.2542  -0.5570
  0.0345   3.2153   2.5564  -2.0274  -0.2828  -0.0499
  2.8779  -4.6769  -2.0274   3.4030   0.9662   0.3621
  1.4180  -1.2542  -0.2828   0.9662   0.6507  -0.7268
  0.3497  -0.5570  -0.0499   0.3621  -0.7268   2.4910
[torch.FloatTensor of size 6x6]

---------------------------------------------------------------------------
RuntimeError                              Traceback (most recent call last)
<ipython-input-18-678005158231> in <module>()
      1 print(L)
----> 2 print(vecs.mm(vals.diag()).mm(vecs.t()))

RuntimeError: size mismatch, m1: [6 x 6], m2: [4 x 4] at /Users/soumith/miniconda2/conda-bld/pytorch_1493757319118/work/torch/lib/TH/generic/THTensorMath.c:1237
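
In [ ]:
# My note on the error above (a minimal sketch): with some=False the left singular vectors
# span all of R^M, but A only has min(M, N) = 4 singular values, so vals.diag() is 4 x 4
# while vecs is 6 x 6. Padding the squared singular values with zeros makes the
# reconstruction of L = A.mm(A.t()) go through.
full_vals = torch.zeros(M)
full_vals[:N] = vals
print(vecs.mm(full_vals.diag()).mm(vecs.t())) # should match L printed above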

In [ ]: