In [1]:
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

Exercise 1 - SVD


In [3]:
V = np.array([[ 0.58,  0.  ],
              [ 0.58,  0.  ],
              [ 0.58,  0.  ],
              [ 0.  ,  0.71],
              [ 0.  ,  0.71]])

In [6]:
Leslie = np.array([0,3,0,0,4])

In [10]:
concept = V.T.dot(Leslie)
concept


Out[10]:
array([ 1.74,  2.84])

In [11]:
Leslie.dot(V)


Out[11]:
array([ 1.74,  2.84])

In [12]:
concept.dot(V.T)


Out[12]:
array([ 1.0092,  1.0092,  1.0092,  2.0164,  2.0164])

Exercise 3 - Alternating Optimization


In [13]:
M = np.array([[4, 4, 0, 0, 3, 2, 2, 0],
     [1, 0, 0, 4, 0, 0, 1, 5],
     [0, 2, 3, 3, 0, 2, 1, 3],
     [1, 0, 1, 1, 2, 0, 4, 5],
     [4, 4, 4, 1, 1, 1, 0, 0],
     [0, 1, 1, 0, 4, 3, 0, 5]]).astype(np.float32)

In [27]:
# Rank of M
np.linalg.matrix_rank(M)


Out[27]:
6

(a) Initialize the matrices Q and P according to the SVD decomposition of R assuming missing ratings are 0. What is the value of the function f(P;Q)?


In [35]:
from scipy.sparse.linalg import svds

init = 'random'
k = 3
if init == 'svd':
    U, s, V = svds(M, k=k)
    print(U.shape, s.shape, V.shape)
    S = np.diag(s)
    Q = U.dot(S)
    P = V
elif init == 'random':
    Q = np.random.random((M.shape[0], k))
    P = np.random.random((k, M.shape[1]))
else:
    raise ValueError

(b) Measure the loss f(P;Q) after each full iteration of the alternating optimization. Plot the value of this loss. How does it behave over time?


In [36]:
from alternating import optimize

In [37]:
loss = optimize(M, P, Q)
plt.plot(loss)


('loss', 194.79451468992971)
('loss', 20.019171193993962)
('loss', 3.0734696818642431)
('loss', 1.9482781823027493)
('loss', 1.2334525482471443)
('loss', 0.7593449651050318)
('loss', 0.46506139477754327)
('loss', 0.28365396490235995)
('loss', 0.1700473873551395)
('loss', 0.099715581801592498)
Out[37]:
[<matplotlib.lines.Line2D at 0x9cd8e10>]

In [31]:
for k in range(1,5):
    U, s, V = svds(M, k=k)
    S = np.diag(s)
    P = V
    Q = U.dot(S)
    loss = optimize(M, P, Q, False)
    plt.plot(loss, label='k={}'.format(k))
plt.legend()


Out[31]:
<matplotlib.legend.Legend at 0x7e5e3c8>

(c) What happens if the rank k equals to 1, 2 or 4? What happens if we increase k above values of 4?


In [34]:
U, s, V = svds(M, k=5)
S = np.diag(s)
Q = U.dot(S)
P = V
loss = optimize(M, P, Q, False)


---------------------------------------------------------------------------
LinAlgError                               Traceback (most recent call last)
<ipython-input-34-1b92362437ff> in <module>()
      3 Q = U.dot(S)
      4 P = V
----> 5 loss = optimize(M, P, Q, False)

C:\Users\dante\src\Dropbox\Mining Massive Datasets (IN2323)\alternating.py in optimize(M, P, Q, verbose)
     10         for user_idx in range(M.shape[1]):
     11             nnz_idx = np.argwhere(M[:, user_idx]).flatten()
---> 12             res =np.linalg.inv(Q[nnz_idx].T.dot(Q[nnz_idx])).dot(Q[nnz_idx].T.dot(M[nnz_idx, user_idx]))
     13             P[:, user_idx] = res
     14 

C:\tools\Anaconda2\lib\site-packages\numpy\linalg\linalg.pyc in inv(a)
    524     signature = 'D->D' if isComplexType(t) else 'd->d'
    525     extobj = get_linalg_error_extobj(_raise_linalgerror_singular)
--> 526     ainv = _umath_linalg.inv(a, signature=signature, extobj=extobj)
    527     return wrap(ainv.astype(result_t, copy=False))
    528 

C:\tools\Anaconda2\lib\site-packages\numpy\linalg\linalg.pyc in _raise_linalgerror_singular(err, flag)
     88 
     89 def _raise_linalgerror_singular(err, flag):
---> 90     raise LinAlgError("Singular matrix")
     91 
     92 def _raise_linalgerror_nonposdef(err, flag):

LinAlgError: Singular matrix

(d) What happens if you initialize the matrices P and Q randomly (random values be- tween 0 and 1) rather than using SVD initialization?

Answer: It takes much longer to converge to the same error