In [1]:
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
In [3]:
# 5 x 2 matrix: the first three rows are non-zero only in column 0,
# the last two rows only in column 1.
V = np.array([[0.58, 0.0]] * 3 + [[0.0, 0.71]] * 2)
In [6]:
# Length-5 integer vector; only positions 1 and 4 are non-zero.
Leslie = np.asarray((0, 3, 0, 0, 4))
In [10]:
# Multiply V transposed by the vector: one component per column of V.
concept = np.dot(V.T, Leslie)
concept
Out[10]:
In [11]:
# Row-vector form of the product: the 1-D vector Leslie times V.
np.dot(Leslie, V)
Out[11]:
In [12]:
# Multiply the vector `concept` by V transposed: one value per row of V.
np.dot(concept, V.T)
Out[12]:
In [13]:
# 6 x 8 matrix of small non-negative integers, stored as float32.
M = np.array(
    [
        [4, 4, 0, 0, 3, 2, 2, 0],
        [1, 0, 0, 4, 0, 0, 1, 5],
        [0, 2, 3, 3, 0, 2, 1, 3],
        [1, 0, 1, 1, 2, 0, 4, 5],
        [4, 4, 4, 1, 1, 1, 0, 0],
        [0, 1, 1, 0, 4, 3, 0, 5],
    ],
    dtype=np.float32,
)
In [27]:
# Rank of M, as computed by numpy's matrix_rank with its default tolerance.
# The value is only displayed as the cell output; it is not stored in a variable.
np.linalg.matrix_rank(M)
Out[27]:
(a) Initialize the matrices Q and P from the SVD of R, assuming missing ratings are 0. What is the value of the function f(P;Q)?
In [35]:
from scipy.sparse.linalg import svds

# Initialisation strategy for the factors Q (M.shape[0] x k) and P (k x M.shape[1]):
#   'svd'    - truncated SVD of M:  Q = U.dot(diag(s)),  P = Vt
#   'random' - uniform random values in [0, 1)
init = 'random'
k = 3  # rank of the factorisation (number of columns of Q / rows of P)
SEED = 0  # fixed seed so the random initialisation is reproducible on re-run

if init == 'svd':
    # The right factor is bound to `Vt`, not `V`, so the 5 x 2 matrix `V`
    # defined in an earlier cell is not silently overwritten.
    U, s, Vt = svds(M, k=k)
    print(U.shape, s.shape, Vt.shape)
    S = np.diag(s)
    Q = U.dot(S)
    P = Vt
elif init == 'random':
    # A dedicated RandomState keeps the draw independent of the global RNG state.
    rng = np.random.RandomState(SEED)
    Q = rng.random_sample((M.shape[0], k))
    P = rng.random_sample((k, M.shape[1]))
else:
    raise ValueError(
        "unknown init {!r}; expected 'svd' or 'random'".format(init)
    )
(b) Measure the loss f(P;Q) after each full iteration of the alternating optimization. Plot the value of this loss. How does it behave over time?
In [36]:
from alternating import optimize
In [37]:
# Run the alternating optimisation starting from the current P and Q.
# `loss` is plotted as a sequence; presumably it holds one loss value per full
# iteration - confirm against alternating.optimize.
loss = optimize(M, P, Q)

# Label the figure so it can be read on its own when the notebook is skimmed.
plt.plot(loss)
plt.xlabel('iteration')
plt.ylabel('loss f(P;Q)')
plt.title('Loss of the alternating optimization')
plt.show()
Out[37]:
In [31]:
# Compare SVD-initialised runs for ranks 1..4 on a single figure.
# The loop variable is `rank` (not `k`) and the right SVD factor is `Vt`
# (not `V`), so neither the configured `k` nor the 5 x 2 matrix `V` from
# earlier cells is overwritten by this sweep.
for rank in range(1, 5):
    U, s, Vt = svds(M, k=rank)
    S = np.diag(s)
    P = Vt
    Q = U.dot(S)
    # The fourth positional argument is passed as False; its meaning is defined
    # by alternating.optimize (presumably a verbosity switch - TODO confirm).
    loss = optimize(M, P, Q, False)
    plt.plot(loss, label='k={}'.format(rank))

plt.xlabel('iteration')
plt.ylabel('loss f(P;Q)')
plt.legend()
Out[31]:
(c) What happens if the rank k is equal to 1, 2 or 4? What happens if we increase k above 4?
In [34]:
# Rank-5 run: SVD initialisation followed by the alternating optimisation.
# The right factor is bound to `Vt` rather than `V` so the 5 x 2 matrix `V`
# defined near the top of the notebook stays intact.
U, s, Vt = svds(M, k=5)
S = np.diag(s)
Q = U.dot(S)
P = Vt
# The fourth positional argument is passed as False; its meaning is defined by
# alternating.optimize (presumably a verbosity switch - TODO confirm).
loss = optimize(M, P, Q, False)
(d) What happens if you initialize the matrices P and Q randomly (random values between 0 and 1) rather than using SVD initialization?
Answer: It takes much longer to converge to the same error.