note: algorithm taken from by Albert Au Yeung
In [4]:
import numpy as np
"""
@INPUT:
R : a matrix to be factorized, dimension N x M
P : an initial matrix of dimension N x K
Q : an initial matrix of dimension M x K
K : the number of latent features
steps : the maximum number of steps to perform the optimisation
alpha : the learning rate
beta : the regularization parameter
@OUTPUT:
the final matrices P and Q
"""
def matrix_factorization(R, P, Q, K, steps=5000, alpha=0.0002, beta=0.02):
Q = Q.T
for step in xrange(steps):
for i in xrange(len(R)):
for j in xrange(len(R[i])):
if R[i][j] > 0:
eij = R[i][j] - np.dot(P[i,:],Q[:,j])
for k in xrange(K):
P[i][k] = P[i][k] + alpha * (2 * eij * Q[k][j] - beta * P[i][k])
Q[k][j] = Q[k][j] + alpha * (2 * eij * P[i][k] - beta * Q[k][j])
eR = np.dot(P,Q)
e = 0
for i in xrange(len(R)):
for j in xrange(len(R[i])):
if R[i][j] > 0:
e = e + pow(R[i][j] - np.dot(P[i,:],Q[:,j]), 2)
for k in xrange(K):
e = e + (beta/2) * ( pow(P[i][k],2) + pow(Q[k][j],2) )
if e < 0.001:
break
return P, Q.T
In [6]:
R = [[5,3,0,1],
[4,0,0,1],
[1,1,0,5],
[1,0,0,4],
[0,1,5,4]]
R = np.array(R)
N = len(R)
M = len(R[0])
numberOfFactors = 2
# Initialize the user and item factor matrices
P = np.random.rand(N,numberOfFactors)
Q = np.random.rand(M,numberOfFactors)
# Run algorithm
nP, nQ = matrix_factorization(R, P, Q, numberOfFactors)
# Get filled in R matrix
nR = np.dot(nP, nQ.T)
print nR
In [ ]: