In [1]:
import numpy as np
import scipy.sparse as sp
from itertools import groupby
from operator import itemgetter
import time
import csv
from scipy.stats import wishart
from numpy.linalg import inv
from numpy.linalg import cholesky
In [2]:
matrixData=np.array(list(csv.reader(open("netflixdata.csv","rb"),delimiter=','))).astype('int')
Vj=np.array(list(csv.reader(open("movie.csv","rb"),delimiter=','))).astype('float')
Ui=np.array(list(csv.reader(open("user.csv","rb"),delimiter=','))).astype('float')
spr=sp.csc_matrix(matrixData)
In [3]:
b0=2.0
dim=10
m0=np.zeros((dim,1))
w0=np.identity(dim)
v0=10.
In [4]:
def NUV(latentFeat):
nuv = len(latentFeat)
return nuv
def SUV(latentFeat):
suv = np.cov(latentFeat.T)
return suv
def XUV(latentFeat):
xuv = np.mean(latentFeat,axis=0).reshape(dim,1)
return xuv
In [5]:
posList=(np.transpose(sp.find(spr)[0:2])).tolist()
posList.sort(key=itemgetter(1))
groups = groupby(posList, itemgetter(1))
p1=np.array([[item[0] for item in data] for (key, data) in groups])
posList.sort(key=itemgetter(0))
groups = groupby(posList, itemgetter(0))
p2=np.array([[item[1] for item in data] for (key, data) in groups])
b=np.diff(spr.indptr) == 0
zc=[i for i,x in enumerate(b) if x == True]
In [6]:
def rij(mdata):
r = map(lambda x : x.data.reshape(-1,1), mdata.T)
for zeros in zc:
r[zeros] = np.array([[0]])
return r
def rij2(mdata):
r = map(lambda x : x.data.reshape(-1,1), mdata)
return r
In [22]:
def AB(latentFeat, iter):
ab = map(lambda X: np.array(map(lambda x : latentFeat[x],X)),p1)
if iter < 2:
for zeros in zc:
ab.insert(zeros, np.zeros((1,dim)))
return ab
def BA(latentFeat):
ba = map(lambda X: np.array(map(lambda x : latentFeat[x],X)),p2)
return ba
In [8]:
def wScale(latentFeat):
ws = ((inv(inv(w0)+NUV(latentFeat)*SUV(latentFeat)+b0*NUV(latentFeat)/(b0+NUV(latentFeat))*np.dot((m0-XUV(latentFeat)),(m0-XUV(latentFeat)).T)))+(inv(inv(w0)+NUV(latentFeat)*SUV(latentFeat)+b0*NUV(latentFeat)/(b0+NUV(latentFeat))*np.dot((m0-XUV(latentFeat)),(m0-XUV(latentFeat)).T))).T)/2
return ws
def LL(latentFeat):
ll = cholesky((inv(b0+NUV(latentFeat)*covaruv(latentFeat))+inv(b0+NUV(latentFeat)*covaruv(latentFeat)).T)/2)
return ll
def covaruv(latentFeat):
cuv = wishart(v0+NUV(latentFeat),wScale(latentFeat)).rvs(1)
return cuv
def meanuv(latentFeat):
m = (b0*m0+NUV(latentFeat)*XUV(latentFeat))/(b0+NUV(latentFeat))
muv = m + (np.dot(LL(latentFeat),np.random.normal(0, 1,dim))).reshape(dim,1)
return muv
In [9]:
def covar(iAB,iLv):
cv = (inv(iLv+2*np.dot(iAB.T,iAB))+(inv(iLv+2*np.dot(iAB.T,iAB))).T)/2
return cv
def mmean(covar, iAB, rij, iLv, imv):
mm = np.dot(covar,b0*np.dot(iAB.T,(rij))+np.dot(iLv,imv))
return mm
def ABlat(covar,mmean):
abl=mmean+(np.dot(cholesky(covar),np.random.normal(0, 1,dim))).reshape(dim,1)
return abl
In [23]:
def Gibbs(sparseM, priorU , priorV, iterations):
r1 = rij(sparseM)
r2 = rij2(sparseM)
iU0=priorU
iV0=priorV
for ii in xrange(iterations):
ilv = covaruv(iV0)
imv = meanuv(iV0)
ilu = covaruv(iU0)
imu = meanuv(iU0)
for thinning in xrange(2):
iter = ii+1
iAB = AB(iU0, iter)
iL = map(covar, iAB, [ilv for number in xrange(len(iAB))])
ia= map(mmean, iL, iAB, r1, [ilv for number in xrange(len(iAB))], [imv for number in xrange(len(iAB))])
iV0 = np.array(map(ABlat, iL, ia)).reshape(len(iAB),dim)
iAB = BA(iV0)
iL = map(covar, iAB, [ilu for number in xrange(len(iAB))])
ia= map(mmean, iL, iAB, r2, [ilu for number in xrange(len(iAB))], [imu for number in xrange(len(iAB))])
iU0 = np.array(map(ABlat, iL, ia)).reshape(len(iAB),dim)
return iU0
In [24]:
t0= time.clock()
Gibbs(spr, Ui, Vj, 1)
print time.clock() - t0
In [ ]: