In [1]:
import numpy as np
import scipy.sparse as sp
from itertools import groupby
from operator import itemgetter
import time
import csv
from scipy.stats import wishart
from numpy.linalg import inv
from numpy.linalg import cholesky

In [2]:
matrixData=np.array(list(csv.reader(open("netflixdata.csv","rb"),delimiter=','))).astype('int')
Vj=np.array(list(csv.reader(open("movie.csv","rb"),delimiter=','))).astype('float')
Ui=np.array(list(csv.reader(open("user.csv","rb"),delimiter=','))).astype('float')
spr=sp.csc_matrix(matrixData)

In [3]:
b0=2.0
dim=10
m0=np.zeros((dim,1))
w0=np.identity(dim)
v0=10.

In [4]:
def NUV(latentFeat):
    nuv = len(latentFeat)
    return nuv

def SUV(latentFeat):
    suv = np.cov(latentFeat.T)
    return suv

def XUV(latentFeat):
    xuv = np.mean(latentFeat,axis=0).reshape(dim,1)
    return xuv

In [5]:
posList=(np.transpose(sp.find(spr)[0:2])).tolist()
posList.sort(key=itemgetter(1))
groups = groupby(posList, itemgetter(1))
p1=np.array([[item[0] for item in data] for (key, data) in groups])
posList.sort(key=itemgetter(0))
groups = groupby(posList, itemgetter(0))
p2=np.array([[item[1] for item in data] for (key, data) in groups])

b=np.diff(spr.indptr) == 0
zc=[i for i,x in enumerate(b) if x == True]

In [6]:
def rij(mdata):
    r = map(lambda x : x.data.reshape(-1,1), mdata.T)
    for zeros in zc:
        r[zeros] = np.array([[0]])
    return r
    
def rij2(mdata):
    r = map(lambda x : x.data.reshape(-1,1), mdata)
    return r

In [22]:
def AB(latentFeat, iter):
    ab = map(lambda X: np.array(map(lambda x : latentFeat[x],X)),p1)
    if iter < 2:
        for zeros in zc:
            ab.insert(zeros, np.zeros((1,dim)))
    return ab

def BA(latentFeat):
    ba = map(lambda X: np.array(map(lambda x : latentFeat[x],X)),p2)
    return ba

In [8]:
def wScale(latentFeat):
    ws = ((inv(inv(w0)+NUV(latentFeat)*SUV(latentFeat)+b0*NUV(latentFeat)/(b0+NUV(latentFeat))*np.dot((m0-XUV(latentFeat)),(m0-XUV(latentFeat)).T)))+(inv(inv(w0)+NUV(latentFeat)*SUV(latentFeat)+b0*NUV(latentFeat)/(b0+NUV(latentFeat))*np.dot((m0-XUV(latentFeat)),(m0-XUV(latentFeat)).T))).T)/2
    return ws

def LL(latentFeat):
    ll = cholesky((inv(b0+NUV(latentFeat)*covaruv(latentFeat))+inv(b0+NUV(latentFeat)*covaruv(latentFeat)).T)/2)
    return ll

def covaruv(latentFeat):
    cuv = wishart(v0+NUV(latentFeat),wScale(latentFeat)).rvs(1)
    return cuv

def meanuv(latentFeat):
    m = (b0*m0+NUV(latentFeat)*XUV(latentFeat))/(b0+NUV(latentFeat))
    muv = m + (np.dot(LL(latentFeat),np.random.normal(0, 1,dim))).reshape(dim,1)
    return muv

In [9]:
def covar(iAB,iLv):
    cv = (inv(iLv+2*np.dot(iAB.T,iAB))+(inv(iLv+2*np.dot(iAB.T,iAB))).T)/2
    return cv


def mmean(covar, iAB, rij, iLv, imv):
    mm = np.dot(covar,b0*np.dot(iAB.T,(rij))+np.dot(iLv,imv))
    return mm

def ABlat(covar,mmean):
    abl=mmean+(np.dot(cholesky(covar),np.random.normal(0, 1,dim))).reshape(dim,1)
    return abl

In [23]:
def Gibbs(sparseM, priorU , priorV, iterations):
    r1 = rij(sparseM)
    r2 = rij2(sparseM)
    
    iU0=priorU
    iV0=priorV
    
    for ii in xrange(iterations):
        ilv = covaruv(iV0)
        imv = meanuv(iV0)
        
        ilu = covaruv(iU0)
        imu = meanuv(iU0)
    
        for thinning in xrange(2):
            iter = ii+1
            iAB = AB(iU0, iter)
            iL = map(covar, iAB, [ilv for number in xrange(len(iAB))])
            ia= map(mmean, iL, iAB, r1, [ilv for number in xrange(len(iAB))], [imv for number in xrange(len(iAB))])
            iV0 = np.array(map(ABlat, iL, ia)).reshape(len(iAB),dim)
            
        
            iAB = BA(iV0)
            iL = map(covar, iAB, [ilu for number in xrange(len(iAB))])
            ia= map(mmean, iL, iAB, r2, [ilu for number in xrange(len(iAB))], [imu for number in xrange(len(iAB))])
            iU0 = np.array(map(ABlat, iL, ia)).reshape(len(iAB),dim)
        

    return iU0

In [24]:
t0= time.clock()
Gibbs(spr, Ui, Vj, 1)
print time.clock() - t0


8.76414

In [ ]: