In [1]:
from theano.sandbox import cuda
In [2]:
%matplotlib inline
import utils; reload(utils)
from utils import *
from __future__ import division, print_function
import pandas as pd
In [3]:
# Load the sample ratings table.
df = pd.read_csv('./data/movielens/sample/ratings.csv')

# Distinct raw ids found in each column.
users = df.userId.unique()
movies = df.movieId.unique()

# Lookup tables from each raw id to a contiguous 0-based index.
userid2idx = dict(zip(users, range(len(users))))
movieid2idx = dict(zip(movies, range(len(movies))))

# Overwrite both id columns with their contiguous indices.
df['movieId'] = df['movieId'].map(movieid2idx)
df['userId'] = df['userId'].map(userid2idx)
df.head()
Out[3]:
In [4]:
# Number of distinct users and movies present in the ratings table.
nusers, nmovies = (df[col].nunique() for col in ('userId', 'movieId'))
# Length of each embedding vector (passed as n_out to embedding_input).
nfactors = 50
nusers, nmovies
Out[4]:
In [5]:
def embedding_input(name, n_in, n_out, reg):
    """
    Build a named single-id input together with its embedding lookup.

    name  -- name given to the Input layer
    n_in  -- first size argument of the Embedding layer
    n_out -- second size argument of the Embedding layer
    reg   -- strength passed to l2() for the embedding weight regularizer

    Returns a pair (input, embedded): the Input itself and the result of
    applying the Embedding layer to it.
    """
    id_input = Input(shape=(1,), dtype='int64', name=name)
    lookup = Embedding(n_in, n_out, input_length=1, W_regularizer=l2(reg))
    embedded = lookup(id_input)
    return id_input, embedded
In [6]:
# One input/embedding pair per entity; both use nfactors-wide vectors and the same L2 strength.
user_in, u = embedding_input(name='user_in', n_in=nusers, n_out=nfactors, reg=1e-4)
movie_in, m = embedding_input(name='movie_in', n_in=nmovies, n_out=nfactors, reg=1e-4)
In [7]:
# Split into training and validation sets (each row lands in training with
# probability 0.8).
# A dedicated, seeded generator makes the split identical on every run, so
# validation losses stay comparable across kernel restarts, and it leaves the
# global NumPy random state untouched.
split_rng = np.random.RandomState(42)
msk = split_rng.rand(len(df)) < 0.8
trn = df[msk]
val = df[~msk]
In [8]:
# Concatenate the user and movie embeddings and flatten the result.
joined = merge([u, m], mode='concat')
flat = Flatten()(joined)
# One hidden ReLU layer, then a single linear output unit.
hidden = Dense(70, activation='relu')(flat)
x = Dense(1)(hidden)
# Two-input model trained with mean squared error.
nn = Model([user_in, movie_in], x)
nn.compile(optimizer=Adam(0.001), loss='mse')
In [9]:
# Inputs are listed in the same order the model was built with: users, then movies.
train_inputs = [trn.userId, trn.movieId]
valid_inputs = [val.userId, val.movieId]
nn.fit(
    train_inputs,
    trn.rating,
    batch_size=64,
    nb_epoch=8,
    validation_data=(valid_inputs, val.rating),
)
Out[9]:
In [ ]: