In [1]:
from theano.sandbox import cuda


Using gpu device 0: Tesla K80 (CNMeM is disabled, cuDNN 5103)
/home/ubuntu/anaconda2/lib/python2.7/site-packages/theano/sandbox/cuda/__init__.py:600: UserWarning: Your cuDNN version is more recent than the one Theano officially supports. If you see any problems, try updating Theano or downgrading cuDNN to version 5.
  warnings.warn(warn)

In [2]:
%matplotlib inline
import utils; reload(utils)
from utils import *
from __future__ import division, print_function
import pandas as pd


Using Theano backend.

In [3]:
# Load the ratings and remap raw user/movie ids to contiguous 0-based
# indices — embedding layers require ids in [0, n).
df = pd.read_csv('./data/movielens/sample/ratings.csv')
users = df.userId.unique()
movies = df.movieId.unique()
userid2idx = {o:i for i,o in enumerate(users)}
movieid2idx = {o:i for i,o in enumerate(movies)}
# Series.map(dict) is the idiomatic (and faster) form of
# .apply(lambda x: d[x]); every id is a key by construction, so no NaNs.
df.movieId = df.movieId.map(movieid2idx)
df.userId = df.userId.map(userid2idx)
df.head()


Out[3]:
userId movieId rating timestamp
0 0 0 2.5 1260759144
1 0 1 3.0 1260759179
2 0 2 3.0 1260759182
3 0 3 2.0 1260759185
4 0 4 4.0 1260759205

In [4]:
# Embedding table sizes: distinct user and movie counts.
nusers, nmovies = df.userId.nunique(), df.movieId.nunique()
nfactors = 50  # latent dimensionality shared by both embeddings
nusers, nmovies


Out[4]:
(671, 9066)

In [5]:
def embedding_input(name, n_in, n_out, reg):
    """
    Build an input layer plus an L2-regularized embedding on top of it.

    Returns (input_tensor, embedding_tensor): the Input accepts one int64
    id per sample, and the Embedding maps each of the n_in possible ids
    to a learned n_out-dimensional vector, with l2(reg) weight decay.
    """
    inp = Input(shape=(1,), dtype='int64', name=name)
    emb = Embedding(n_in, n_out, input_length=1, W_regularizer=l2(reg))
    return inp, emb(inp)

In [6]:
# One (input, embedding) pair per entity, each with light L2 weight decay.
user_in, u = embedding_input('user_in', n_in=nusers, n_out=nfactors, reg=1e-4)
movie_in, m = embedding_input('movie_in', n_in=nmovies, n_out=nfactors, reg=1e-4)

In [7]:
##Split into training and validation
msk = np.random.rand(len(df)) < 0.8
trn = df[msk]
val = df[~msk]

In [8]:
# Concatenate the two embedding vectors and regress the rating with a
# small MLP (one hidden layer, linear output).
merged = merge([u, m], mode='concat')
flat = Flatten()(merged)
hidden = Dense(70, activation='relu')(flat)
rating = Dense(1)(hidden)
nn = Model([user_in, movie_in], rating)
nn.compile(Adam(0.001), loss='mse')

In [9]:
# Train for 8 epochs. Per the run output below: train loss keeps falling
# while val_loss bottoms out around epoch 3 (~0.824) and then climbs —
# the model is overfitting; fewer epochs or stronger regularization
# would likely improve validation error.
nn.fit([trn.userId, trn.movieId], trn.rating, batch_size=64, nb_epoch=8, 
          validation_data=([val.userId, val.movieId], val.rating))


Train on 80212 samples, validate on 19792 samples
Epoch 1/8
80212/80212 [==============================] - 6s - loss: 1.4778 - val_loss: 0.8467
Epoch 2/8
80212/80212 [==============================] - 6s - loss: 0.8051 - val_loss: 0.8326
Epoch 3/8
80212/80212 [==============================] - 6s - loss: 0.7722 - val_loss: 0.8240
Epoch 4/8
80212/80212 [==============================] - 6s - loss: 0.7524 - val_loss: 0.8299
Epoch 5/8
80212/80212 [==============================] - 6s - loss: 0.7361 - val_loss: 0.8293
Epoch 6/8
80212/80212 [==============================] - 6s - loss: 0.7209 - val_loss: 0.8378
Epoch 7/8
80212/80212 [==============================] - 6s - loss: 0.7005 - val_loss: 0.8433
Epoch 8/8
80212/80212 [==============================] - 6s - loss: 0.6788 - val_loss: 0.8493
Out[9]:
<keras.callbacks.History at 0x7ff84915f090>

In [ ]: