notebook.community

Edit and run



In [1]:

    
from theano.sandbox import cuda









    



Using gpu device 0: Tesla K80 (CNMeM is disabled, cuDNN 5103)
/home/ubuntu/anaconda2/lib/python2.7/site-packages/theano/sandbox/cuda/__init__.py:600: UserWarning: Your cuDNN version is more recent than the one Theano officially supports. If you see any problems, try updating Theano or downgrading cuDNN to version 5.
  warnings.warn(warn)



In [2]:

    
%matplotlib inline
import utils; reload(utils)
from utils import *
from __future__ import division, print_function
import pandas as pd









    



Using Theano backend.



In [3]:

    
# Load the sample ratings, then replace the raw user and movie ids with
# contiguous 0-based positions (numbered in order of first appearance).
df = pd.read_csv('./data/movielens/sample/ratings.csv')
users = df['userId'].unique()
movies = df['movieId'].unique()
userid2idx = dict((raw_id, pos) for pos, raw_id in enumerate(users))
movieid2idx = dict((raw_id, pos) for pos, raw_id in enumerate(movies))
df['movieId'] = df['movieId'].map(lambda raw_id: movieid2idx[raw_id])
df['userId'] = df['userId'].map(lambda raw_id: userid2idx[raw_id])
df.head()



In [4]:

    
# Number of latent factors per embedding, and the count of distinct
# users / movies present in the ratings frame.
nfactors = 50
nusers = df['userId'].nunique()
nmovies = df['movieId'].nunique()
(nusers, nmovies)









    Out[4]:





(671, 9066)



In [5]:

    
def embedding_input(name, n_in, n_out, reg):
    """
    Build a one-id-per-sample input and the embedding lookup applied to it.

    name  -- name given to the Input tensor
    n_in  -- first argument to Embedding (size of the id vocabulary)
    n_out -- second argument to Embedding (length of each embedding vector)
    reg   -- weight passed to l2() for the embedding's W_regularizer

    Returns a pair: (input tensor, output of the Embedding layer on it).
    """
    id_input = Input(shape=(1,), dtype='int64', name=name)
    lookup = Embedding(n_in, n_out, input_length=1, W_regularizer=l2(reg))
    return id_input, lookup(id_input)



In [6]:

    
# One embedding per entity type; both use nfactors-wide vectors and the
# same L2 regularisation weight.
user_in, u = embedding_input(name='user_in', n_in=nusers, n_out=nfactors, reg=1e-4)
movie_in, m = embedding_input(name='movie_in', n_in=nmovies, n_out=nfactors, reg=1e-4)



In [7]:

    
# Split into training and validation sets (roughly 80% / 20% of the rows).
# The mask comes from a dedicated fixed-seed generator so the split is the
# same after every kernel restart, which keeps validation losses comparable
# between runs; NumPy's global random state is left untouched.
msk = np.random.RandomState(42).rand(len(df)) < 0.8
trn = df[msk]
val = df[~msk]



In [8]:

    
# Concatenate the user and movie embeddings, flatten, and feed them through
# a 70-unit ReLU hidden layer into a single linear output (the predicted
# rating). Trained with Adam on mean squared error.
x = Flatten()(merge([u, m], mode='concat'))
x = Dense(70, activation='relu')(x)
x = Dense(1)(x)
nn = Model([user_in, movie_in], x)
nn.compile(optimizer=Adam(lr=0.001), loss='mse')



In [9]:

    
# Fit on the training rows for 8 epochs, reporting loss on the held-out
# validation rows after each epoch.
nn.fit(
    x=[trn['userId'], trn['movieId']],
    y=trn['rating'],
    batch_size=64,
    nb_epoch=8,
    validation_data=([val['userId'], val['movieId']], val['rating']),
)









    



Train on 80212 samples, validate on 19792 samples
Epoch 1/8
80212/80212 [==============================] - 6s - loss: 1.4778 - val_loss: 0.8467
Epoch 2/8
80212/80212 [==============================] - 6s - loss: 0.8051 - val_loss: 0.8326
Epoch 3/8
80212/80212 [==============================] - 6s - loss: 0.7722 - val_loss: 0.8240
Epoch 4/8
80212/80212 [==============================] - 6s - loss: 0.7524 - val_loss: 0.8299
Epoch 5/8
80212/80212 [==============================] - 6s - loss: 0.7361 - val_loss: 0.8293
Epoch 6/8
80212/80212 [==============================] - 6s - loss: 0.7209 - val_loss: 0.8378
Epoch 7/8
80212/80212 [==============================] - 6s - loss: 0.7005 - val_loss: 0.8433
Epoch 8/8
80212/80212 [==============================] - 6s - loss: 0.6788 - val_loss: 0.8493






    Out[9]:





<keras.callbacks.History at 0x7ff84915f090>



In [ ]:

	movieId	rating	timestamp
0	0	2.5	1260759144
1	1	3.0	1260759179
2	2	3.0	1260759182
3	3	2.0	1260759185
4	4	4.0	1260759205