In [1]:
from theano.sandbox import cuda


WARNING (theano.sandbox.cuda): The cuda backend is deprecated and will be removed in the next release (v0.10).  Please switch to the gpuarray backend. You can get more information about how to switch at this URL:
 https://github.com/Theano/Theano/wiki/Converting-to-the-new-gpu-back-end%28gpuarray%29

Using gpu device 0: Tesla K80 (CNMeM is disabled, cuDNN 5103)

In [2]:
import utils; reload(utils)
from utils import *


Using Theano backend.

In [3]:
ratings = pd.read_csv('data/ml-latest-small/ratings.csv')

In [4]:
ratings


Out[4]:
userId movieId rating timestamp
0 1 31 2.5 1260759144
1 1 1029 3.0 1260759179
2 1 1061 3.0 1260759182
3 1 1129 2.0 1260759185
4 1 1172 4.0 1260759205
5 1 1263 2.0 1260759151
6 1 1287 2.0 1260759187
7 1 1293 2.0 1260759148
8 1 1339 3.5 1260759125
9 1 1343 2.0 1260759131
10 1 1371 2.5 1260759135
11 1 1405 1.0 1260759203
12 1 1953 4.0 1260759191
13 1 2105 4.0 1260759139
14 1 2150 3.0 1260759194
15 1 2193 2.0 1260759198
16 1 2294 2.0 1260759108
17 1 2455 2.5 1260759113
18 1 2968 1.0 1260759200
19 1 3671 3.0 1260759117
20 2 10 4.0 835355493
21 2 17 5.0 835355681
22 2 39 5.0 835355604
23 2 47 4.0 835355552
24 2 50 4.0 835355586
25 2 52 3.0 835356031
26 2 62 3.0 835355749
27 2 110 4.0 835355532
28 2 144 3.0 835356016
29 2 150 5.0 835355395
... ... ... ... ...
99974 671 4034 4.5 1064245493
99975 671 4306 5.0 1064245548
99976 671 4308 3.5 1065111985
99977 671 4880 4.0 1065111973
99978 671 4886 5.0 1064245488
99979 671 4896 5.0 1065111996
99980 671 4963 4.5 1065111855
99981 671 4973 4.5 1064245471
99982 671 4993 5.0 1064245483
99983 671 4995 4.0 1064891537
99984 671 5010 2.0 1066793004
99985 671 5218 2.0 1065111990
99986 671 5299 3.0 1065112004
99987 671 5349 4.0 1065111863
99988 671 5377 4.0 1064245557
99989 671 5445 4.5 1064891627
99990 671 5464 3.0 1064891549
99991 671 5669 4.0 1063502711
99992 671 5816 4.0 1065111963
99993 671 5902 3.5 1064245507
99994 671 5952 5.0 1063502716
99995 671 5989 4.0 1064890625
99996 671 5991 4.5 1064245387
99997 671 5995 4.0 1066793014
99998 671 6212 2.5 1065149436
99999 671 6268 2.5 1065579370
100000 671 6269 4.0 1065149201
100001 671 6365 4.0 1070940363
100002 671 6385 2.5 1070979663
100003 671 6565 3.5 1074784724

100004 rows × 4 columns


In [5]:
movie_names = pd.read_csv('data/ml-latest-small/movies.csv')

In [6]:
movie_names


Out[6]:
movieId title genres
0 1 Toy Story (1995) Adventure|Animation|Children|Comedy|Fantasy
1 2 Jumanji (1995) Adventure|Children|Fantasy
2 3 Grumpier Old Men (1995) Comedy|Romance
3 4 Waiting to Exhale (1995) Comedy|Drama|Romance
4 5 Father of the Bride Part II (1995) Comedy
5 6 Heat (1995) Action|Crime|Thriller
6 7 Sabrina (1995) Comedy|Romance
7 8 Tom and Huck (1995) Adventure|Children
8 9 Sudden Death (1995) Action
9 10 GoldenEye (1995) Action|Adventure|Thriller
10 11 American President, The (1995) Comedy|Drama|Romance
11 12 Dracula: Dead and Loving It (1995) Comedy|Horror
12 13 Balto (1995) Adventure|Animation|Children
13 14 Nixon (1995) Drama
14 15 Cutthroat Island (1995) Action|Adventure|Romance
15 16 Casino (1995) Crime|Drama
16 17 Sense and Sensibility (1995) Drama|Romance
17 18 Four Rooms (1995) Comedy
18 19 Ace Ventura: When Nature Calls (1995) Comedy
19 20 Money Train (1995) Action|Comedy|Crime|Drama|Thriller
20 21 Get Shorty (1995) Comedy|Crime|Thriller
21 22 Copycat (1995) Crime|Drama|Horror|Mystery|Thriller
22 23 Assassins (1995) Action|Crime|Thriller
23 24 Powder (1995) Drama|Sci-Fi
24 25 Leaving Las Vegas (1995) Drama|Romance
25 26 Othello (1995) Drama
26 27 Now and Then (1995) Children|Drama
27 28 Persuasion (1995) Drama|Romance
28 29 City of Lost Children, The (Cité des enfants p... Adventure|Drama|Fantasy|Mystery|Sci-Fi
29 30 Shanghai Triad (Yao a yao yao dao waipo qiao) ... Crime|Drama
... ... ... ...
9095 159690 Teenage Mutant Ninja Turtles: Out of the Shado... Action|Adventure|Comedy
9096 159755 Popstar: Never Stop Never Stopping (2016) Comedy
9097 159858 The Conjuring 2 (2016) Horror
9098 159972 Approaching the Unknown (2016) Drama|Sci-Fi|Thriller
9099 160080 Ghostbusters (2016) Action|Comedy|Horror|Sci-Fi
9100 160271 Central Intelligence (2016) Action|Comedy
9101 160438 Jason Bourne (2016) Action
9102 160440 The Maid's Room (2014) Thriller
9103 160563 The Legend of Tarzan (2016) Action|Adventure
9104 160565 The Purge: Election Year (2016) Action|Horror|Sci-Fi
9105 160567 Mike & Dave Need Wedding Dates (2016) Comedy
9106 160590 Survive and Advance (2013) (no genres listed)
9107 160656 Tallulah (2016) Drama
9108 160718 Piper (2016) Animation
9109 160954 Nerve (2016) Drama|Thriller
9110 161084 My Friend Rockefeller (2015) Documentary
9111 161155 Sunspring (2016) Sci-Fi
9112 161336 Author: The JT LeRoy Story (2016) Documentary
9113 161582 Hell or High Water (2016) Crime|Drama
9114 161594 Kingsglaive: Final Fantasy XV (2016) Action|Adventure|Animation|Drama|Fantasy|Sci-Fi
9115 161830 Body (2015) Drama|Horror|Thriller
9116 161918 Sharknado 4: The 4th Awakens (2016) Action|Adventure|Horror|Sci-Fi
9117 161944 The Last Brickmaker in America (2001) Drama
9118 162376 Stranger Things Drama
9119 162542 Rustom (2016) Romance|Thriller
9120 162672 Mohenjo Daro (2016) Adventure|Drama|Romance
9121 163056 Shin Godzilla (2016) Action|Adventure|Fantasy|Sci-Fi
9122 163949 The Beatles: Eight Days a Week - The Touring Y... Documentary
9123 164977 The Gay Desperado (1936) Comedy
9124 164979 Women of '69, Unboxed Documentary

9125 rows × 3 columns


In [7]:
# remap the user and movie ids to contiguous zero-based integers
users = ratings.userId.unique()
movies = ratings.movieId.unique()

In [8]:
userid2idx = {o:i for i,o in enumerate(users)}

In [9]:
userid2idx


Out[9]:
{1: 0,
 2: 1,
 3: 2,
 4: 3,
 5: 4,
 ...
 668: 667,
 669: 668,
 670: 669,
 671: 670}

In [10]:
movieid2idx = {o:i for i,o in enumerate(movies)}
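
As a quick sanity check (a sketch, not executed in the original run), the mapping can be inverted to get back from embedding indices to original ids and titles:

# invert the mapping: embedding index -> original movieId
idx2movieid = {i: o for o, i in movieid2idx.items()}
idx2movieid[0]  # 31, the first movieId seen in ratings
movie_names.set_index('movieId').loc[idx2movieid[0], 'title']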

In [11]:
ratings.movieId = ratings.movieId.apply(lambda x: movieid2idx[x])
ratings.userId = ratings.userId.apply(lambda x: userid2idx[x])
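
The same remapping can be written with pandas' vectorized Series.map instead of a Python-level lambda. An equivalent sketch, to be used in place of (not after) the apply() calls above, since the mapping is not idempotent:

# vectorized alternative to the apply() calls above
ratings.movieId = ratings.movieId.map(movieid2idx)
ratings.userId = ratings.userId.map(userid2idx)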

In [12]:
ratings


Out[12]:
userId movieId rating timestamp
0 0 0 2.5 1260759144
1 0 1 3.0 1260759179
2 0 2 3.0 1260759182
3 0 3 2.0 1260759185
4 0 4 4.0 1260759205
5 0 5 2.0 1260759151
6 0 6 2.0 1260759187
7 0 7 2.0 1260759148
8 0 8 3.5 1260759125
9 0 9 2.0 1260759131
10 0 10 2.5 1260759135
11 0 11 1.0 1260759203
12 0 12 4.0 1260759191
13 0 13 4.0 1260759139
14 0 14 3.0 1260759194
15 0 15 2.0 1260759198
16 0 16 2.0 1260759108
17 0 17 2.5 1260759113
18 0 18 1.0 1260759200
19 0 19 3.0 1260759117
20 1 20 4.0 835355493
21 1 21 5.0 835355681
22 1 22 5.0 835355604
23 1 23 4.0 835355552
24 1 24 4.0 835355586
25 1 25 3.0 835356031
26 1 26 3.0 835355749
27 1 27 4.0 835355532
28 1 28 3.0 835356016
29 1 29 5.0 835355395
... ... ... ... ...
99974 670 473 4.5 1064245493
99975 670 354 5.0 1064245548
99976 670 355 3.5 1065111985
99977 670 5577 4.0 1065111973
99978 670 477 5.0 1064245488
99979 670 478 5.0 1065111996
99980 670 358 4.5 1065111855
99981 670 479 4.5 1064245471
99982 670 480 5.0 1064245483
99983 670 359 4.0 1064891537
99984 670 1225 2.0 1066793004
99985 670 1240 2.0 1065111990
99986 670 361 3.0 1065112004
99987 670 126 4.0 1065111863
99988 670 1260 4.0 1064245557
99989 670 483 4.5 1064891627
99990 670 362 3.0 1064891549
99991 670 127 4.0 1063502711
99992 670 364 4.0 1065111963
99993 670 1299 3.5 1064245507
99994 670 412 5.0 1063502716
99995 670 486 4.0 1064890625
99996 670 1308 4.5 1064245387
99997 670 365 4.0 1066793014
99998 670 2930 2.5 1065149436
99999 670 7005 2.5 1065579370
100000 670 4771 4.0 1065149201
100001 670 1329 4.0 1070940363
100002 670 1331 2.5 1070979663
100003 670 2946 3.5 1074784724

100004 rows × 4 columns


In [13]:
ratings.shape


Out[13]:
(100004, 4)

In [14]:
# split the ratings into training and validation sets with a random 80/20 mask
msk = np.random.choice([True, False], size=100004, p=[0.8, 0.2])
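
To make the split reproducible across runs, seed NumPy first and derive the size from the frame rather than hard-coding it. A sketch; the original run did not seed:

np.random.seed(42)  # any fixed seed makes the mask, and thus the split, repeatable
msk = np.random.choice([True, False], size=len(ratings), p=[0.8, 0.2])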

In [15]:
sum(msk)


Out[15]:
80116

In [16]:
training_data = ratings[msk]
val_data = ratings[~msk]

In [17]:
val_data.shape


Out[17]:
(19888, 4)

In [18]:
??Embedding

Dot product
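
The first model is plain matrix factorization: each user and each movie gets a 50-dimensional embedding vector, and the predicted rating for a (user, movie) pair is just the dot product of the two, $\hat{r}_{um} = p_u \cdot q_m$. The cells below build exactly this out of two Embedding layers and a dot-mode merge.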


In [19]:
user_in = Input(shape=(1,), dtype='int64', name='user_in')

In [20]:
user_in


Out[20]:
user_in

In [21]:
type(user_in)


Out[21]:
theano.tensor.var.TensorVariable

In [22]:
Embedding(ratings.userId.nunique(), 50, input_length=1, W_regularizer=l2(1e-4))


Out[22]:
<keras.layers.embeddings.Embedding at 0x7fe214965610>

In [23]:
user_embedding = Embedding(training_data.userId.nunique(), 50, input_length=1, W_regularizer=l2(1e-4))(user_in)
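
Note a subtle hazard in the cell above: sizing the embedding by training_data.userId.nunique() only works because the random split happens to leave all 671 users in the training set (the model summary below shows 33,550 = 671 × 50 user weights). If any user had landed only in the validation set, its remapped id would fall outside the embedding and fail at lookup time, so sizing by ratings.userId.nunique(), as the later cells do, is safer.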

In [24]:
user_embedding


Out[24]:
Reshape{3}.0

In [25]:
type(user_embedding)


Out[25]:
theano.tensor.var.TensorVariable

In [26]:
type(Sequential())


Out[26]:
keras.models.Sequential

In [27]:
movie_in = Input(shape=(1,), dtype='int64', name='movie_in')

In [28]:
movie_embedding = Embedding(ratings.movieId.nunique(), 50, input_length=1, W_regularizer=l2(1e-4))(movie_in)

In [29]:
x = merge([user_embedding, movie_embedding], mode='dot')

In [30]:
x


Out[30]:
Reshape{3}.0

In [31]:
x = Flatten()(x)

In [32]:
x


Out[32]:
Reshape{2}.0

In [33]:
model = Model([user_in, movie_in], x)

In [34]:
model.compile(Adam(0.001), loss='mse')

In [35]:
model.summary()


____________________________________________________________________________________________________
Layer (type)                     Output Shape          Param #     Connected to                     
====================================================================================================
user_in (InputLayer)             (None, 1)             0                                            
____________________________________________________________________________________________________
movie_in (InputLayer)            (None, 1)             0                                            
____________________________________________________________________________________________________
embedding_2 (Embedding)          (None, 1, 50)         33550       user_in[0][0]                    
____________________________________________________________________________________________________
embedding_3 (Embedding)          (None, 1, 50)         453300      movie_in[0][0]                   
____________________________________________________________________________________________________
merge_1 (Merge)                  (None, 1, 1)          0           embedding_2[0][0]                
                                                                   embedding_3[0][0]                
____________________________________________________________________________________________________
flatten_1 (Flatten)              (None, 1)             0           merge_1[0][0]                    
====================================================================================================
Total params: 486,850
Trainable params: 486,850
Non-trainable params: 0
____________________________________________________________________________________________________
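
The parameter count is easy to verify by hand: 671 users × 50 factors = 33,550 weights in the user embedding, 9,066 movies × 50 = 453,300 in the movie embedding, and 33,550 + 453,300 = 486,850 in total; the merge and flatten layers contribute no parameters.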

In [36]:
model.fit([training_data.userId, training_data.movieId], training_data.rating,
          batch_size=64, nb_epoch=1, verbose=2,
          validation_data=([val_data.userId, val_data.movieId], val_data.rating))


Train on 80116 samples, validate on 19888 samples
Epoch 1/1
7s - loss: 9.9186 - val_loss: 4.2839
Out[36]:
<keras.callbacks.History at 0x7fe20e498d10>

In [37]:
model.optimizer.lr = 0.01
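
Assigning a plain Python float to model.optimizer.lr, as above, may not reach the Theano update graph that was already compiled on the first fit; in Keras 1 the learning rate is a backend variable, and updating it in place is the safer route. A minimal sketch, assuming the Keras 1.x backend API:

from keras import backend as K
K.set_value(model.optimizer.lr, 0.01)  # mutate the shared variable the compiled graph reads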

In [39]:
model.fit([training_data.userId, training_data.movieId], training_data.rating,
          batch_size=64, nb_epoch=1, verbose=2,
          validation_data=([val_data.userId, val_data.movieId], val_data.rating))


Train on 80116 samples, validate on 19888 samples
Epoch 1/1
7s - loss: 3.1207 - val_loss: 2.8445
Out[39]:
<keras.callbacks.History at 0x7fe20d475bd0>

In [40]:
model.fit([training_data.userId, training_data.movieId], training_data.rating,
          batch_size=64, nb_epoch=2, verbose=2,
          validation_data=([val_data.userId, val_data.movieId], val_data.rating))


Train on 80116 samples, validate on 19888 samples
Epoch 1/2
7s - loss: 2.4113 - val_loss: 2.6472
Epoch 2/2
7s - loss: 2.2535 - val_loss: 2.6020
Out[40]:
<keras.callbacks.History at 0x7fe20d475a10>

In [41]:
model.optimizer.lr = 0.001
model.fit([training_data.userId, training_data.movieId], training_data.rating,
          batch_size=64, nb_epoch=6, verbose=2,
          validation_data=([val_data.userId, val_data.movieId], val_data.rating))


Train on 80116 samples, validate on 19888 samples
Epoch 1/6
7s - loss: 2.1946 - val_loss: 2.5850
Epoch 2/6
7s - loss: 2.1625 - val_loss: 2.5844
Epoch 3/6
7s - loss: 2.1375 - val_loss: 2.5812
Epoch 4/6
7s - loss: 2.1128 - val_loss: 2.5873
Epoch 5/6
7s - loss: 2.0879 - val_loss: 2.5890
Epoch 6/6
7s - loss: 2.0627 - val_loss: 2.5946
Out[41]:
<keras.callbacks.History at 0x7fe20d475950>

Bias


In [47]:
def embedding_input(name, n_in, n_out, reg):
    inp = Input(shape=(1,), dtype='int64', name=name)
    return inp, Embedding(n_in, n_out, input_length=1, W_regularizer=l2(reg))(inp)

In [48]:
user_in, u = embedding_input('user_in', ratings.userId.nunique(), 50, 1e-4)
movie_in, m = embedding_input('movie_in', ratings.movieId.nunique(), 50, 1e-4)

In [49]:
def create_bias(inp, n_in):
    x = Embedding(n_in, 1, input_length=1)(inp)
    return Flatten()(x)
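
A bias is just a width-1 embedding: one learned scalar per user or per movie, added onto the dot product so the model can express "this user rates everything high" or "this movie is widely liked" independently of the factors. Note that, unlike the factor embeddings, no regularizer is applied to the biases here.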

In [58]:
u_b = create_bias(user_in, ratings.userId.nunique())
m_b = create_bias(movie_in, ratings.movieId.nunique())

In [59]:
x = merge([u, m], mode='dot')

In [60]:
x = Flatten()(x)

In [61]:
x = merge([x, u_b], mode='sum')

In [62]:
x = merge([x, m_b], mode='sum')

In [65]:
model = Model([user_in, movie_in], x)

In [66]:
model.compile(Adam(0.001), loss='mse')

In [67]:
model.summary()


____________________________________________________________________________________________________
Layer (type)                     Output Shape          Param #     Connected to                     
====================================================================================================
user_in (InputLayer)             (None, 1)             0                                            
____________________________________________________________________________________________________
movie_in (InputLayer)            (None, 1)             0                                            
____________________________________________________________________________________________________
embedding_6 (Embedding)          (None, 1, 50)         33550       user_in[0][0]                    
____________________________________________________________________________________________________
embedding_7 (Embedding)          (None, 1, 50)         453300      movie_in[0][0]                   
____________________________________________________________________________________________________
merge_6 (Merge)                  (None, 1, 1)          0           embedding_6[0][0]                
                                                                   embedding_7[0][0]                
____________________________________________________________________________________________________
embedding_10 (Embedding)         (None, 1, 1)          671         user_in[0][0]                    
____________________________________________________________________________________________________
flatten_8 (Flatten)              (None, 1)             0           merge_6[0][0]                    
____________________________________________________________________________________________________
flatten_6 (Flatten)              (None, 1)             0           embedding_10[0][0]               
____________________________________________________________________________________________________
embedding_11 (Embedding)         (None, 1, 1)          9066        movie_in[0][0]                   
____________________________________________________________________________________________________
merge_7 (Merge)                  (None, 1)             0           flatten_8[0][0]                  
                                                                   flatten_6[0][0]                  
____________________________________________________________________________________________________
flatten_7 (Flatten)              (None, 1)             0           embedding_11[0][0]               
____________________________________________________________________________________________________
merge_8 (Merge)                  (None, 1)             0           merge_7[0][0]                    
                                                                   flatten_7[0][0]                  
====================================================================================================
Total params: 496,587
Trainable params: 496,587
Non-trainable params: 0
____________________________________________________________________________________________________
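
The total again checks out: 486,850 factor weights from before, plus 671 user biases and 9,066 movie biases, gives 496,587.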

In [69]:
model.fit([training_data.userId, training_data.movieId], training_data.rating,
          batch_size=64, verbose=2, nb_epoch=1,
         validation_data=([val_data.userId, val_data.movieId], val_data.rating))


Train on 80116 samples, validate on 19888 samples
Epoch 1/1
6s - loss: 8.9597 - val_loss: 3.5969
Out[69]:
<keras.callbacks.History at 0x7fe20c442750>

In [71]:
model.optimizer.lr = 0.01
model.fit([training_data.userId, training_data.movieId], training_data.rating,
          batch_size=64, verbose=2, nb_epoch=6,
         validation_data=([val_data.userId, val_data.movieId], val_data.rating))


Train on 80116 samples, validate on 19888 samples
Epoch 1/6
6s - loss: 2.5970 - val_loss: 2.3228
Epoch 2/6
6s - loss: 1.9807 - val_loss: 2.1142
Epoch 3/6
6s - loss: 1.8173 - val_loss: 2.0222
Epoch 4/6
6s - loss: 1.7249 - val_loss: 1.9563
Epoch 5/6
6s - loss: 1.6464 - val_loss: 1.8829
Epoch 6/6
6s - loss: 1.5732 - val_loss: 1.8187
Out[71]:
<keras.callbacks.History at 0x7fe20bebc050>

In [72]:
model.optimizer.lr = 0.001
model.fit([training_data.userId, training_data.movieId], training_data.rating,
          batch_size=64, verbose=2, nb_epoch=6,
         validation_data=([val_data.userId, val_data.movieId], val_data.rating))


Train on 80116 samples, validate on 19888 samples
Epoch 1/6
6s - loss: 1.5030 - val_loss: 1.7575
Epoch 2/6
6s - loss: 1.4348 - val_loss: 1.6957
Epoch 3/6
6s - loss: 1.3677 - val_loss: 1.6354
Epoch 4/6
6s - loss: 1.3026 - val_loss: 1.5786
Epoch 5/6
6s - loss: 1.2399 - val_loss: 1.5263
Epoch 6/6
6s - loss: 1.1800 - val_loss: 1.4764
Out[72]:
<keras.callbacks.History at 0x7fe20c442b50>

In [73]:
model.fit([training_data.userId, training_data.movieId], training_data.rating,
          batch_size=64, verbose=2, nb_epoch=5,
         validation_data=([val_data.userId, val_data.movieId], val_data.rating))


Train on 80116 samples, validate on 19888 samples
Epoch 1/5
6s - loss: 1.1224 - val_loss: 1.4281
Epoch 2/5
6s - loss: 1.0685 - val_loss: 1.3873
Epoch 3/5
6s - loss: 1.0163 - val_loss: 1.3435
Epoch 4/5
6s - loss: 0.9677 - val_loss: 1.3056
Epoch 5/5
6s - loss: 0.9213 - val_loss: 1.2700
Out[73]:
<keras.callbacks.History at 0x7fe20d23ab50>

Neural net
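
Rather than forcing the interaction to be a dot product, this last model concatenates the two 50-dimensional embeddings into a single 100-dimensional vector and lets a small fully connected network, one hidden layer of 70 ReLU units with dropout on both sides, learn how to combine them into a rating.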


In [74]:
user_in, u = embedding_input('user_in', ratings.userId.nunique(), 50, 1e-4)
movie_in, m = embedding_input('movie_in', ratings.movieId.nunique(), 50, 1e-4)

In [75]:
x = merge([u, m], mode='concat')
x = Flatten()(x)
x = Dropout(0.3)(x)
x = Dense(70, activation='relu')(x)
x = Dropout(0.75)(x)
x = Dense(1)(x)
nn = Model([user_in, movie_in], x)
nn.compile(Adam(0.001), loss='mse')

In [76]:
nn.summary()


____________________________________________________________________________________________________
Layer (type)                     Output Shape          Param #     Connected to                     
====================================================================================================
user_in (InputLayer)             (None, 1)             0                                            
____________________________________________________________________________________________________
movie_in (InputLayer)            (None, 1)             0                                            
____________________________________________________________________________________________________
embedding_12 (Embedding)         (None, 1, 50)         33550       user_in[0][0]                    
____________________________________________________________________________________________________
embedding_13 (Embedding)         (None, 1, 50)         453300      movie_in[0][0]                   
____________________________________________________________________________________________________
merge_9 (Merge)                  (None, 1, 100)        0           embedding_12[0][0]               
                                                                   embedding_13[0][0]               
____________________________________________________________________________________________________
flatten_9 (Flatten)              (None, 100)           0           merge_9[0][0]                    
____________________________________________________________________________________________________
dropout_1 (Dropout)              (None, 100)           0           flatten_9[0][0]                  
____________________________________________________________________________________________________
dense_1 (Dense)                  (None, 70)            7070        dropout_1[0][0]                  
____________________________________________________________________________________________________
dropout_2 (Dropout)              (None, 70)            0           dense_1[0][0]                    
____________________________________________________________________________________________________
dense_2 (Dense)                  (None, 1)             71          dropout_2[0][0]                  
====================================================================================================
Total params: 493,991
Trainable params: 493,991
Non-trainable params: 0
____________________________________________________________________________________________________
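
Parameter check: the hidden layer maps the 100 concatenated features to 70 units (100 × 70 + 70 = 7,070), the output layer maps 70 to 1 (70 + 1 = 71), and 33,550 + 453,300 + 7,070 + 71 = 493,991.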

In [77]:
nn.fit([training_data.userId, training_data.movieId], training_data.rating,
       batch_size=64, verbose=2, nb_epoch=5,
       validation_data=([val_data.userId, val_data.movieId], val_data.rating))


Train on 80116 samples, validate on 19888 samples
Epoch 1/5
8s - loss: 2.4699 - val_loss: 0.9121
Epoch 2/5
8s - loss: 1.4852 - val_loss: 0.8783
Epoch 3/5
8s - loss: 1.2341 - val_loss: 0.8515
Epoch 4/5
8s - loss: 1.0430 - val_loss: 0.8546
Epoch 5/5
8s - loss: 0.9138 - val_loss: 0.8333
Out[77]:
<keras.callbacks.History at 0x7fe2064f8a10>
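
To use the trained network, feed it a user index and a movie index in the remapped, zero-based id space. A minimal sketch, not from the original run:

# predicted rating for user index 3 and movie index 6 (remapped ids)
nn.predict([np.array([3]), np.array([6])])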
