In [1]:
import pandas as pd
%matplotlib inline
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
# The ml-100k files ship without header rows, so column names (documented in
# the dataset README) are supplied explicitly to read_csv.
u_cols = ['user_id', 'age', 'sex', 'occupation', 'zip_code']
r_cols = ['user_id', 'movie_id', 'rating', 'unix_timestamp']
i_cols = ['movie id', 'movie title', 'release date', 'video release date',
          'IMDb URL', 'unknown', 'Action', 'Adventure', 'Animation',
          "Children's", 'Comedy', 'Crime', 'Documentary', 'Drama', 'Fantasy',
          'Film-Noir', 'Horror', 'Musical', 'Mystery', 'Romance', 'Sci-Fi',
          'Thriller', 'War', 'Western']

# u.user and u.item are pipe-delimited; u.data is tab-delimited.
# All three are read as latin-1, as in the original notebook.
users = pd.read_csv('ml-100k/u.user', sep='|', names=u_cols, encoding='latin-1')
ratings = pd.read_csv('ml-100k/u.data', sep='\t', names=r_cols, encoding='latin-1')
items = pd.read_csv('ml-100k/u.item', sep='|', names=i_cols, encoding='latin-1')
In [2]:
# Sanity check: row/column count of the users table plus its first five rows.
print(users.shape)
users.head()
Out[2]:
In [3]:
# Sanity check: shape of the ratings table plus its first five rows.
print(ratings.shape)
ratings.head()
Out[3]:
In [4]:
# Sanity check: shape of the items (movies) table plus its first five rows.
print(items.shape)
items.head()
Out[4]:
In [5]:
# Load the pre-split evaluation sets shipped with ml-100k: ua.base (train)
# and ua.test (test) use the same four-column layout as u.data.
r_cols = ['user_id', 'movie_id', 'rating', 'unix_timestamp']
ratings_train, ratings_test = (
    pd.read_csv(f'ml-100k/ua.{part}', sep='\t', names=r_cols, encoding='latin-1')
    for part in ('base', 'test')
)
ratings_train.shape, ratings_test.shape
Out[5]:
In [6]:
# Building collaborative filtering model from scratch
'''
We will recommend movies based on user-user similarity and
item-item similarity. For that, first we need to calculate
the number of unique users and movies.
'''
# Count distinct ids; these sizes define the user-item matrix below.
n_users = len(ratings['user_id'].unique())
n_items = len(ratings['movie_id'].unique())
In [8]:
'''
Now, we will create a user-item matrix which can be used
to calculate the similarity between users and items.
'''
# Vectorized scatter-assignment replaces the original Python-level loop over
# itertuples (same result on ml-100k, much faster over 100k rows, and avoids
# fragile positional indexing like line[1]/line[2]/line[3]).
# NOTE(review): indexing by id-1 assumes user/movie ids are contiguous and
# 1-based, and that each (user, movie) pair occurs at most once — both hold
# for ml-100k, but confirm before reusing on other data.
data_matrix = np.zeros((n_users, n_items))
data_matrix[ratings['user_id'].to_numpy() - 1,
            ratings['movie_id'].to_numpy() - 1] = ratings['rating'].to_numpy()
In [11]:
'''Now, we will calculate the similarity. We can use the
pairwise_distance function from sklearn to calculate the cosine similarity.
'''
# NOTE(review): mid-notebook import — conventionally this belongs in the
# import cell at the top of the notebook.
from sklearn.metrics.pairwise import pairwise_distances
# NOTE(review): pairwise_distances(metric='cosine') returns cosine *distance*
# (1 - cosine similarity), yet these arrays are named and used as similarity
# weights in predict() below — confirm whether `1 - pairwise_distances(...)`
# was intended.
user_similarity = pairwise_distances(data_matrix, metric='cosine')
item_similarity = pairwise_distances(data_matrix.T, metric='cosine')
In [15]:
'''
This gives us the item-item and user-user similarity in an array form.
The next step is to make predictions based on these similarities.
Let’s define a function to do just that.
'''
def predict(ratings, similarity, type='user'):
    """Predict ratings as a weighted average over similar users or items.

    Parameters
    ----------
    ratings : numpy.ndarray, shape (n_users, n_items)
        Observed rating matrix (0 marks an unrated cell).
    similarity : numpy.ndarray
        Pairwise weight matrix: (n_users, n_users) for type='user',
        (n_items, n_items) for type='item'.
    type : {'user', 'item'}
        Aggregation axis. (The name shadows the builtin `type`, but it is
        kept for backward compatibility with existing callers.)

    Returns
    -------
    numpy.ndarray
        Predicted ratings, same shape as `ratings`.

    Raises
    ------
    ValueError
        If `type` is neither 'user' nor 'item'. (Bug fix: the original fell
        through both branches and raised UnboundLocalError on `pred`.)
    """
    if type == 'user':
        # Mean-center each user's ratings so per-user rating bias does not
        # dominate the weighted average; the mean is added back afterwards.
        mean_user_rating = ratings.mean(axis=1)
        ratings_diff = ratings - mean_user_rating[:, np.newaxis]
        pred = (mean_user_rating[:, np.newaxis]
                + similarity.dot(ratings_diff)
                / np.abs(similarity).sum(axis=1)[:, np.newaxis])
    elif type == 'item':
        # Weighted average over item neighbors, normalized by total weight.
        pred = ratings.dot(similarity) / np.abs(similarity).sum(axis=1)[np.newaxis, :]
    else:
        raise ValueError("type must be 'user' or 'item', got %r" % (type,))
    return pred
In [16]:
'''Finally, we will make predictions based on user similarity and item similarity.'''
# Dense predicted-rating matrices: one from user-user weights, one from item-item.
user_prediction = predict(data_matrix, user_similarity, type='user')
item_prediction = predict(data_matrix, item_similarity, type='item')
In [ ]:
In [29]:
# Render the raw user-based prediction matrix as a DataFrame for inspection.
pd.DataFrame(user_prediction)
Out[29]:
In [ ]:
In [23]:
# Top-10 predicted scores for the first user (row 0 of user_prediction).
user_pred = pd.DataFrame(user_prediction)[:1].T
user_pred['movie'] = user_pred.index
# NOTE(review): the first column holds *predicted ratings*, not similarities;
# the name 'similarity' is kept unchanged so the display matches the original.
user_pred.columns = ['similarity', 'movie']
# Bug fix: the original chained .nlargest(10).tail(10) — tail(10) on a
# 10-row frame is a no-op, so nlargest alone suffices.
user_pred.nlargest(10, columns='similarity')
Out[23]:
In [27]:
# Cross-check: the first few actual ratings given by user 1.
ratings[ratings['user_id'] == 1].head(5)
Out[27]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
# Building a simple popularity and collaborative filtering model using Turicreate
'''
After installing turicreate, first let’s import it and
read the train and test dataset in our environment.
Since we will be using turicreate, we will need to convert the dataset in SFrames.
'''
# NOTE(review): mid-notebook import — conventionally belongs in the top import cell.
import turicreate
# SFrame is turicreate's native tabular container; both are built directly
# from the pandas DataFrames loaded earlier (ua.base / ua.test).
train_data = turicreate.SFrame(ratings_train)
test_data = turicreate.SFrame(ratings_test)
In [ ]:
'''
We have user behavior as well as attributes of the users and movies,
so we can make content based as well as collaborative filtering algorithms.
We will start with a simple popularity model and then build a collaborative filtering model.
First we’ll build a model which will recommend movies based on the
most popular choices, i.e., a model where all the users receive the
same recommendation(s). We will use the turicreate recommender
function popularity_recommender for this.
'''
# Popularity baseline: ranks items by aggregate rating, so every user
# receives the same (non-personalized) recommendations.
popularity_model = turicreate.popularity_recommender.create(train_data,
user_id='user_id',
item_id='movie_id',
target='rating')
In [ ]:
'''It’s prediction time! We will recommend the top 5 items for the first 5 users in our dataset.'''
# k=5 recommendations for each of users 1-5 → 25 rows printed in total.
popularity_recomm = popularity_model.recommend(users=[1,2,3,4,5],k=5)
popularity_recomm.print_rows(num_rows=25)
In [ ]:
'''Note that the recommendations for all users are the same – 1467, 1201, 1189, 1122, 814.
And they’re all in the same order! This confirms that all the recommended movies
have an average rating of 5, i.e. all the users who watched the movie gave it a top
rating. Thus our popularity system works as expected.
After building a popularity model, we will now build a collaborative
filtering model. Let’s train the item similarity model and make top 5
recommendations for the first 5 users.
'''
#Training the model
# Item-item collaborative filtering on the ua.base split, using cosine
# similarity between items computed from the 'rating' column.
item_sim_model = turicreate.item_similarity_recommender.create(train_data,
user_id='user_id',
item_id='movie_id',
target='rating',
similarity_type='cosine')
#Making recommendations
# Personalized top-5 for each of users 1-5 → 25 rows printed in total.
item_sim_recomm = item_sim_model.recommend(users=[1,2,3,4,5],k=5)
item_sim_recomm.print_rows(num_rows=25)
In [ ]:
'''Here we can see that the recommendations (movie_id) are different
for each user. So personalization exists, i.e. for different users
we have a different set of recommendations.
In this model, we do not have the ratings for each movie
given by each user. We must find a way to predict all these missing
ratings. For that, we have to find a set of features which can
define how a user rates the movies. These are called latent features.
We need to find a way to extract the most important latent features
from the existing features. Matrix factorization, covered in the
next section, is one such technique which uses the lower dimension
dense matrix and helps in extracting the important latent features.'''
In [ ]:
# Building a recommendation engine using matrix factorization
class MF():
    """Matrix-factorization recommender trained with stochastic gradient descent.

    Learns user factors P, item factors Q, and bias terms so that
    R ≈ b + b_u + b_i + P·Qᵀ on the observed (non-zero) entries.

    Parameters
    ----------
    R : numpy.ndarray, shape (n_users, n_items)
        User-movie rating matrix; 0 marks a missing rating.
    K : int
        Number of latent features.
    alpha : float
        Learning rate for stochastic gradient descent.
    beta : float
        L2 regularization strength for biases and factors.
    iterations : int
        Number of SGD epochs over the observed ratings.
    """

    def __init__(self, R, K, alpha, beta, iterations):
        self.R = R
        self.num_users, self.num_items = R.shape
        self.K = K
        self.alpha = alpha
        self.beta = beta
        self.iterations = iterations

    def train(self):
        """Run SGD for `self.iterations` epochs; return [(epoch, error), ...]."""
        # Random init of the latent factor matrices; the 1/K scale keeps the
        # initial dot products small relative to the bias terms.
        self.P = np.random.normal(scale=1./self.K, size=(self.num_users, self.K))
        self.Q = np.random.normal(scale=1./self.K, size=(self.num_items, self.K))
        # Bias terms: per-user, per-item, and the global mean of observed ratings.
        self.b_u = np.zeros(self.num_users)
        self.b_i = np.zeros(self.num_items)
        self.b = np.mean(self.R[np.where(self.R != 0)])
        # Train only on observed entries (rating > 0).
        self.samples = [
            (i, j, self.R[i, j])
            for i in range(self.num_users)
            for j in range(self.num_items)
            if self.R[i, j] > 0
        ]
        training_process = []
        for it in range(self.iterations):
            # Shuffle each epoch so sample order does not bias the updates.
            np.random.shuffle(self.samples)
            self.sgd()
            error = self.mse()
            training_process.append((it, error))
            if (it + 1) % 20 == 0:
                print("Iteration: %d ; error = %.4f" % (it + 1, error))
        return training_process

    def mse(self):
        """Root-mean-squared error over the observed entries of R.

        Bug fix: the original returned sqrt of the *total* squared error,
        which grows with the number of ratings; this returns a proper RMSE.
        The per-cell Python loop is also replaced with vectorized indexing.
        """
        xs, ys = self.R.nonzero()
        predicted = self.full_matrix()
        sq_err = (self.R[xs, ys] - predicted[xs, ys]) ** 2
        return np.sqrt(sq_err.mean())

    def sgd(self):
        """One epoch of stochastic gradient descent over self.samples."""
        for i, j, r in self.samples:
            prediction = self.get_rating(i, j)
            e = r - prediction
            # Bias updates (L2-regularized gradient step).
            self.b_u[i] += self.alpha * (e - self.beta * self.b_u[i])
            self.b_i[j] += self.alpha * (e - self.beta * self.b_i[j])
            # Factor updates. NOTE: Q's update reads the already-updated P[i],
            # matching the original implementation's ordering.
            self.P[i, :] += self.alpha * (e * self.Q[j, :] - self.beta * self.P[i, :])
            self.Q[j, :] += self.alpha * (e * self.P[i, :] - self.beta * self.Q[j, :])

    def get_rating(self, i, j):
        """Predicted rating of user i for movie j."""
        return self.b + self.b_u[i] + self.b_i[j] + self.P[i, :].dot(self.Q[j, :].T)

    def full_matrix(self):
        """Full predicted user-movie rating matrix.

        Bug fix: the original referenced the *global* instance `mf` instead of
        `self` (so any second instance silently used the wrong parameters),
        and indexed the item biases with `b_i[np.newaxis:,]` — a no-op slice —
        where `b_i[np.newaxis, :]` was intended (broadcasting happened to
        produce the right shape anyway).
        """
        return (self.b
                + self.b_u[:, np.newaxis]
                + self.b_i[np.newaxis, :]
                + self.P.dot(self.Q.T))
In [ ]:
'''We have to convert the user item ratings to matrix form. It can be done using the pivot function in python.'''
# Pivot the long ratings table into a dense user x movie matrix;
# fillna(0) marks unrated cells with 0, which MF treats as "missing".
R = (ratings
     .pivot(index='user_id', columns='movie_id', values='rating')
     .fillna(0)
     .to_numpy())
In [ ]:
'''
fillna(0) will fill all the missing ratings with 0. Now we have the R matrix.
We can initialize the number of latent features, but the number of these
features must be less than or equal to the number of original features.
Now let us predict all the missing ratings.
Let’s take K=20, alpha=0.001, beta=0.01 and iterations=100.
'''
# Factorize R with K=20 latent features using the tutorial's suggested
# hyper-parameters; train() prints the error every 20 iterations.
mf = MF(R, K=20, alpha=0.001, beta=0.01, iterations=100)
training_process = mf.train()
print()
print("P x Q:")
# Full reconstructed rating matrix, including predictions for unrated cells.
print(mf.full_matrix())
print()
In [ ]:
# Bare expression: displays the popularity model's summary repr.
popularity_model
In [ ]:
# Evaluate both recommenders on the held-out ua.test split.
# NOTE(review): compare_models prints per-model evaluation summaries.
model_performance = turicreate.recommender.util.compare_models(test_data, [popularity_model, item_sim_model])
In [ ]: