notebook.community

Edit and run



In [1]:

    
# The data is using movie lens here, from 
## F. Maxwell Harper and Joseph A. Konstan. 2015. The MovieLens Datasets: History and Context. 
## ACM Transactions on Interactive Intelligent Systems (TiiS) 5, 4, Article 19 (December 2015), 19 pages. 
## DOI=http://dx.doi.org/10.1145/2827872

import pandas as pd
import numpy as np



In [2]:

    
root_folder = 'ml-100k/'
rating_data = root_folder + 'u.data'
user_data = root_folder + 'u.user'
item_data = root_folder + 'u.item'

rating_train = root_folder + 'ua.base'
rating_test = root_folder + 'ua.test'



In [13]:

    
# The original data do not have column names in the csv file, you need to check their ReadMe, and add column names
## the data here can contain duplicated userid or itemid
rating_data_cols = ['userid', 'itemid', 'rating', 'timestamp']
rating_data_df = pd.read_csv(rating_data, sep='\t', names = rating_data_cols, encoding='latin-1')
print rating_data_df.shape
rating_data_df.head()









    



(100000, 4)






    Out[13]:







  
    
      
      userid
      itemid
      rating
      timestamp
    
  
  
    
      0
      196
      242
      3
      881250949
    
    
      1
      186
      302
      3
      891717742
    
    
      2
      22
      377
      1
      878887116
    
    
      3
      244
      51
      2
      880606923
    
    
      4
      166
      346
      1
      886397596



In [8]:

    
# Method 1 - DIY collaborative filtering

user_ct = rating_data_df['userid'].unique().shape[0]
item_ct = rating_data_df['itemid'].unique().shape[0]
print(user_ct, item_ct)









    



(943, 1682)



In [9]:

    
for line in rating_data_df.itertuples():
    print line
    print line[1]
    break









    



Pandas(Index=0, userid=196, itemid=242, rating=3, timestamp=881250949)
196



In [10]:

    
data_matrix = np.zeros((user_ct, item_ct))

for r in rating_data_df.itertuples():
    data_matrix[r[1]-1, r[2]-1] = r[3]
    
data_matrix









    Out[10]:





array([[ 5.,  3.,  4., ...,  0.,  0.,  0.],
       [ 4.,  0.,  0., ...,  0.,  0.,  0.],
       [ 0.,  0.,  0., ...,  0.,  0.,  0.],
       ..., 
       [ 5.,  0.,  0., ...,  0.,  0.,  0.],
       [ 0.,  0.,  0., ...,  0.,  0.,  0.],
       [ 0.,  5.,  0., ...,  0.,  0.,  0.]])



In [11]:

    
# calculate similarity
from sklearn.metrics.pairwise import pairwise_distances 

user_similarity = pairwise_distances(data_matrix, metric='cosine')
item_similarity = pairwise_distances(data_matrix.T, metric='cosine')

user_similarity









    Out[11]:





array([[ 0.        ,  0.83306902,  0.95254046, ...,  0.85138306,
         0.82049212,  0.60182526],
       [ 0.83306902,  0.        ,  0.88940868, ...,  0.83851522,
         0.82773219,  0.89420212],
       [ 0.95254046,  0.88940868,  0.        , ...,  0.89875744,
         0.86658385,  0.97344413],
       ..., 
       [ 0.85138306,  0.83851522,  0.89875744, ...,  0.        ,
         0.8983582 ,  0.90488042],
       [ 0.82049212,  0.82773219,  0.86658385, ...,  0.8983582 ,
         0.        ,  0.81753534],
       [ 0.60182526,  0.89420212,  0.97344413, ...,  0.90488042,
         0.81753534,  0.        ]])



In [12]:

    
print(data_matrix.shape)
print(user_similarity.shape, item_similarity.shape)









    



(943, 1682)
((943, 943), (1682, 1682))



In [14]:

    
# user-user recommendation: predict the score that each item can be recommended to each user
rating_mean = data_matrix.mean(axis=1)
ratings_diff = data_matrix - rating_mean[:, np.newaxis]
user_recommendation = rating_mean[:, np.newaxis] \
            + user_similarity.dot(ratings_diff)/np.array([np.abs(user_similarity).sum(axis=1)]).T
print(user_recommendation.shape)
user_recommendation









    



(943, 1682)






    Out[14]:





array([[ 2.06532606,  0.73430275,  0.62992381, ...,  0.39359041,
         0.39304874,  0.3927712 ],
       [ 1.76308836,  0.38404019,  0.19617889, ..., -0.08837789,
        -0.0869183 , -0.08671183],
       [ 1.79590398,  0.32904733,  0.15882885, ..., -0.13699223,
        -0.13496852, -0.13476488],
       ..., 
       [ 1.59151513,  0.27526889,  0.10219534, ..., -0.16735162,
        -0.16657451, -0.16641377],
       [ 1.81036267,  0.40479877,  0.27545013, ..., -0.00907358,
        -0.00846587, -0.00804858],
       [ 1.8384313 ,  0.47964837,  0.38496292, ...,  0.14686675,
         0.14629808,  0.14641455]])



In [15]:

    
# item-item prediction: predict the score that each item can be recommended to each user
item_recommendation = data_matrix.dot(item_similarity)/np.array([np.abs(item_similarity).sum(axis=1)])
print(item_recommendation.shape)
item_recommendation









    



(943, 1682)






    Out[15]:





array([[ 0.44627765,  0.475473  ,  0.50593755, ...,  0.58815455,
         0.5731069 ,  0.56669645],
       [ 0.10854432,  0.13295661,  0.12558851, ...,  0.13445801,
         0.13657587,  0.13711081],
       [ 0.08568497,  0.09169006,  0.08764343, ...,  0.08465892,
         0.08976784,  0.09084451],
       ..., 
       [ 0.03230047,  0.0450241 ,  0.04292449, ...,  0.05302764,
         0.0519099 ,  0.05228033],
       [ 0.15777917,  0.17409459,  0.18900003, ...,  0.19979296,
         0.19739388,  0.20003117],
       [ 0.24767207,  0.24489212,  0.28263031, ...,  0.34410424,
         0.33051406,  0.33102478]])



In [16]:

    
# Method 2 - using turicreate collaborative filtering
import turicreate

ua_cols = ['userid', 'movieid', 'rating', 'unix_timestamp']
ratings_train = pd.read_csv(rating_train, sep='\t', names=ua_cols, encoding='latin-1')
ratings_test = pd.read_csv(rating_test, sep='\t', names=ua_cols, encoding='latin-1')
print ratings_train.shape, ratings_test.shape









    



(90570, 4) (9430, 4)



In [18]:

    
train_data = turicreate.SFrame(ratings_train)
test_data = turicreate.SFrame(ratings_test)
train_data.head()









    Out[18]:





    
        userid
        movieid
        rating
        unix_timestamp
    
    
        1
        1
        5
        874965758
    
    
        1
        2
        3
        876893171
    
    
        1
        3
        4
        878542960
    
    
        1
        4
        3
        876893119
    
    
        1
        5
        3
        889751712
    
    
        1
        6
        5
        887431973
    
    
        1
        7
        4
        875071561
    
    
        1
        8
        1
        875072484
    
    
        1
        9
        5
        878543541
    
    
        1
        10
        3
        875693118
    

[10 rows x 4 columns]



In [20]:

    
## Turicreate - recommend most popular items (in fact this will recommend the same thing to everyone)
popularity_model = turicreate.popularity_recommender\
                   .create(train_data, user_id='userid', item_id='movieid', target='rating')









    




Recsys training: model = popularity






    




Warning: Ignoring columns unix_timestamp;






    




    To use these columns in scoring predictions, use a model that allows the use of additional features.






    




Preparing data set.






    




    Data has 90570 observations with 943 users and 1680 items.






    




    Data prepared in: 0.130378s






    




90570 observations to process; with 1680 unique items.



In [24]:

    
# see, everyone gets the same recommendation
popularity_recomm = popularity_model.recommend(users=[4,10,7,9],k=3)
popularity_recomm.print_rows(num_rows=12)









    



+--------+---------+-------+------+
| userid | movieid | score | rank |
+--------+---------+-------+------+
|   4    |   1189  |  5.0  |  1   |
|   4    |   1122  |  5.0  |  2   |
|   4    |   814   |  5.0  |  3   |
|   10   |   1189  |  5.0  |  1   |
|   10   |   1122  |  5.0  |  2   |
|   10   |   814   |  5.0  |  3   |
|   7    |   1189  |  5.0  |  1   |
|   7    |   1122  |  5.0  |  2   |
|   7    |   814   |  5.0  |  3   |
|   9    |   1189  |  5.0  |  1   |
|   9    |   1122  |  5.0  |  2   |
|   9    |   814   |  5.0  |  3   |
+--------+---------+-------+------+
[12 rows x 4 columns]



In [26]:

    
## Turicreate - collaborative filtering
#Training the model
item_sim_model = turicreate.item_similarity_recommender\
            .create(train_data, user_id='userid', item_id='movieid', target='rating', similarity_type='cosine')









    




Recsys training: model = item_similarity






    




Warning: Ignoring columns unix_timestamp;






    




    To use these columns in scoring predictions, use a model that allows the use of additional features.






    




Preparing data set.






    




    Data has 90570 observations with 943 users and 1680 items.






    




    Data prepared in: 0.12825s






    




Training model from provided data.






    




Gathering per-item and per-user statistics.






    




+--------------------------------+------------+






    




| Elapsed Time (Item Statistics) | % Complete |






    




+--------------------------------+------------+






    




| 7.317ms                        | 100        |






    




+--------------------------------+------------+






    




Setting up lookup tables.






    




Processing data in one pass using dense lookup tables.






    




+-------------------------------------+------------------+-----------------+






    




| Elapsed Time (Constructing Lookups) | Total % Complete | Items Processed |






    




+-------------------------------------+------------------+-----------------+






    




| 19.633ms                            | 0.25             | 6               |






    




| 167.108ms                           | 100              | 1680            |






    




+-------------------------------------+------------------+-----------------+






    




Finalizing lookup tables.






    




Generating candidate set for working with new users.






    




Finished training in 0.174317s



In [27]:

    
# Making recommendations
item_sim_recomm = item_sim_model.recommend(users=[4,10,7,9],k=3)
item_sim_recomm.print_rows(num_rows=12)









    



+--------+---------+----------------+------+
| userid | movieid |     score      | rank |
+--------+---------+----------------+------+
|   4    |    50   | 1.13114770821  |  1   |
|   4    |   288   | 1.04871511459  |  2   |
|   4    |    56   | 0.996869802475 |  3   |
|   10   |   204   | 1.30251427115  |  1   |
|   10   |   423   | 1.22470301081  |  2   |
|   10   |   172   | 1.20107427445  |  3   |
|   7    |    88   | 0.48335224936  |  1   |
|   7    |    95   | 0.470542855845 |  2   |
|   7    |   209   | 0.422433135012 |  3   |
|   9    |   172   | 1.51975569129  |  1   |
|   9    |   204   | 1.46488795678  |  2   |
|   9    |   174   |  1.4442871958  |  3   |
+--------+---------+----------------+------+
[12 rows x 4 columns]



In [53]:

    
# movie data
item_data_cols = ['item_id', 'item_title ', 'release_date', 'video_release_date', 
                  'IMDb_URL', 'unknown', 'Action', 'Adventure', 'Animation', 'Children', 
                  'Comedy', 'Crime', 'Documentary', 'Drama', 'Fantasy', 'Film_Noir', 'Horror',
                  'Musical', 'Mystery', 'Romance', 'Sci-Fi', 'Thriller', 'War', 'Western']
item_data_df = pd.read_csv(item_data, sep='|', names = item_data_cols, encoding='latin-1')
print item_data_df.shape
item_data_df.head()









    



(1682, 24)






    Out[53]:







  
    
      
      item_id
      item_title
      release_date
      video_release_date
      IMDb_URL
      unknown
      Action
      Adventure
      Animation
      Children
      ...
      Fantasy
      Film_Noir
      Horror
      Musical
      Mystery
      Romance
      Sci-Fi
      Thriller
      War
      Western
    
  
  
    
      0
      1
      Toy Story (1995)
      01-Jan-1995
      NaN
      http://us.imdb.com/M/title-exact?Toy%20Story%2...
      0
      0
      0
      1
      1
      ...
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
    
    
      1
      2
      GoldenEye (1995)
      01-Jan-1995
      NaN
      http://us.imdb.com/M/title-exact?GoldenEye%20(...
      0
      1
      1
      0
      0
      ...
      0
      0
      0
      0
      0
      0
      0
      1
      0
      0
    
    
      2
      3
      Four Rooms (1995)
      01-Jan-1995
      NaN
      http://us.imdb.com/M/title-exact?Four%20Rooms%...
      0
      0
      0
      0
      0
      ...
      0
      0
      0
      0
      0
      0
      0
      1
      0
      0
    
    
      3
      4
      Get Shorty (1995)
      01-Jan-1995
      NaN
      http://us.imdb.com/M/title-exact?Get%20Shorty%...
      0
      1
      0
      0
      0
      ...
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
    
    
      4
      5
      Copycat (1995)
      01-Jan-1995
      NaN
      http://us.imdb.com/M/title-exact?Copycat%20(1995)
      0
      0
      0
      0
      0
      ...
      0
      0
      0
      0
      0
      0
      0
      1
      0
      0
    
  

5 rows × 24 columns



In [54]:

    
# user profile
user_data_cols = ['user_id', 'age', 'gender', 'occupation', 'zip_code']
user_data_df = pd.read_csv(user_data, sep='|', names = user_data_cols, encoding='latin-1')
print user_data_df.shape
user_data_df.head()



In [55]:

    
ua_cols = ['user_id', 'item_id', 'rating', 'unix_timestamp']
ratings_train = pd.read_csv(rating_train, sep='\t', names=ua_cols, encoding='latin-1')
ratings_test = pd.read_csv(rating_test, sep='\t', names=ua_cols, encoding='latin-1')
print ratings_train.shape, ratings_test.shape

train_data = turicreate.SFrame(ratings_train)
test_data = turicreate.SFrame(ratings_test)
train_data.head()









    



(90570, 4) (9430, 4)






    Out[55]:





    
        user_id
        item_id
        rating
        unix_timestamp
    
    
        1
        1
        5
        874965758
    
    
        1
        2
        3
        876893171
    
    
        1
        3
        4
        878542960
    
    
        1
        4
        3
        876893119
    
    
        1
        5
        3
        889751712
    
    
        1
        6
        5
        887431973
    
    
        1
        7
        4
        875071561
    
    
        1
        8
        1
        875072484
    
    
        1
        9
        5
        878543541
    
    
        1
        10
        3
        875693118
    

[10 rows x 4 columns]



In [56]:

    
## Turicreate - Factorization Recommender, predict missing ratings
### since a user won't rate all the items, this method is to predict those missing ratings
user_sf = turicreate.SFrame(user_data_df)

item_data_df = item_data_df.drop(['video_release_date', 'IMDb_URL', 'release_date'], axis=1)
item_sf = turicreate.SFrame(item_data_df)

item_sf









    Out[56]:





    
        item_id
        item_title 
        unknown
        Action
        Adventure
        Animation
        Children
        Comedy
        Crime
        Documentary
    
    
        1
        Toy Story (1995)
        0
        0
        0
        1
        1
        1
        0
        0
    
    
        2
        GoldenEye (1995)
        0
        1
        1
        0
        0
        0
        0
        0
    
    
        3
        Four Rooms (1995)
        0
        0
        0
        0
        0
        0
        0
        0
    
    
        4
        Get Shorty (1995)
        0
        1
        0
        0
        0
        1
        0
        0
    
    
        5
        Copycat (1995)
        0
        0
        0
        0
        0
        0
        1
        0
    
    
        6
        Shanghai Triad (Yao a yao
yao dao waipo qiao) ...
        0
        0
        0
        0
        0
        0
        0
        0
    
    
        7
        Twelve Monkeys (1995)
        0
        0
        0
        0
        0
        0
        0
        0
    
    
        8
        Babe (1995)
        0
        0
        0
        0
        1
        1
        0
        0
    
    
        9
        Dead Man Walking (1995)
        0
        0
        0
        0
        0
        0
        0
        0
    
    
        10
        Richard III (1995)
        0
        0
        0
        0
        0
        0
        0
        0
    


    
        Drama
        Fantasy
        Film_Noir
        Horror
        Musical
        Mystery
        Romance
        Sci-Fi
        Thriller
        War
        Western
    
    
        0
        0
        0
        0
        0
        0
        0
        0
        0
        0
        0
    
    
        0
        0
        0
        0
        0
        0
        0
        0
        1
        0
        0
    
    
        0
        0
        0
        0
        0
        0
        0
        0
        1
        0
        0
    
    
        1
        0
        0
        0
        0
        0
        0
        0
        0
        0
        0
    
    
        1
        0
        0
        0
        0
        0
        0
        0
        1
        0
        0
    
    
        1
        0
        0
        0
        0
        0
        0
        0
        0
        0
        0
    
    
        1
        0
        0
        0
        0
        0
        0
        1
        0
        0
        0
    
    
        1
        0
        0
        0
        0
        0
        0
        0
        0
        0
        0
    
    
        1
        0
        0
        0
        0
        0
        0
        0
        0
        0
        0
    
    
        1
        0
        0
        0
        0
        0
        0
        0
        0
        1
        0
    

[1682 rows x 21 columns]
Note: Only the head of the SFrame is printed.
You can use print_rows(num_rows=m, num_columns=n) to print more rows and columns.



In [57]:

    
fac_rem_model = turicreate.factorization_recommender.create(train_data, target='rating',
                                                user_data=user_sf,
                                                item_data=item_sf)









    




Recsys training: model = factorization_recommender






    




Preparing data set.






    




    Data has 90570 observations with 943 users and 1682 items.






    




    Data prepared in: 0.212253s






    




Training factorization_recommender for recommendations.






    




+--------------------------------+--------------------------------------------------+----------+






    




| Parameter                      | Description                                      | Value    |






    




+--------------------------------+--------------------------------------------------+----------+






    




| num_factors                    | Factor Dimension                                 | 8        |






    




| regularization                 | L2 Regularization on Factors                     | 1e-08    |






    




| solver                         | Solver used for training                         | adagrad  |






    




| linear_regularization          | L2 Regularization on Linear Coefficients         | 1e-10    |






    




| side_data_factorization        | Assign Factors for Side Data                     | True     |






    




| max_iterations                 | Maximum Number of Iterations                     | 50       |






    




+--------------------------------+--------------------------------------------------+----------+






    




  Optimizing model using SGD; tuning step size.






    




  Using 11321 / 90570 points for tuning the step size.






    




+---------+-------------------+------------------------------------------+






    




| Attempt | Initial Step Size | Estimated Objective Value                |






    




+---------+-------------------+------------------------------------------+






    




| 0       | 1.85185           | Not Viable                               |






    




| 1       | 0.462963          | Not Viable                               |






    




| 2       | 0.115741          | 0.256957                                 |






    




| 3       | 0.0578704         | 0.441567                                 |






    




| 4       | 0.0289352         | 0.668015                                 |






    




+---------+-------------------+------------------------------------------+






    




| Final   | 0.115741          | 0.256957                                 |






    




+---------+-------------------+------------------------------------------+






    




Starting Optimization.






    




+---------+--------------+-------------------+-----------------------+-------------+






    




| Iter.   | Elapsed Time | Approx. Objective | Approx. Training RMSE | Step Size   |






    




+---------+--------------+-------------------+-----------------------+-------------+






    




| Initial | 113us        | 1.26801           | 1.12606               |             |






    




+---------+--------------+-------------------+-----------------------+-------------+






    




| 1       | 220.043ms    | 1.02212           | 1.011                 | 0.115741    |






    




| 2       | 541.238ms    | 0.789752          | 0.888674              | 0.115741    |






    




| 3       | 870.654ms    | 0.726265          | 0.852206              | 0.115741    |






    




| 4       | 1.20s        | 0.690086          | 0.830707              | 0.115741    |






    




| 5       | 1.52s        | 0.667022          | 0.816707              | 0.115741    |






    




| 6       | 1.88s        | 0.649894          | 0.806152              | 0.115741    |






    




| 10      | 2.90s        | 0.612406          | 0.782555              | 0.115741    |






    




| 11      | 3.28s        | 0.607195          | 0.779218              | 0.115741    |






    




| 15      | 4.06s        | 0.590597          | 0.768493              | 0.115741    |






    




| 20      | 5.18s        | 0.577918          | 0.760198              | 0.115741    |






    




| 25      | 6.25s        | 0.569356          | 0.754546              | 0.115741    |






    




| 30      | 7.09s        | 0.563028          | 0.75034               | 0.115741    |






    




| 35      | 8.50s        | 0.557705          | 0.746784              | 0.115741    |






    




| 40      | 9.48s        | 0.553874          | 0.744215              | 0.115741    |






    




| 45      | 10.41s       | 0.550585          | 0.742001              | 0.115741    |






    




| 50      | 11.32s       | 0.547802          | 0.740124              | 0.115741    |






    




+---------+--------------+-------------------+-----------------------+-------------+






    




Optimization Complete: Maximum number of passes through the data reached.






    




Computing final objective value and training RMSE.






    




       Final objective value: 0.535105






    




       Final training RMSE: 0.731495



In [46]:

    
train_data









    Out[46]:





    
        userid
        movieid
        rating
        unix_timestamp
    
    
        1
        1
        5
        874965758
    
    
        1
        2
        3
        876893171
    
    
        1
        3
        4
        878542960
    
    
        1
        4
        3
        876893119
    
    
        1
        5
        3
        889751712
    
    
        1
        6
        5
        887431973
    
    
        1
        7
        4
        875071561
    
    
        1
        8
        1
        875072484
    
    
        1
        9
        5
        878543541
    
    
        1
        10
        3
        875693118
    

[90570 rows x 4 columns]
Note: Only the head of the SFrame is printed.
You can use print_rows(num_rows=m, num_columns=n) to print more rows and columns.



In [58]:

    
fac_rem_model









    Out[58]:





Class                            : FactorizationRecommender

Schema
------
User ID                          : user_id
Item ID                          : item_id
Target                           : rating
Additional observation features  : 1
User side features               : ['user_id', 'age', 'gender', 'occupation', 'zip_code']
Item side features               : ['item_id', 'item_title ', 'unknown', 'Action', 'Adventure', 'Animation', 'Children', 'Comedy', 'Crime', 'Documentary', 'Drama', 'Fantasy', 'Film_Noir', 'Horror', 'Musical', 'Mystery', 'Romance', 'Sci-Fi', 'Thriller', 'War', 'Western']

Statistics
----------
Number of observations           : 90570
Number of users                  : 943
Number of items                  : 1682

Training summary
----------------
Training time                    : 12.2277

Model Parameters
----------------
Model class                      : FactorizationRecommender
num_factors                      : 8
binary_target                    : 0
side_data_factorization          : 1
solver                           : auto
nmf                              : 0
max_iterations                   : 50

Regularization Settings
-----------------------
regularization                   : 0.0
regularization_type              : normal
linear_regularization            : 0.0

Optimization Settings
---------------------
init_random_sigma                : 0.01
sgd_convergence_interval         : 4
sgd_convergence_threshold        : 0.0
sgd_max_trial_iterations         : 5
sgd_sampling_block_size          : 131072
sgd_step_adjustment_interval     : 4
sgd_step_size                    : 0.0
sgd_trial_sample_minimum_size    : 10000
sgd_trial_sample_proportion      : 0.125
step_size_decrease_rate          : 0.75
additional_iterations_if_unhealthy : 5
adagrad_momentum_weighting       : 0.9
num_tempering_iterations         : 4
tempering_regularization_start_value : 0.0
track_exact_loss                 : 0



In [59]:

    
fac_rem_model.evaluate_precision_recall(test_data)









    Out[59]:





{'precision_recall_by_user': Columns:
 	user_id	int
 	cutoff	int
 	precision	float
 	recall	float
 	count	int
 
 Rows: 16974
 
 Data:
 +---------+--------+-----------+--------+-------+
 | user_id | cutoff | precision | recall | count |
 +---------+--------+-----------+--------+-------+
 |    1    |   1    |    0.0    |  0.0   |   10  |
 |    1    |   2    |    0.0    |  0.0   |   10  |
 |    1    |   3    |    0.0    |  0.0   |   10  |
 |    1    |   4    |    0.0    |  0.0   |   10  |
 |    1    |   5    |    0.0    |  0.0   |   10  |
 |    1    |   6    |    0.0    |  0.0   |   10  |
 |    1    |   7    |    0.0    |  0.0   |   10  |
 |    1    |   8    |    0.0    |  0.0   |   10  |
 |    1    |   9    |    0.0    |  0.0   |   10  |
 |    1    |   10   |    0.0    |  0.0   |   10  |
 +---------+--------+-----------+--------+-------+
 [16974 rows x 5 columns]
 Note: Only the head of the SFrame is printed.
 You can use print_rows(num_rows=m, num_columns=n) to print more rows and columns.,
 'precision_recall_overall': Columns:
 	cutoff	int
 	precision	float
 	recall	float
 
 Rows: 18
 
 Data:
 +--------+-----------------+------------------+
 | cutoff |    precision    |      recall      |
 +--------+-----------------+------------------+
 |   1    | 0.0084835630965 | 0.00084835630965 |
 |   2    | 0.0084835630965 | 0.0016967126193  |
 |   3    | 0.0134323082361 | 0.00402969247084 |
 |   4    | 0.0172322375398 | 0.00689289501591 |
 |   5    | 0.0190880169671 | 0.00954400848356 |
 |   6    | 0.0190880169671 | 0.0114528101803  |
 |   7    | 0.0192395091653 | 0.0134676564157  |
 |   8    | 0.0182926829268 | 0.0146341463415  |
 |   9    | 0.0183810533758 | 0.0165429480382  |
 |   10   | 0.0185577942736 | 0.0185577942736  |
 +--------+-----------------+------------------+
 [18 rows x 3 columns]
 Note: Only the head of the SFrame is printed.
 You can use print_rows(num_rows=m, num_columns=n) to print more rows and columns.}

	userid	itemid	rating	timestamp
0	196	242	3	881250949
1	186	302	3	891717742
2	22	377	1	878887116
3	244	51	2	880606923
4	166	346	1	886397596

userid	movieid	rating	unix_timestamp
1	1	5	874965758
1	2	3	876893171
1	3	4	878542960
1	4	3	876893119
1	5	3	889751712
1	6	5	887431973
1	7	4	875071561
1	8	1	875072484
1	9	5	878543541
1	10	3	875693118

	item_id	item_title	release_date	video_release_date	IMDb_URL	Action	Adventure	Animation	Children	...	Thriller
0	1	Toy Story (1995)	01-Jan-1995	NaN	http://us.imdb.com/M/title-exact?Toy%20Story%2...	0	0	1	1	...	0
1	2	GoldenEye (1995)	01-Jan-1995	NaN	http://us.imdb.com/M/title-exact?GoldenEye%20(...	1	1	0	0	...	1
2	3	Four Rooms (1995)	01-Jan-1995	NaN	http://us.imdb.com/M/title-exact?Four%20Rooms%...	0	0	0	0	...	1
3	4	Get Shorty (1995)	01-Jan-1995	NaN	http://us.imdb.com/M/title-exact?Get%20Shorty%...	1	0	0	0	...	0
4	5	Copycat (1995)	01-Jan-1995	NaN	http://us.imdb.com/M/title-exact?Copycat%20(1995)	0	0	0	0	...	1

	user_id	age	gender	occupation	zip_code
0	1	24	M	technician	85711
1	2	53	F	other	94043
2	3	23	M	writer	32067
3	4	24	M	technician	43537
4	5	33	F	other	15213

Drama	Sci-Fi	Thriller	War
0	0	0	0
0	0	1	0
0	0	1	0
1	0	0	0
1	0	1	0
1	0	0	0
1	1	0	0
1	0	0	0
1	0	0	0
1	0	0	1

Drama	Sci-Fi	Thriller	War
0	0	0	0
0	0	1	0
0	0	1	0
1	0	0	0
1	0	1	0
1	0	0	0
1	1	0	0
1	0	0	0
1	0	0	0
1	0	0	1

Drama	Sci-Fi	Thriller	War
0	0	0	0
0	0	1	0
0	0	1	0
1	0	0	0
1	0	1	0
1	0	0	0
1	1	0	0
1	0	0	0
1	0	0	0
1	0	0	1