In [1]:
import graphlab as gl
# Load the tab-separated training ratings. The file has no header row, so the
# three integer columns arrive under the placeholder names X1, X2 and X3.
ratings = gl.SFrame.read_csv(
    "../../data/netflix/netflix_mm.train",
    delimiter="\t",
    header=False,
    column_type_hints=[int, int, int])

# Replace the placeholder names with meaningful ones. Later cells read
# ratings['userid'], so this relies on rename() updating `ratings` itself.
# The trailing ';' keeps the notebook from echoing the call's return value.
ratings.rename({'X1': 'movieid', 'X2': 'userid', 'X3': 'rating'});
In [2]:
# Preview the first rows of the loaded ratings to sanity-check the parsed columns.
ratings.head()
Out[2]:
In [3]:
# Largest value found in the 'userid' column.
# NOTE(review): presumably a quick check of the user-id range - confirm intent.
ratings['userid'].max()
Out[3]:
In [4]:
# Randomly partition the ratings into two frames: 0.9 is the fraction passed to
# random_split for the first frame (`training`); the remainder is `testing`.
# The fixed seed makes the partition - and therefore every metric computed
# from it - reproducible across kernel restarts (the split was unseeded before).
training, testing = ratings.random_split(0.9, seed=42)
In [5]:
# Disabled alternative: build training/testing with random_split_by_user
# instead of ratings.random_split. Kept commented out for reference only;
# nothing in this cell executes.
#training, testing = gl.recommender.random_split_by_user(ratings, 'userid', 'movieid', max_num_users=1000, item_test_proportion=0.3)
#print training.num_rows(), testing.num_rows()
In [6]:
# Fit a factorization recommender on the training split with the SGD solver,
# using 512 factors and a cap of 4 iterations. Columns: users come from
# 'userid', items from 'movieid', and the value being predicted is 'rating'.
m = gl.recommender.factorization_recommender.create(
    training,
    user_id='userid',
    item_id='movieid',
    target='rating',
    solver='sgd',
    num_factors=512,
    max_iterations=4)
In [18]:
# Fetch the model's current options and overwrite the 'regularization' entry.
# NOTE(review): this edits only the object returned by get_current_options();
# nothing here hands it back to `m`, so the trained model is presumably
# unaffected. To train with this value, pass regularization=1e-4 to create()
# instead - confirm.
opts = m.get_current_options()
opts["regularization"] = 1e-4
In [20]:
# Re-fetch the options from the model and display the 'regularization' value
# the model itself reports.
opts = m.get_current_options()
opts["regularization"]
Out[20]:
In [19]:
# Look at model statistics: a bare `m` as the last expression makes the
# notebook render the model's own representation as the cell output.
m
Out[19]:
In [8]:
# Evaluate RMSE (root-mean-square prediction error) on the held-out test set.
m.evaluate(testing)
Out[8]:
In [12]:
# Grab the model's current options so the following cells can inspect them.
opts = m.get_current_options()
In [13]:
# Show the full contents of `opts`.
opts
Out[13]:
In [17]:
# Show only the 'regularization' entry of `opts`.
opts["regularization"]
Out[17]:
In [ ]: