In [25]:
import graphlab as gl
gl.canvas.set_target("ipynb")
In [26]:
implicit = gl.SFrame('implicit')
explicit = gl.SFrame('explicit')
items = gl.SFrame('items')
ratings = gl.SFrame('ratings')
In [5]:
ratings.show()
Split the implicit data into training and validation sets, holding out some of the observations for a random sample of users. This allows us to evaluate generalization ability.
In [27]:
train, valid = gl.recommender.util.random_split_by_user(implicit)
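As a quick sanity check, compare the sizes of the two splits (a minimal sketch; random_split_by_user holds out a fraction of the observations for a sample of users, so the validation set should be much smaller):
In [ ]:
# Sanity check: the validation split should be a small fraction of the data.
print(train.num_rows(), valid.num_rows())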
Compute the number of times each item has been rated.
In [28]:
num_ratings_per_item = train.groupby('item_id', {'num_users': gl.aggregate.COUNT})
items = items.join(num_ratings_per_item, on='item_id')
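To verify the join, we can peek at the most frequently rated items (a sketch using SFrame.topk):
In [ ]:
# The five items with the largest rating counts in the training set.
items.topk('num_users', k=5)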
Transform the count into a categorical variable using the feature_engineering
module.
In [29]:
binner = gl.feature_engineering.FeatureBinner(features=['num_users'], strategy='logarithmic', num_bins=5)
items = binner.fit_transform(items)
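The binner replaces the numeric count with a categorical bin label, so we can check how items are distributed across the bins (a sketch; the label values are whatever FeatureBinner assigns):
In [ ]:
# Count how many items fall into each logarithmic bin.
items.groupby('num_users', {'num_items': gl.aggregate.COUNT})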
Convert each genres list into a dictionary (so the factorization models can use genres as sparse side features) and each year to an integer.
In [30]:
items['genres'] = items['genres'].apply(lambda x: {k:1 for k in x})
items['year'] = items['year'].astype(int)
In [31]:
items
Out[31]: [SFrame display of items, now with binned counts, genre dictionaries, and integer years]
In [32]:
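# Baseline: item-item similarity trained on the implicit interactions.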
m0 = gl.item_similarity_recommender.create(train)
In [33]:
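# Matrix factorization trained with a ranking objective, better suited to implicit data.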
m1 = gl.ranking_factorization_recommender.create(train, max_iterations=10)
In [34]:
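# The same ranking factorization model, now with each item's release year as side data.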
m2 = gl.ranking_factorization_recommender.create(train,
item_data=items[['item_id', 'year']],
max_iterations=10)
In [35]:
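# Add the genre dictionaries as additional item side features.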
m3 = gl.ranking_factorization_recommender.create(train,
item_data=items[['item_id', 'year', 'genres']],
max_iterations=10)
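Each trained model can produce top-k recommendations directly; for example (a sketch using the standard recommend method, with k chosen arbitrarily):
In [ ]:
# Top-10 recommendations per user from the model with the most side data.
recs = m3.recommend(k=10)
recs.head(5)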
Create a precision/recall plot to compare the recommendation quality of the above models on our held-out data.
In [40]:
model_comparison = gl.compare(valid, [m0, m1, m2, m3], user_sample=.3)
In [24]:
gl.show_comparison(model_comparison, [m0, m1, m2, m3])
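For a single model, precision and recall at specific cutoffs can also be computed directly (a sketch; the cutoffs here are arbitrary):
In [ ]:
# Per-cutoff precision/recall for one model on the validation set.
pr = m3.evaluate_precision_recall(valid, cutoffs=[5, 10, 20])
pr['precision_recall_overall']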