In [53]:
import os, sys
import itertools
import graphlab as gl
import graphlab.aggregate as agg
from tqdm import tqdm_notebook as tqdm
# set canvas path
# gl.canvas.set_target('ipynb')
%matplotlib inline
import matplotlib
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
In [31]:
# Load the song data from the SFrame stored under data/song_data.gl/.
song_data = gl.SFrame('data/song_data.gl/')
In [32]:
# Preview the first rows of song_data.
song_data.head()
Out[32]:
In [33]:
# Number of rows in song_data.
len(song_data)
Out[33]:
In [34]:
# Distinct values of the 'user_id' column; later cells index into this
# collection to pick example users.
users = song_data['user_id'].unique()
# Number of distinct users.
len(users)
Out[34]:
In [36]:
# Split song_data into train and test sets by user, keyed on 'user_id' and
# 'song_id'. max_num_users is documented as an integer count, so the 40%
# share of the user base is truncated to an int instead of passing a float.
train_data, test_data = gl.recommender.util.random_split_by_user(
    song_data,
    'user_id',
    'song_id',
    max_num_users=int(0.4 * len(users))
)
In [37]:
# Train the popularity recommender on train_data.
# NOTE(review): items are keyed on the 'song' column here, while the
# train/test split above keys them on 'song_id' -- confirm both columns
# identify the same items.
popularity_model = gl.popularity_recommender.create(train_data, user_id='user_id', item_id='song')
In [38]:
# Popularity-model recommendations for the first entry of users.
popularity_model.recommend(users=[users[0]])
Out[38]:
In [39]:
# Popularity-model recommendations for the second entry of users.
popularity_model.recommend(users=[users[1]])
Out[39]:
In [40]:
# Train the item-similarity recommender on the same train_data, using the
# same 'user_id' / 'song' columns as the popularity model.
i2i_model = gl.item_similarity_recommender.create(train_data, user_id='user_id', item_id='song')
In [41]:
# Item-similarity recommendations for the first entry of users.
i2i_model.recommend(users=[users[0]])
Out[41]:
In [42]:
# Item-similarity recommendations for the second entry of users.
i2i_model.recommend(users=[users[1]])
Out[42]:
In [43]:
# Ask the item-similarity model for items similar to one specific song.
i2i_model.get_similar_items(['With Or Without You - U2'])
Out[43]:
In [61]:
# Display names, listed in the same order as the models handed to
# compare_models below.
model_names = ['Popularity Recommender', 'Personalized Recommender']
# Evaluate both models on test_data; later cells index the result by model
# position (entry 0 is the popularity model).
model_performance = gl.recommender.util.compare_models(
test_data,
[popularity_model, i2i_model],
model_names=model_names
)
Compare the personalized model with the popularity model by plotting its percentage gain in MAP@10 (mean average precision at 10) over the popularity baseline.
In [62]:
def get_pr_dataframe(pr_curve):
    """Convert the overall precision/recall table of one result to a data frame.

    Looks up the 'precision_recall_overall' entry of ``pr_curve`` and returns
    whatever its ``to_dataframe()`` method produces (presumably a pandas
    DataFrame -- confirm against the evaluation result type).
    """
    overall_table = pr_curve['precision_recall_overall']
    return overall_table.to_dataframe()
def get_lift_in_precision(model_map, baseline_map):
    """Return the percentage lift of ``model_map`` over ``baseline_map``.

    The lift is ``(model_map - baseline_map) / baseline_map * 100`` rounded
    to two decimals.

    Raises:
        ValueError: if ``baseline_map`` is zero, negative or NaN, since no
            meaningful relative lift exists for such a baseline.
    """
    # Force float arithmetic so integer inputs are not truncated by
    # Python 2 integer division.
    baseline = float(baseline_map)
    # `not (x > 0)` also rejects NaN, which a plain `x <= 0` check lets through.
    if not baseline > 0.0:
        raise ValueError(
            'Baseline MAP must be a positive number, got %r. Please check'
            % (baseline_map,)
        )
    relative_gain = (model_map - baseline) / baseline
    return round(relative_gain * 100, 2)
# Baseline score -- first entry of model_performance, i.e. the
# Popularity Recommender.
# NOTE(review): the score is the mean of the 'precision' column of the
# overall precision/recall table; confirm this is the intended MAP@10 figure.
baseline_df = get_pr_dataframe(model_performance[0])
baseline_map = baseline_df['precision'].mean()

# Lift of every model (the baseline included, which yields 0) relative to
# the baseline score.
lift_dfs = [get_pr_dataframe(performance) for performance in model_performance]
lifts = [
    get_lift_in_precision(df['precision'].mean(), baseline_map)
    for df in lift_dfs
]

# Horizontal bar chart of the lifts, one bar per model.
plot_data = pd.DataFrame({'model': model_names, 'lift': lifts})
fig, ax = plt.subplots(figsize=(15, 10), dpi=80)
sns.barplot(x='lift', y='model', data=plot_data, ax=ax)
ax.set_xlabel('%Lift in MAP@10')
ax.set_ylabel('Models')
sns.despine(ax=ax, top=True, right=True)
# No legend: the bars carry no labelled artists, so legend() would only emit
# a warning and draw an empty box; the y tick labels already name the models.
# Alias of pyplot kept only in case other cells still reference it.
axis_to_work = plt
plt.show()