In [2]:
import graphlab as gl
In [3]:
# Load the song listening data from the 'song_data.gl/' path into an SFrame.
song_data = gl.SFrame('song_data.gl/')
In [5]:
# Preview the first rows of the dataset to see which columns are available.
song_data.head()
Out[5]:
In [4]:
# Point GraphLab Canvas at the 'ipynb' target so .show() output is rendered
# in the notebook (presumably inline rather than in a separate browser tab).
gl.canvas.set_target('ipynb')
In [6]:
# Visualize the 'song' column.
song_data['song'].show()
In [7]:
# Number of rows in the dataset.
len(song_data)
Out[7]:
In [34]:
# Distinct user ids appearing in the dataset.
users = song_data['user_id'].unique()
In [9]:
# Number of distinct users.
len(users)
Out[9]:
In [6]:
# Split the rows into train/test sets with a 0.8 fraction argument; the fixed
# seed makes the split reproducible across runs.
train_data, test_data = song_data.random_split(.8, seed=0)
In [11]:
# Baseline recommender built from the training split: users are identified by
# the 'user_id' column and items by the 'song' column.
popularity_model = gl.popularity_recommender.create(
    train_data,
    user_id='user_id',
    item_id='song')
In [14]:
# Popularity-model recommendations for the first user in `users`.
popularity_model.recommend(users=[users[0]])
Out[14]:
In [16]:
# Popularity-model recommendations for the second user in `users`, for
# comparison with the first user's list.
popularity_model.recommend(users=[users[1]])
Out[16]:
In [18]:
# Item-similarity recommender trained on the same training split and the same
# user/item columns as the popularity baseline.
personalized_model = gl.item_similarity_recommender.create(
    train_data,
    user_id='user_id',
    item_id='song')
In [19]:
# Item-similarity recommendations for the first user in `users`.
personalized_model.recommend(users= [users[0]])
Out[19]:
In [20]:
# Item-similarity recommendations for the second user in `users`.
personalized_model.recommend(users=[users[1]])
Out[20]:
In [21]:
# Ask the item-similarity model which items are similar to this song.
personalized_model.get_similar_items(['The Stallion - Ween'])
Out[21]:
In [22]:
# Same similarity query for a different song.
personalized_model.get_similar_items(['Chan Chan (Live) - Buena Vista Social Club'])
Out[22]:
In [27]:
import matplotlib
%matplotlib inline
model_performance = gl.recommender.util.compare_models(test_data,
[popularity_model, personalized_model],
user_sample=0.05)
In [28]:
import matplotlib.pyplot as plt
%matplotlib inline
fig, ax = plt.subplots()
pr_curves_by_model = [res['precision_recall_overall'] for res in model_performance]
pr_curve = pr_curves_by_model[0].sort('recall')
ax.plot(list(pr_curve['recall']), list(pr_curve['precision']),
'blue', label='M1')
pr_curve = pr_curves_by_model[1].sort('recall')
ax.plot(list(pr_curve['recall']), list(pr_curve['precision']),
'green', label='M2')
ax.set_title('Precision-Recall Averaged Over Users')
ax.set_xlabel('Recall')
ax.set_ylabel('Precision')
ax.legend()
fig.show()
In [35]:
# Rows of the dataset whose 'artist' column equals 'Kanye West'.
Kanye_West = song_data[song_data['artist']=='Kanye West']
In [38]:
# Visualize the distinct users who have a Kanye West row.
Kanye_West['user_id'].unique().show()
In [41]:
# Distinct user ids among the Kanye West rows.
Kanye_West_users=Kanye_West['user_id'].unique()
In [42]:
# Number of distinct Kanye West listeners.
len(Kanye_West_users)
Out[42]:
In [45]:
# Distinct user ids among rows whose 'artist' column equals 'Foo Fighters'
# (filter and de-duplication done in one expression).
Foo_Fighters_users = song_data[song_data['artist']=='Foo Fighters']['user_id'].unique()
In [47]:
# Number of distinct Foo Fighters listeners.
len(Foo_Fighters_users)
Out[47]:
In [53]:
# Distinct user ids among rows whose 'artist' column equals 'Taylor Swift'.
Taylor_Swift_users = song_data[song_data['artist']=='Taylor Swift']['user_id'].unique()
In [55]:
# Number of distinct Taylor Swift listeners.
len(Taylor_Swift_users)
Out[55]:
In [56]:
# Distinct user ids among rows whose 'artist' column equals 'Lady GaGa'.
# The comparison is an exact string match, so the capitalisation matters.
Lady_GaGa_users = song_data[song_data['artist']=='Lady GaGa']['user_id'].unique()
In [58]:
# Number of distinct Lady GaGa listeners.
len(Lady_GaGa_users)
Out[58]:
In [8]:
# Aggregate per artist: 'total_count' holds the sum of 'listen_count' over all
# of that artist's rows.
groupby_artist = song_data.groupby(
    key_columns='artist',
    operations={'total_count': gl.aggregate.SUM('listen_count')})
In [10]:
# Display the per-artist totals (unsorted).
groupby_artist
Out[10]:
In [19]:
# Order artists by 'total_count' using sort's default direction (ascending in
# SFrame.sort), so the least-listened artists come first.
groupby_artist.sort('total_count')
Out[19]:
In [20]:
# Same ordering reversed: the most-listened artists come first.
groupby_artist.sort('total_count', ascending=False)
Out[20]:
In [22]:
# Repeats the earlier train/test split with the same fraction argument and the
# same seed, rebinding train_data and test_data.
train_data,test_data = song_data.random_split(.8, seed=0)
In [23]:
# Take at most the first 10000 distinct user ids from the test split, to keep
# the recommendation step below to a bounded number of users.
subset_test_data = test_data['user_id'].unique()[0:10000]
In [25]:
# Rebuild the item-similarity recommender on the training split; this uses the
# same arguments as the earlier personalized_model cell and rebinds that name.
personalized_model = gl.item_similarity_recommender.create(
    train_data,
    user_id='user_id',
    item_id='song')
In [27]:
# 1 recommendation for each of these users
recommneded_song = personalized_model.recommend(subset_test_data, k=1)
In [ ]:
# Count how many times each song appears in the k=1 recommendations.
#
# The original call could not run: SFrame.groupby takes `key_columns` (not
# `key_column`) and requires an `operations` argument.
# NOTE(review): the original grouped `song_data` and overwrote
# `recommneded_song`; grouping the recommendations into a new name is assumed
# to be the intent (it also keeps this cell safe to re-run) -- confirm.
recommended_song_counts = recommneded_song.groupby(
    key_columns='song',
    operations={'count': gl.aggregate.COUNT()})