In [1]:
import graphlab
In [2]:
# Load the song listening data from the local 'song_data.gl/' directory into an SFrame.
song_data = graphlab.SFrame('song_data.gl/')
In [4]:
# Preview the first 5 rows to see which columns are available.
song_data.head(5)
Out[4]:
In [5]:
# Point GraphLab Canvas at the 'ipynb' target so .show() output appears in this notebook.
graphlab.canvas.set_target('ipynb')
In [6]:
# Visualise the 'song' column with Canvas.
song_data['song'].show()
In [7]:
# Number of rows in the dataset.
len(song_data)
Out[7]:
In [8]:
# Distinct user ids present in the data.
users = song_data['user_id'].unique()
In [9]:
# Number of distinct users.
len(users)
Out[9]:
In [10]:
# Randomly split the rows into two SFrames using fraction 0.8; the first part is
# used for training and the second for testing. seed=0 keeps the split repeatable.
train_data,test_data = song_data.random_split(0.8,seed=0)
In [11]:
# Baseline recommender built from the training split, keyed on the
# 'user_id' and 'song' columns.
popularity_model = graphlab.popularity_recommender.create(
    train_data,
    user_id='user_id',
    item_id='song')
In [12]:
# Popularity-model recommendations for the first user in `users`.
popularity_model.recommend(users=[users[0]])
Out[12]:
In [13]:
# Popularity-model recommendations for the second user in `users`, for comparison.
popularity_model.recommend(users=[users[1]])
Out[13]:
In [14]:
# Item-similarity recommender built from the same training split and the
# same 'user_id' / 'song' columns as the popularity model.
personalized_model = graphlab.item_similarity_recommender.create(
    train_data,
    user_id='user_id',
    item_id='song')
In [15]:
# Item-similarity recommendations for the first user in `users`.
personalized_model.recommend(users=[users[0]])
Out[15]:
In [16]:
# Item-similarity recommendations for the second user in `users`, for comparison.
personalized_model.recommend(users=[users[1]])
Out[16]:
In [17]:
# Items the item-similarity model considers similar to this song.
personalized_model.get_similar_items(['With Or Without You - U2'])
Out[17]:
In [18]:
# Same similarity lookup for a second song.
personalized_model.get_similar_items(['Chan Chan (Live) - Buena Vista Social Club'])
Out[18]:
In [19]:
# Render matplotlib figures inline in the notebook.
%matplotlib inline
In [20]:
# Evaluate both recommenders against the held-out test split.
# user_sample=0.05 is passed through to compare_models unchanged.
models_to_compare = [popularity_model, personalized_model]
model_performance = graphlab.recommender.util.compare_models(
    test_data,
    models_to_compare,
    user_sample=0.05)
In [22]:
# Distinct users with at least one row whose artist is 'Kanye West'.
is_kanye_row = song_data['artist'] == 'Kanye West'
kanye_listeners = song_data[is_kanye_row]['user_id'].unique()
In [23]:
# Number of distinct Kanye West listeners.
len(kanye_listeners)
Out[23]:
In [24]:
def artist_listeners(artist):
    """Return the number of distinct users who have a row for `artist`.

    Filters the notebook-level `song_data` to rows whose 'artist' column
    equals `artist`, then counts the unique 'user_id' values among them.

    Parameters
    ----------
    artist : str
        Artist name, compared for exact equality with the 'artist' column.

    Returns
    -------
    int
        Count of unique user ids; 0 when no row matches.
    """
    return len(song_data[song_data['artist'] == artist]['user_id'].unique())
In [25]:
# Unique-listener count for Kanye West. The parenthesised form prints the same
# on Python 2 and also parses on Python 3.
print(artist_listeners('Kanye West'))
In [26]:
# Unique-listener count for Foo Fighters. The parenthesised form prints the same
# on Python 2 and also parses on Python 3.
print(artist_listeners('Foo Fighters'))
In [27]:
# Unique-listener count for Taylor Swift. The parenthesised form prints the same
# on Python 2 and also parses on Python 3.
print(artist_listeners('Taylor Swift'))
In [28]:
# Unique-listener count for Lady GaGa. The parenthesised form prints the same
# on Python 2 and also parses on Python 3.
print(artist_listeners('Lady GaGa'))
In [38]:
# Sum 'listen_count' per artist into a 'total_count' column.
# The result is bound to `gr` because the sorting cells that follow read it;
# without this assignment they fail with NameError on a fresh kernel.
gr = song_data.groupby(
    key_columns='artist',
    operations={'total_count': graphlab.aggregate.SUM('listen_count')})
# Bare last expression keeps the grouped table as this cell's displayed output.
gr
Out[38]:
In [40]:
# Sort the grouped table `gr` by 'total_count', largest first.
gr.sort('total_count',ascending=False)
Out[40]:
In [49]:
# Sort the grouped table `gr` by 'total_count', smallest first.
gr.sort('total_count',ascending=True)
Out[49]:
In [41]:
# Take the first 10000 of the distinct user ids found in the test split.
test_user_ids = test_data['user_id'].unique()
subset_test_users = test_user_ids[0:10000]
In [43]:
# Ask the item-similarity model for recommendations for each sampled test user, with k=1.
recommended_songs = personalized_model.recommend(subset_test_users,k=1)
In [44]:
# Display the recommendation table.
recommended_songs
Out[44]:
In [47]:
# Count how many rows of the recommendation table fall on each song.
count_per_song = {'count': graphlab.aggregate.COUNT()}
songs_grp = recommended_songs.groupby(
    key_columns='song',
    operations=count_per_song)
In [48]:
# Most frequently recommended songs first.
songs_grp.sort('count',ascending=False)
Out[48]:
In [ ]: