In [1]:
import graphlab
In [2]:
# Load the song dataset as an SFrame from the local ./song_data.gl/ directory.
song_data = graphlab.SFrame('./song_data.gl/')
In [3]:
# Preview the first rows of the dataset.
song_data.head()
Out[3]:
In [4]:
# Point GraphLab Canvas at the 'ipynb' target
# (presumably so that .show() output renders inside the notebook -- confirm).
graphlab.canvas.set_target('ipynb')
In [5]:
# Call .show() on the 'song' column to visualize it.
song_data['song'].show()
In [6]:
# Row count of the full dataset.
len(song_data)
Out[6]:
In [7]:
# Distinct user_id values across the whole dataset.
users = song_data['user_id'].unique()
In [8]:
# Number of distinct users.
len(users)
Out[8]:
In [9]:
# Randomly split the rows into a training set (fraction 0.8) and a test set;
# the fixed seed keeps the split repeatable across runs.
train_data, test_data = song_data.random_split(fraction=0.8, seed=0)
In [10]:
# Popularity recommender fitted on the training split, with 'user_id' as the
# user column and 'song' as the item column.
popularity_model = graphlab.popularity_recommender.create(
    train_data,
    user_id='user_id',
    item_id='song',
)
In [11]:
# Popularity-model recommendations for users[0].
popularity_model.recommend(users=[users[0]])
Out[11]:
In [12]:
# Popularity-model recommendations for users[1].
popularity_model.recommend(users=[users[1]])
Out[12]:
In [13]:
# Item-similarity recommender fitted on the same training split and columns
# as the popularity model.
# NOTE(review): the variable is spelled `personalized_mode` (not `..._model`);
# the spelling is kept because later cells reference this exact name.
personalized_mode = graphlab.item_similarity_recommender.create(
    train_data,
    user_id='user_id',
    item_id='song',
)
In [14]:
# Item-similarity recommendations for users[0].
personalized_mode.recommend(users=[users[0]])
Out[14]:
In [15]:
# Item-similarity recommendations for users[1].
personalized_mode.recommend(users=[users[1]])
Out[15]:
In [16]:
# Songs the item-similarity model considers closest to this U2 track.
personalized_mode.get_similar_items(
    items=['With Or Without You - U2'],
)
Out[16]:
In [17]:
# Songs the item-similarity model considers closest to this Buena Vista Social Club track.
personalized_mode.get_similar_items(
    items=['Chan Chan (Live) - Buena Vista Social Club'],
)
Out[17]:
In [19]:
import matplotlib.pyplot as plt
%matplotlib inline
# Compare both recommenders on the held-out test split; user_sample=0.05 is
# passed so the comparison runs on a sample rather than on every user.
model_performance = graphlab.recommender.util.compare_models(
    test_data,
    [popularity_model, personalized_mode],
    user_sample=0.05,
)
In [23]:
# Count the distinct user_ids among rows whose artist is 'Kanye West'.
len(
    song_data[song_data['artist'] == 'Kanye West']['user_id'].unique()
)
Out[23]:
In [24]:
# Count the distinct user_ids among rows whose artist is 'Foo Fighters'.
len(
    song_data[song_data['artist'] == 'Foo Fighters']['user_id'].unique()
)
Out[24]:
In [25]:
# Count the distinct user_ids among rows whose artist is 'Taylor Swift'.
len(
    song_data[song_data['artist'] == 'Taylor Swift']['user_id'].unique()
)
Out[25]:
In [26]:
# Count the distinct user_ids among rows whose artist is 'Lady GaGa'.
len(
    song_data[song_data['artist'] == 'Lady GaGa']['user_id'].unique()
)
Out[26]:
In [27]:
# Sum listen_count per artist and list the artists from most to least listened.
(
    song_data
    .groupby(
        key_columns='artist',
        operations={'total_count': graphlab.aggregate.SUM('listen_count')},
    )
    .sort('total_count', ascending=False)
)
Out[27]:
In [28]:
# Same per-artist listen_count totals, sorted ascending so the least-listened
# artists come first.
(
    song_data
    .groupby(
        key_columns='artist',
        operations={'total_count': graphlab.aggregate.SUM('listen_count')},
    )
    .sort('total_count', ascending=True)
)
Out[28]:
In [29]:
# Take the first 10000 entries of the distinct user_ids in the test split.
subset_test_users = (
    test_data['user_id']
    .unique()[0:10000]
)
In [31]:
# Ask the item-similarity model for a single (k=1) recommendation for each
# user in the test subset.
recommendations = personalized_mode.recommend(users=subset_test_users, k=1)
In [32]:
# Preview the first rows of the recommendations.
recommendations.head()
Out[32]:
In [34]:
# Count how many times each song appears in the recommendations and list the
# most frequently recommended songs first.
(
    recommendations
    .groupby(
        key_columns='song',
        operations={'count': graphlab.aggregate.COUNT()},
    )
    .sort('count', ascending=False)
)
Out[34]: