In [5]:
import graphlab
In [6]:
# Load the song dataset as an SFrame from the 'song_data.gl/' path
# (resolved relative to the notebook's working directory).
SONG_DATA_PATH = 'song_data.gl/'
song_data = graphlab.SFrame(SONG_DATA_PATH)
In [8]:
# Distinct values of the 'user_id' column across the whole dataset.
user_id_column = song_data['user_id']
users = user_id_column.unique()
In [14]:
def _rows_for_artist(artist_name):
    """Return the rows of song_data whose 'artist' column equals artist_name."""
    return song_data[song_data['artist'] == artist_name]


# One filtered frame per artist of interest. Each holds every matching row,
# so the same user may appear in more than one row.
kanye_listeners = _rows_for_artist('Kanye West')
foo_listeners = _rows_for_artist('Foo Fighters')
taylor_listeners = _rows_for_artist('Taylor Swift')
lady_listeners = _rows_for_artist('Lady GaGa')
In [15]:
# Row count of each per-artist subset, printed in the order
# Kanye West, Foo Fighters, Taylor Swift, Lady GaGa.
# len() on a filtered frame counts rows, not distinct users.
# Single-argument print(...) produces the same output on Python 2 and
# Python 3, whereas the bare `print x` statement is a SyntaxError on Python 3.
for artist_rows in (kanye_listeners, foo_listeners, taylor_listeners, lady_listeners):
    print(len(artist_rows))
In [18]:
# Number of distinct 'user_id' values in each per-artist subset, printed in
# the order Kanye West, Foo Fighters, Taylor Swift, Lady GaGa.
# Single-argument print(...) produces the same output on Python 2 and
# Python 3, whereas the bare `print x` statement is a SyntaxError on Python 3.
for artist_rows in (kanye_listeners, foo_listeners, taylor_listeners, lady_listeners):
    print(len(artist_rows['user_id'].unique()))
In [7]:
# Total listens per artist: group the rows by 'artist' and sum the
# 'listen_count' column into a new 'total_count' column.
# NOTE: the result variable shares its name with the 'listen_count' column;
# it is the aggregated per-artist frame, not the raw column.
sum_of_listens = graphlab.aggregate.SUM('listen_count')
listen_count = song_data.groupby(
    key_columns='artist',
    operations={'total_count': sum_of_listens},
)
In [11]:
# Artist with the largest 'total_count': sort descending, show the first row.
artists_most_played_first = listen_count.sort('total_count', ascending=False)
artists_most_played_first[0]
Out[11]:
In [25]:
# Artist with the smallest 'total_count': sort ascending, show the first row.
artists_least_played_first = listen_count.sort('total_count', ascending=True)
artists_least_played_first[0]
Out[25]:
In [12]:
# Randomly split song_data into two frames using fraction 0.8.
# The seed is fixed at 0 so re-running the cell repeats the same split.
train_data, test_data = song_data.random_split(fraction=0.8, seed=0)
In [13]:
# Fit an item-similarity recommender on the training split, using the
# 'user_id' column as the user key and the 'song' column as the item key.
personalized_model = graphlab.item_similarity_recommender.create(
    train_data,
    user_id='user_id',
    item_id='song',
)
In [14]:
# Take at most the first 10000 distinct user ids found in the test split.
test_user_ids = test_data['user_id'].unique()
subset_test_users = test_user_ids[:10000]
In [16]:
# Ask the model for a single (k=1) recommendation for each user in the subset.
recommendations = personalized_model.recommend(users=subset_test_users, k=1)
In [17]:
# Count how many recommendation rows name each song: group by 'song' and
# store the group size in a 'count' column.
rows_per_song = {'count': graphlab.aggregate.COUNT}
recommend_count = recommendations.groupby(
    key_columns='song',
    operations=rows_per_song,
)
In [18]:
# Show the songs ordered from most to least frequently recommended.
songs_most_recommended_first = recommend_count.sort('count', ascending=False)
songs_most_recommended_first
Out[18]: