notebook.community

Edit and run



In [51]:

    
# import
import graphlab as gl
from matplotlib import pyplot 

%matplotlib inline



In [32]:

    
# reading data
songs = gl.SFrame('data/song_data.gl/')



In [33]:

    
songs.head()









    Out[33]:





    
        user_id
        song_id
        listen_count
        title
        artist
    
    
        b80344d063b5ccb3212f76538
f3d9e43d87dca9e ...
        SOAKIMP12A8C130995
        1
        The Cove
        Jack Johnson
    
    
        b80344d063b5ccb3212f76538
f3d9e43d87dca9e ...
        SOBBMDR12A8C13253B
        2
        Entre Dos Aguas
        Paco De Lucia
    
    
        b80344d063b5ccb3212f76538
f3d9e43d87dca9e ...
        SOBXHDL12A81C204C0
        1
        Stronger
        Kanye West
    
    
        b80344d063b5ccb3212f76538
f3d9e43d87dca9e ...
        SOBYHAJ12A6701BF1D
        1
        Constellations
        Jack Johnson
    
    
        b80344d063b5ccb3212f76538
f3d9e43d87dca9e ...
        SODACBL12A8C13C273
        1
        Learn To Fly
        Foo Fighters
    
    
        b80344d063b5ccb3212f76538
f3d9e43d87dca9e ...
        SODDNQT12A6D4F5F7E
        5
        Apuesta Por El Rock 'N'
Roll ...
        Héroes del Silencio
    
    
        b80344d063b5ccb3212f76538
f3d9e43d87dca9e ...
        SODXRTY12AB0180F3B
        1
        Paper Gangsta
        Lady GaGa
    
    
        b80344d063b5ccb3212f76538
f3d9e43d87dca9e ...
        SOFGUAY12AB017B0A8
        1
        Stacked Actors
        Foo Fighters
    
    
        b80344d063b5ccb3212f76538
f3d9e43d87dca9e ...
        SOFRQTD12A81C233C0
        1
        Sehr kosmisch
        Harmonia
    
    
        b80344d063b5ccb3212f76538
f3d9e43d87dca9e ...
        SOHQWYZ12A6D4FA701
        1
        Heaven's gonna burn your
eyes ...
        Thievery Corporation
feat. Emiliana Torrini ...
    


    
        song
    
    
        The Cove - Jack Johnson
    
    
        Entre Dos Aguas - Paco De
Lucia ...
    
    
        Stronger - Kanye West
    
    
        Constellations - Jack
Johnson ...
    
    
        Learn To Fly - Foo
Fighters ...
    
    
        Apuesta Por El Rock 'N'
Roll - Héroes del ...
    
    
        Paper Gangsta - Lady GaGa
    
    
        Stacked Actors - Foo
Fighters ...
    
    
        Sehr kosmisch - Harmonia
    
    
        Heaven's gonna burn your
eyes - Thievery ...
    

[10 rows x 6 columns]



In [34]:

    
gl.canvas.set_target('ipynb')



In [35]:

    
songs['song'].show()



In [36]:

    
# total 
len(songs)









    Out[36]:





1116609



In [37]:

    
# counting no of users
users = songs['user_id'].unique()



In [38]:

    
len(users)









    Out[38]:





66346

Creating a Recommender System



In [39]:

    
train_data, test_data = songs.random_split(0.8, seed=0)

Most Popular Model



In [40]:

    
popularity_model = gl.popularity_recommender.create(train_data,
                                                   user_id='user_id',
                                                   item_id='song',
                                                   )









    




Recsys training: model = popularity






    




Warning: Ignoring columns song_id, listen_count, title, artist;






    




    To use one of these as a target column, set target = 






    




    and use a method that allows the use of a target.






    




Preparing data set.






    




    Data has 893580 observations with 66085 users and 9952 items.






    




    Data prepared in: 1.30735s






    




893580 observations to process; with 9952 unique items.



In [41]:

    
popularity_model.recommend(users=[users[0]])









    Out[41]:





    
        user_id
        song
        score
        rank
    
    
        c66c10a9567f0d82ff31441a9
fd5063e5cd9dfe8 ...
        Sehr kosmisch - Harmonia
        4754.0
        1
    
    
        c66c10a9567f0d82ff31441a9
fd5063e5cd9dfe8 ...
        Undo - Björk
        4227.0
        2
    
    
        c66c10a9567f0d82ff31441a9
fd5063e5cd9dfe8 ...
        You're The One - Dwight
Yoakam ...
        3781.0
        3
    
    
        c66c10a9567f0d82ff31441a9
fd5063e5cd9dfe8 ...
        Dog Days Are Over (Radio
Edit) - Florence + The ...
        3633.0
        4
    
    
        c66c10a9567f0d82ff31441a9
fd5063e5cd9dfe8 ...
        Revelry - Kings Of Leon
        3527.0
        5
    
    
        c66c10a9567f0d82ff31441a9
fd5063e5cd9dfe8 ...
        Horn Concerto No. 4 in E
flat K495: II. Romance ...
        3161.0
        6
    
    
        c66c10a9567f0d82ff31441a9
fd5063e5cd9dfe8 ...
        Secrets - OneRepublic
        3148.0
        7
    
    
        c66c10a9567f0d82ff31441a9
fd5063e5cd9dfe8 ...
        Fireflies - Charttraxx
Karaoke ...
        2532.0
        8
    
    
        c66c10a9567f0d82ff31441a9
fd5063e5cd9dfe8 ...
        Tive Sim - Cartola
        2521.0
        9
    
    
        c66c10a9567f0d82ff31441a9
fd5063e5cd9dfe8 ...
        Drop The World - Lil
Wayne / Eminem ...
        2053.0
        10
    

[10 rows x 4 columns]



In [42]:

    
popularity_model.recommend(users=[users[1]])









    Out[42]:





    
        user_id
        song
        score
        rank
    
    
        279292bb36dbfc7f505e36ebf
038c81eb1d1d63e ...
        Sehr kosmisch - Harmonia
        4754.0
        1
    
    
        279292bb36dbfc7f505e36ebf
038c81eb1d1d63e ...
        Undo - Björk
        4227.0
        2
    
    
        279292bb36dbfc7f505e36ebf
038c81eb1d1d63e ...
        You're The One - Dwight
Yoakam ...
        3781.0
        3
    
    
        279292bb36dbfc7f505e36ebf
038c81eb1d1d63e ...
        Dog Days Are Over (Radio
Edit) - Florence + The ...
        3633.0
        4
    
    
        279292bb36dbfc7f505e36ebf
038c81eb1d1d63e ...
        Revelry - Kings Of Leon
        3527.0
        5
    
    
        279292bb36dbfc7f505e36ebf
038c81eb1d1d63e ...
        Horn Concerto No. 4 in E
flat K495: II. Romance ...
        3161.0
        6
    
    
        279292bb36dbfc7f505e36ebf
038c81eb1d1d63e ...
        Secrets - OneRepublic
        3148.0
        7
    
    
        279292bb36dbfc7f505e36ebf
038c81eb1d1d63e ...
        Hey_ Soul Sister - Train
        2538.0
        8
    
    
        279292bb36dbfc7f505e36ebf
038c81eb1d1d63e ...
        Fireflies - Charttraxx
Karaoke ...
        2532.0
        9
    
    
        279292bb36dbfc7f505e36ebf
038c81eb1d1d63e ...
        Tive Sim - Cartola
        2521.0
        10
    

[10 rows x 4 columns]

The problem with "Popularity Model" is - It recommends same items to everyone, Not personalized

Personalized Model



In [43]:

    
personalized_model = gl.item_similarity_recommender.create(train_data, 
                                                          user_id='user_id',
                                                          item_id='song')









    




Recsys training: model = item_similarity






    




Warning: Ignoring columns song_id, listen_count, title, artist;






    




    To use one of these as a target column, set target = 






    




    and use a method that allows the use of a target.






    




Preparing data set.






    




    Data has 893580 observations with 66085 users and 9952 items.






    




    Data prepared in: 1.39497s






    




Training model from provided data.






    




Gathering per-item and per-user statistics.






    




+--------------------------------+------------+






    




| Elapsed Time (Item Statistics) | % Complete |






    




+--------------------------------+------------+






    




| 2.005ms                        | 1.5        |






    




| 39.031ms                       | 100        |






    




+--------------------------------+------------+






    




Setting up lookup tables.






    




Processing data in one pass using dense lookup tables.






    




+-------------------------------------+------------------+-----------------+






    




| Elapsed Time (Constructing Lookups) | Total % Complete | Items Processed |






    




+-------------------------------------+------------------+-----------------+






    




| 218.752ms                           | 0                | 0               |






    




| 1.80s                               | 100              | 9952            |






    




+-------------------------------------+------------------+-----------------+






    




Finalizing lookup tables.






    




Generating candidate set for working with new users.






    




Finished training in 1.86735s



In [44]:

    
personalized_model.recommend(users=[users[0]])









    Out[44]:





    
        user_id
        song
        score
        rank
    
    
        c66c10a9567f0d82ff31441a9
fd5063e5cd9dfe8 ...
        Cuando Pase El Temblor -
Soda Stereo ...
        0.0194504536115
        1
    
    
        c66c10a9567f0d82ff31441a9
fd5063e5cd9dfe8 ...
        Fireflies - Charttraxx
Karaoke ...
        0.0144737317012
        2
    
    
        c66c10a9567f0d82ff31441a9
fd5063e5cd9dfe8 ...
        Love Is A Losing Game -
Amy Winehouse ...
        0.0142865960415
        3
    
    
        c66c10a9567f0d82ff31441a9
fd5063e5cd9dfe8 ...
        Marry Me - Train
        0.014133471709
        4
    
    
        c66c10a9567f0d82ff31441a9
fd5063e5cd9dfe8 ...
        Secrets - OneRepublic
        0.013591665488
        5
    
    
        c66c10a9567f0d82ff31441a9
fd5063e5cd9dfe8 ...
        Sehr kosmisch - Harmonia
        0.0133987894425
        6
    
    
        c66c10a9567f0d82ff31441a9
fd5063e5cd9dfe8 ...
        Te Hacen Falta Vitaminas
- Soda Stereo ...
        0.0129302831796
        7
    
    
        c66c10a9567f0d82ff31441a9
fd5063e5cd9dfe8 ...
        OMG - Usher featuring
will.i.am ...
        0.0127778282532
        8
    
    
        c66c10a9567f0d82ff31441a9
fd5063e5cd9dfe8 ...
        Y solo se me ocurre
amarte (Unplugged) - ...
        0.0123411279458
        9
    
    
        c66c10a9567f0d82ff31441a9
fd5063e5cd9dfe8 ...
        No Dejes Que... -
Caifanes ...
        0.0121042499175
        10
    

[10 rows x 4 columns]



In [45]:

    
personalized_model.recommend(users=[users[1]])









    Out[45]:





    
        user_id
        song
        score
        rank
    
    
        279292bb36dbfc7f505e36ebf
038c81eb1d1d63e ...
        Riot In Cell Block Number
Nine - Dr Feelgood ...
        0.0374999940395
        1
    
    
        279292bb36dbfc7f505e36ebf
038c81eb1d1d63e ...
        Sei Lá Mangueira -
Elizeth Cardoso ...
        0.0331632643938
        2
    
    
        279292bb36dbfc7f505e36ebf
038c81eb1d1d63e ...
        The Stallion - Ween
        0.0322580635548
        3
    
    
        279292bb36dbfc7f505e36ebf
038c81eb1d1d63e ...
        Rain - Subhumans
        0.0314159244299
        4
    
    
        279292bb36dbfc7f505e36ebf
038c81eb1d1d63e ...
        West One (Shine On Me) -
The Ruts ...
        0.0306771993637
        5
    
    
        279292bb36dbfc7f505e36ebf
038c81eb1d1d63e ...
        Back Against The Wall -
Cage The Elephant ...
        0.0301204770803
        6
    
    
        279292bb36dbfc7f505e36ebf
038c81eb1d1d63e ...
        Life Less Frightening -
Rise Against ...
        0.0284431129694
        7
    
    
        279292bb36dbfc7f505e36ebf
038c81eb1d1d63e ...
        A Beggar On A Beach Of
Gold - Mike And The ...
        0.0230024904013
        8
    
    
        279292bb36dbfc7f505e36ebf
038c81eb1d1d63e ...
        Audience Of One - Rise
Against ...
        0.0193938463926
        9
    
    
        279292bb36dbfc7f505e36ebf
038c81eb1d1d63e ...
        Blame It On The Boogie -
The Jacksons ...
        0.0189873427153
        10
    

[10 rows x 4 columns]



In [46]:

    
personalized_model.get_similar_items(['With Or Without You - U2'])









    Out[46]:





    
        song
        similar
        score
        rank
    
    
        With Or Without You - U2
        I Still Haven't Found
What I'm Looking For  ...
        0.042857170105
        1
    
    
        With Or Without You - U2
        Hold Me_ Thrill Me_ Kiss
Me_ Kill Me - U2 ...
        0.0337349176407
        2
    
    
        With Or Without You - U2
        Window In The Skies - U2
        0.0328358411789
        3
    
    
        With Or Without You - U2
        Vertigo - U2
        0.0300751924515
        4
    
    
        With Or Without You - U2
        Sunday Bloody Sunday - U2
        0.0271317958832
        5
    
    
        With Or Without You - U2
        Bad - U2
        0.0251798629761
        6
    
    
        With Or Without You - U2
        A Day Without Me - U2
        0.0237154364586
        7
    
    
        With Or Without You - U2
        Another Time Another
Place - U2 ...
        0.0203251838684
        8
    
    
        With Or Without You - U2
        Walk On - U2
        0.0202020406723
        9
    
    
        With Or Without You - U2
        Get On Your Boots - U2
        0.0196850299835
        10
    

[10 rows x 4 columns]



In [47]:

    
personalized_model.get_similar_items(['Chan Chan (Live) - Buena Vista Social Club'])









    Out[47]:





    
        song
        similar
        score
        rank
    
    
        Chan Chan (Live) - Buena
Vista Social Club ...
        Murmullo - Buena Vista
Social Club ...
        0.188118815422
        1
    
    
        Chan Chan (Live) - Buena
Vista Social Club ...
        La Bayamesa - Buena Vista
Social Club ...
        0.18719214201
        2
    
    
        Chan Chan (Live) - Buena
Vista Social Club ...
        Amor de Loca Juventud -
Buena Vista Social Club ...
        0.184834122658
        3
    
    
        Chan Chan (Live) - Buena
Vista Social Club ...
        Diferente - Gotan Project
        0.0214592218399
        4
    
    
        Chan Chan (Live) - Buena
Vista Social Club ...
        Mistica - Orishas
        0.0205761194229
        5
    
    
        Chan Chan (Live) - Buena
Vista Social Club ...
        Hotel California - Gipsy
Kings ...
        0.0193049907684
        6
    
    
        Chan Chan (Live) - Buena
Vista Social Club ...
        Nací Orishas - Orishas
        0.0191571116447
        7
    
    
        Chan Chan (Live) - Buena
Vista Social Club ...
        Gitana - Willie Colon
        0.018796980381
        8
    
    
        Chan Chan (Live) - Buena
Vista Social Club ...
        Le Moulin - Yann Tiersen
        0.018796980381
        9
    
    
        Chan Chan (Live) - Buena
Vista Social Club ...
        Criminal - Gotan Project
        0.0187793374062
        10
    

[10 rows x 4 columns]

Quantitaive comparison between the models



In [59]:

    
model_perf = gl.recommender.util.compare_models(test_data, 
                                                [popularity_model, personalized_model],
                                               user_sample=0.05)









    



compare_models: using 2931 users to estimate model performance
PROGRESS: Evaluate model M0






    




recommendations finished on 1000/2931 queries. users per second: 10861.7






    




recommendations finished on 2000/2931 queries. users per second: 11387.7






    



Precision and recall summary statistics by cutoff
+--------+-----------------+------------------+
| cutoff |  mean_precision |   mean_recall    |
+--------+-----------------+------------------+
|   1    |  0.029341521665 | 0.00836478052445 |
|   2    | 0.0262708973047 | 0.0145547683675  |
|   3    | 0.0242238143978 | 0.0189623968283  |
|   4    | 0.0227737973388 |  0.023278342265  |
|   5    | 0.0206073012624 | 0.0266831106647  |
|   6    | 0.0195610144433 | 0.0299014482996  |
|   7    | 0.0186187064386 | 0.0335070072276  |
|   8    | 0.0179972705561 | 0.0365114054981  |
|   9    | 0.0170969331665 | 0.0390378547492  |
|   10   |  0.016308427158 | 0.0408956551996  |
+--------+-----------------+------------------+
[10 rows x 3 columns]

PROGRESS: Evaluate model M1






    




recommendations finished on 1000/2931 queries. users per second: 9382.98






    




recommendations finished on 2000/2931 queries. users per second: 9845.33






    



Precision and recall summary statistics by cutoff
+--------+-----------------+-----------------+
| cutoff |  mean_precision |   mean_recall   |
+--------+-----------------+-----------------+
|   1    |  0.190378710338 | 0.0588421911063 |
|   2    |  0.161037188673 | 0.0944179198243 |
|   3    |  0.142954622996 |  0.121660503283 |
|   4    |  0.128880928011 |   0.1419621591  |
|   5    |  0.116820197885 |  0.158859981933 |
|   6    |  0.106732628227 |  0.17204992483  |
|   7    | 0.0992347809134 |  0.185091506857 |
|   8    | 0.0937819856704 |  0.199017245245 |
|   9    |  0.087872929224 |  0.209418004544 |
|   10   |  0.08307744797  |  0.218613683955 |
+--------+-----------------+-----------------+
[10 rows x 3 columns]



In [61]:

    
gl.show_comparison(model_perf,[popularity_model, personalized_model])









    



---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-61-530936e332a4> in <module>()
----> 1 gl.show_comparison(model_perf,[popularity_model, personalized_model])

C:\tools\Anaconda3\envs\gl-env\lib\site-packages\graphlab\toolkits\comparison.pyc in show_comparison(model_comp, models)
     51     """
     52     if type(model_comp) != _graphlab.SFrame:
---> 53         raise TypeError('"model_comp" must be a non empty SFrame')
     54     if type(models) != list or not all(map(lambda m: isinstance(m, _Recommender), models)):
     55         raise TypeError('"models" must be a list with Model elements')

TypeError: "model_comp" must be a non empty SFrame



In [62]:

    
model_perf









    Out[62]:





[{'precision_recall_by_user': Columns:
  	user_id	str
  	cutoff	int
  	precision	float
  	recall	float
  	count	int
  
  Rows: 52758
  
  Data:
  +-------------------------------+--------+-----------+--------+-------+
  |            user_id            | cutoff | precision | recall | count |
  +-------------------------------+--------+-----------+--------+-------+
  | 0007c0e74728ca9ef0fe4eb7f7... |   1    |    0.0    |  0.0   |   2   |
  | 0007c0e74728ca9ef0fe4eb7f7... |   2    |    0.0    |  0.0   |   2   |
  | 0007c0e74728ca9ef0fe4eb7f7... |   3    |    0.0    |  0.0   |   2   |
  | 0007c0e74728ca9ef0fe4eb7f7... |   4    |    0.0    |  0.0   |   2   |
  | 0007c0e74728ca9ef0fe4eb7f7... |   5    |    0.0    |  0.0   |   2   |
  | 0007c0e74728ca9ef0fe4eb7f7... |   6    |    0.0    |  0.0   |   2   |
  | 0007c0e74728ca9ef0fe4eb7f7... |   7    |    0.0    |  0.0   |   2   |
  | 0007c0e74728ca9ef0fe4eb7f7... |   8    |    0.0    |  0.0   |   2   |
  | 0007c0e74728ca9ef0fe4eb7f7... |   9    |    0.0    |  0.0   |   2   |
  | 0007c0e74728ca9ef0fe4eb7f7... |   10   |    0.0    |  0.0   |   2   |
  +-------------------------------+--------+-----------+--------+-------+
  [52758 rows x 5 columns]
  Note: Only the head of the SFrame is printed.
  You can use print_rows(num_rows=m, num_columns=n) to print more rows and columns.,
  'precision_recall_overall': Columns:
  	cutoff	int
  	precision	float
  	recall	float
  
  Rows: 18
  
  Data:
  +--------+-----------------+------------------+
  | cutoff |    precision    |      recall      |
  +--------+-----------------+------------------+
  |   1    |  0.029341521665 | 0.00836478052445 |
  |   2    | 0.0262708973047 | 0.0145547683675  |
  |   3    | 0.0242238143978 | 0.0189623968283  |
  |   4    | 0.0227737973388 |  0.023278342265  |
  |   5    | 0.0206073012624 | 0.0266831106647  |
  |   6    | 0.0195610144433 | 0.0299014482996  |
  |   7    | 0.0186187064386 | 0.0335070072276  |
  |   8    | 0.0179972705561 | 0.0365114054981  |
  |   9    | 0.0170969331665 | 0.0390378547492  |
  |   10   |  0.016308427158 | 0.0408956551996  |
  +--------+-----------------+------------------+
  [18 rows x 3 columns]
  Note: Only the head of the SFrame is printed.
  You can use print_rows(num_rows=m, num_columns=n) to print more rows and columns.},
 {'precision_recall_by_user': Columns:
  	user_id	str
  	cutoff	int
  	precision	float
  	recall	float
  	count	int
  
  Rows: 52758
  
  Data:
  +-------------------------------+--------+----------------+--------+-------+
  |            user_id            | cutoff |   precision    | recall | count |
  +-------------------------------+--------+----------------+--------+-------+
  | 0007c0e74728ca9ef0fe4eb7f7... |   1    |      0.0       |  0.0   |   2   |
  | 0007c0e74728ca9ef0fe4eb7f7... |   2    |      0.0       |  0.0   |   2   |
  | 0007c0e74728ca9ef0fe4eb7f7... |   3    | 0.333333333333 |  0.5   |   2   |
  | 0007c0e74728ca9ef0fe4eb7f7... |   4    |      0.25      |  0.5   |   2   |
  | 0007c0e74728ca9ef0fe4eb7f7... |   5    |      0.2       |  0.5   |   2   |
  | 0007c0e74728ca9ef0fe4eb7f7... |   6    | 0.166666666667 |  0.5   |   2   |
  | 0007c0e74728ca9ef0fe4eb7f7... |   7    | 0.142857142857 |  0.5   |   2   |
  | 0007c0e74728ca9ef0fe4eb7f7... |   8    |     0.125      |  0.5   |   2   |
  | 0007c0e74728ca9ef0fe4eb7f7... |   9    | 0.111111111111 |  0.5   |   2   |
  | 0007c0e74728ca9ef0fe4eb7f7... |   10   |      0.1       |  0.5   |   2   |
  +-------------------------------+--------+----------------+--------+-------+
  [52758 rows x 5 columns]
  Note: Only the head of the SFrame is printed.
  You can use print_rows(num_rows=m, num_columns=n) to print more rows and columns.,
  'precision_recall_overall': Columns:
  	cutoff	int
  	precision	float
  	recall	float
  
  Rows: 18
  
  Data:
  +--------+-----------------+-----------------+
  | cutoff |    precision    |      recall     |
  +--------+-----------------+-----------------+
  |   1    |  0.190378710338 | 0.0588421911063 |
  |   2    |  0.161037188673 | 0.0944179198243 |
  |   3    |  0.142954622996 |  0.121660503283 |
  |   4    |  0.128880928011 |   0.1419621591  |
  |   5    |  0.116820197885 |  0.158859981933 |
  |   6    |  0.106732628227 |  0.17204992483  |
  |   7    | 0.0992347809134 |  0.185091506857 |
  |   8    | 0.0937819856704 |  0.199017245245 |
  |   9    |  0.087872929224 |  0.209418004544 |
  |   10   |  0.08307744797  |  0.218613683955 |
  +--------+-----------------+-----------------+
  [18 rows x 3 columns]
  Note: Only the head of the SFrame is printed.
  You can use print_rows(num_rows=m, num_columns=n) to print more rows and columns.}]



In [66]:

    
popularity_model.show(view='Evaluation')
personalized_model.show(view='Evaluation')

Quiz



In [93]:

    
Kanye = songs[songs['artist']=='Kanye West']['user_id'].unique() 
Foo = songs[songs['artist']=='Foo Fighters']['user_id'].unique()
Taylor = songs[songs['artist']=='Taylor Swift']['user_id'].unique()
Lady = songs[songs['artist']=='Lady GaGa']['user_id'].unique()

print(len(Kanye), len(Foo), len(Taylor), len(Lady))









    



(2522, 2055, 3246, 2928)



In [72]:

    
Agg_listen_data = songs.groupby(key_columns='artist', operations={'total_count': gl.aggregate.SUM('listen_count')}).sort('total_count')



In [73]:

    
Agg_listen_data.head(2)









    Out[73]:





    
        artist
        total_count
    
    
        William Tabbert
        14
    
    
        Reel Feelings
        24
    

[2 rows x 2 columns]



In [94]:

    
Agg_listen_data.tail(2)









    Out[94]:





    
        artist
        total_count
    
    
        Dwight Yoakam
        40619
    
    
        Kings Of Leon
        43218
    

[2 rows x 2 columns]



In [102]:

    
print(Agg_listen_data[Agg_listen_data['artist']=='Kanye West'])
print(Agg_listen_data[Agg_listen_data['artist']=='Foo Fighters'])
print(Agg_listen_data[Agg_listen_data['artist']=='Taylor Swift'])
print(Agg_listen_data[Agg_listen_data['artist']=='Lady GaGa'])
print(Agg_listen_data[Agg_listen_data['artist']=='Kings of Leon'])
print(Agg_listen_data[Agg_listen_data['artist']=='Coldplay'])
print(Agg_listen_data[Agg_listen_data['artist']=='The Cool Kids'])
print(Agg_listen_data[Agg_listen_data['artist']=='William Tabbert'])
print(Agg_listen_data[Agg_listen_data['artist']=='Velvet Underground & Nico'])









    



+------------+-------------+
|   artist   | total_count |
+------------+-------------+
| Kanye West |     9992    |
+------------+-------------+
[? rows x 2 columns]
Note: Only the head of the SFrame is printed. This SFrame is lazily evaluated.
You can use sf.materialize() to force materialization.
+--------------+-------------+
|    artist    | total_count |
+--------------+-------------+
| Foo Fighters |     9504    |
+--------------+-------------+
[? rows x 2 columns]
Note: Only the head of the SFrame is printed. This SFrame is lazily evaluated.
You can use sf.materialize() to force materialization.
+--------------+-------------+
|    artist    | total_count |
+--------------+-------------+
| Taylor Swift |    19376    |
+--------------+-------------+
[? rows x 2 columns]
Note: Only the head of the SFrame is printed. This SFrame is lazily evaluated.
You can use sf.materialize() to force materialization.
+-----------+-------------+
|   artist  | total_count |
+-----------+-------------+
| Lady GaGa |    12224    |
+-----------+-------------+
[? rows x 2 columns]
Note: Only the head of the SFrame is printed. This SFrame is lazily evaluated.
You can use sf.materialize() to force materialization.
+--------+-------------+
| artist | total_count |
+--------+-------------+
+--------+-------------+
[? rows x 2 columns]
Note: Only the head of the SFrame is printed. This SFrame is lazily evaluated.
You can use sf.materialize() to force materialization.
+----------+-------------+
|  artist  | total_count |
+----------+-------------+
| Coldplay |    35362    |
+----------+-------------+
[? rows x 2 columns]
Note: Only the head of the SFrame is printed. This SFrame is lazily evaluated.
You can use sf.materialize() to force materialization.
+---------------+-------------+
|     artist    | total_count |
+---------------+-------------+
| The Cool Kids |      73     |
+---------------+-------------+
[? rows x 2 columns]
Note: Only the head of the SFrame is printed. This SFrame is lazily evaluated.
You can use sf.materialize() to force materialization.
+-----------------+-------------+
|      artist     | total_count |
+-----------------+-------------+
| William Tabbert |      14     |
+-----------------+-------------+
[? rows x 2 columns]
Note: Only the head of the SFrame is printed. This SFrame is lazily evaluated.
You can use sf.materialize() to force materialization.
+---------------------------+-------------+
|           artist          | total_count |
+---------------------------+-------------+
| Velvet Underground & Nico |      80     |
+---------------------------+-------------+
[? rows x 2 columns]
Note: Only the head of the SFrame is printed. This SFrame is lazily evaluated.
You can use sf.materialize() to force materialization.

Using groupby-aggregate to find the most recommended songs:



In [75]:

    
train_data, test_data = songs.random_split(0.8, seed=0)



In [81]:

    
item_similarity =  gl.item_similarity_recommender.create(train_data, 
                                                         item_id='song',
                                                        user_id='user_id')









    




Recsys training: model = item_similarity






    




Warning: Ignoring columns song_id, listen_count, title, artist;






    




    To use one of these as a target column, set target = 






    




    and use a method that allows the use of a target.






    




Preparing data set.






    




    Data has 893580 observations with 66085 users and 9952 items.






    




    Data prepared in: 1.36244s






    




Training model from provided data.






    




Gathering per-item and per-user statistics.






    




+--------------------------------+------------+






    




| Elapsed Time (Item Statistics) | % Complete |






    




+--------------------------------+------------+






    




| 2.004ms                        | 4.5        |






    




| 33.022ms                       | 100        |






    




+--------------------------------+------------+






    




Setting up lookup tables.






    




Processing data in one pass using dense lookup tables.






    




+-------------------------------------+------------------+-----------------+






    




| Elapsed Time (Constructing Lookups) | Total % Complete | Items Processed |






    




+-------------------------------------+------------------+-----------------+






    




| 230.679ms                           | 0                | 0               |






    




| 2.06s                               | 100              | 9952            |






    




+-------------------------------------+------------------+-----------------+






    




Finalizing lookup tables.






    




Generating candidate set for working with new users.






    




Finished training in 2.16096s



In [82]:

    
subset_test_users = test_data['user_id'].unique()[0:10000]



In [83]:

    
item_similarity.recommend(subset_test_users,k=1)









    




recommendations finished on 1000/10000 queries. users per second: 10355.1






    




recommendations finished on 2000/10000 queries. users per second: 11519






    




recommendations finished on 3000/10000 queries. users per second: 11896.4






    




recommendations finished on 4000/10000 queries. users per second: 12530.5






    




recommendations finished on 5000/10000 queries. users per second: 12877.7






    




recommendations finished on 6000/10000 queries. users per second: 13163.3






    




recommendations finished on 7000/10000 queries. users per second: 13223.5






    




recommendations finished on 8000/10000 queries. users per second: 13436.3






    




recommendations finished on 9000/10000 queries. users per second: 13314.5






    




recommendations finished on 10000/10000 queries. users per second: 13131.7






    Out[83]:





    
        user_id
        song
        score
        rank
    
    
        c66c10a9567f0d82ff31441a9
fd5063e5cd9dfe8 ...
        Cuando Pase El Temblor -
Soda Stereo ...
        0.0194504536115
        1
    
    
        c067c22072a17d33310d7223d
7b79f819e48cf42 ...
        Grind With Me (Explicit
Version) - Pretty Ricky ...
        0.0459424376488
        1
    
    
        f6c596a519698c97f1591ad89
f540d76f6a04f1a ...
        Hey_ Soul Sister - Train
        0.0238929539919
        1
    
    
        696787172dd3f5169dc94deef
97e427cee86147d ...
        Senza Una Donna (Without
A Woman) - Zucchero / ...
        0.017026577677
        1
    
    
        3a7111f4cdf3c5a85fd4053e3
cc2333562e1e0cb ...
        Heartbreak Warfare - John
Mayer ...
        0.0298416515191
        1
    
    
        532e98155cbfd1e1a474a28ed
96e59e50f7c5baf ...
        Jive Talkin' (Album
Version) - Bee Gees ...
        0.0118288653237
        1
    
    
        ee43b175ed753b2e2bce806c9
03d4661ad351a91 ...
        Ricordati Di Noi -
Valerio Scanu ...
        0.0305171211561
        1
    
    
        e372c27f6cb071518ae500589
ae02c126954c148 ...
        Fall Out - The Police
        0.0819672048092
        1
    
    
        83b1428917b47a6b130ed471b
09033820be78a8c ...
        Clocks - Coldplay
        0.0440290331841
        1
    
    
        39487deef9345b1e22881245c
abf4e7c53b6cf6e ...
        Black Mirror - Arcade
Fire ...
        0.0417737685717
        1
    

[10000 rows x 4 columns]
Note: Only the head of the SFrame is printed.
You can use print_rows(num_rows=m, num_columns=n) to print more rows and columns.



In [90]:

    
most_recommended = songs.groupby(key_columns='song', operations={'count': gl.aggregate.COUNT()})



In [ ]:

user_id	song_id	listen_count	title	artist
b80344d063b5ccb3212f76538 f3d9e43d87dca9e ...	SOAKIMP12A8C130995	1	The Cove	Jack Johnson
b80344d063b5ccb3212f76538 f3d9e43d87dca9e ...	SOBBMDR12A8C13253B	2	Entre Dos Aguas	Paco De Lucia
b80344d063b5ccb3212f76538 f3d9e43d87dca9e ...	SOBXHDL12A81C204C0	1	Stronger	Kanye West
b80344d063b5ccb3212f76538 f3d9e43d87dca9e ...	SOBYHAJ12A6701BF1D	1	Constellations	Jack Johnson
b80344d063b5ccb3212f76538 f3d9e43d87dca9e ...	SODACBL12A8C13C273	1	Learn To Fly	Foo Fighters
b80344d063b5ccb3212f76538 f3d9e43d87dca9e ...	SODDNQT12A6D4F5F7E	5	Apuesta Por El Rock 'N' Roll ...	Héroes del Silencio
b80344d063b5ccb3212f76538 f3d9e43d87dca9e ...	SODXRTY12AB0180F3B	1	Paper Gangsta	Lady GaGa
b80344d063b5ccb3212f76538 f3d9e43d87dca9e ...	SOFGUAY12AB017B0A8	1	Stacked Actors	Foo Fighters
b80344d063b5ccb3212f76538 f3d9e43d87dca9e ...	SOFRQTD12A81C233C0	1	Sehr kosmisch	Harmonia
b80344d063b5ccb3212f76538 f3d9e43d87dca9e ...	SOHQWYZ12A6D4FA701	1	Heaven's gonna burn your eyes ...	Thievery Corporation feat. Emiliana Torrini ...

user_id	song	score	rank
c66c10a9567f0d82ff31441a9 fd5063e5cd9dfe8 ...	Sehr kosmisch - Harmonia	4754.0	1
c66c10a9567f0d82ff31441a9 fd5063e5cd9dfe8 ...	Undo - Björk	4227.0	2
c66c10a9567f0d82ff31441a9 fd5063e5cd9dfe8 ...	You're The One - Dwight Yoakam ...	3781.0	3
c66c10a9567f0d82ff31441a9 fd5063e5cd9dfe8 ...	Dog Days Are Over (Radio Edit) - Florence + The ...	3633.0	4
c66c10a9567f0d82ff31441a9 fd5063e5cd9dfe8 ...	Revelry - Kings Of Leon	3527.0	5
c66c10a9567f0d82ff31441a9 fd5063e5cd9dfe8 ...	Horn Concerto No. 4 in E flat K495: II. Romance ...	3161.0	6
c66c10a9567f0d82ff31441a9 fd5063e5cd9dfe8 ...	Secrets - OneRepublic	3148.0	7
c66c10a9567f0d82ff31441a9 fd5063e5cd9dfe8 ...	Fireflies - Charttraxx Karaoke ...	2532.0	8
c66c10a9567f0d82ff31441a9 fd5063e5cd9dfe8 ...	Tive Sim - Cartola	2521.0	9
c66c10a9567f0d82ff31441a9 fd5063e5cd9dfe8 ...	Drop The World - Lil Wayne / Eminem ...	2053.0	10

user_id	song	score	rank
279292bb36dbfc7f505e36ebf 038c81eb1d1d63e ...	Sehr kosmisch - Harmonia	4754.0	1
279292bb36dbfc7f505e36ebf 038c81eb1d1d63e ...	Undo - Björk	4227.0	2
279292bb36dbfc7f505e36ebf 038c81eb1d1d63e ...	You're The One - Dwight Yoakam ...	3781.0	3
279292bb36dbfc7f505e36ebf 038c81eb1d1d63e ...	Dog Days Are Over (Radio Edit) - Florence + The ...	3633.0	4
279292bb36dbfc7f505e36ebf 038c81eb1d1d63e ...	Revelry - Kings Of Leon	3527.0	5
279292bb36dbfc7f505e36ebf 038c81eb1d1d63e ...	Horn Concerto No. 4 in E flat K495: II. Romance ...	3161.0	6
279292bb36dbfc7f505e36ebf 038c81eb1d1d63e ...	Secrets - OneRepublic	3148.0	7
279292bb36dbfc7f505e36ebf 038c81eb1d1d63e ...	Hey_ Soul Sister - Train	2538.0	8
279292bb36dbfc7f505e36ebf 038c81eb1d1d63e ...	Fireflies - Charttraxx Karaoke ...	2532.0	9
279292bb36dbfc7f505e36ebf 038c81eb1d1d63e ...	Tive Sim - Cartola	2521.0	10

user_id	song	score	rank
c66c10a9567f0d82ff31441a9 fd5063e5cd9dfe8 ...	Cuando Pase El Temblor - Soda Stereo ...	0.0194504536115	1
c66c10a9567f0d82ff31441a9 fd5063e5cd9dfe8 ...	Fireflies - Charttraxx Karaoke ...	0.0144737317012	2
c66c10a9567f0d82ff31441a9 fd5063e5cd9dfe8 ...	Love Is A Losing Game - Amy Winehouse ...	0.0142865960415	3
c66c10a9567f0d82ff31441a9 fd5063e5cd9dfe8 ...	Marry Me - Train	0.014133471709	4
c66c10a9567f0d82ff31441a9 fd5063e5cd9dfe8 ...	Secrets - OneRepublic	0.013591665488	5
c66c10a9567f0d82ff31441a9 fd5063e5cd9dfe8 ...	Sehr kosmisch - Harmonia	0.0133987894425	6
c66c10a9567f0d82ff31441a9 fd5063e5cd9dfe8 ...	Te Hacen Falta Vitaminas - Soda Stereo ...	0.0129302831796	7
c66c10a9567f0d82ff31441a9 fd5063e5cd9dfe8 ...	OMG - Usher featuring will.i.am ...	0.0127778282532	8
c66c10a9567f0d82ff31441a9 fd5063e5cd9dfe8 ...	Y solo se me ocurre amarte (Unplugged) - ...	0.0123411279458	9
c66c10a9567f0d82ff31441a9 fd5063e5cd9dfe8 ...	No Dejes Que... - Caifanes ...	0.0121042499175	10

user_id	song	score	rank
279292bb36dbfc7f505e36ebf 038c81eb1d1d63e ...	Riot In Cell Block Number Nine - Dr Feelgood ...	0.0374999940395	1
279292bb36dbfc7f505e36ebf 038c81eb1d1d63e ...	Sei Lá Mangueira - Elizeth Cardoso ...	0.0331632643938	2
279292bb36dbfc7f505e36ebf 038c81eb1d1d63e ...	The Stallion - Ween	0.0322580635548	3
279292bb36dbfc7f505e36ebf 038c81eb1d1d63e ...	Rain - Subhumans	0.0314159244299	4
279292bb36dbfc7f505e36ebf 038c81eb1d1d63e ...	West One (Shine On Me) - The Ruts ...	0.0306771993637	5
279292bb36dbfc7f505e36ebf 038c81eb1d1d63e ...	Back Against The Wall - Cage The Elephant ...	0.0301204770803	6
279292bb36dbfc7f505e36ebf 038c81eb1d1d63e ...	Life Less Frightening - Rise Against ...	0.0284431129694	7
279292bb36dbfc7f505e36ebf 038c81eb1d1d63e ...	A Beggar On A Beach Of Gold - Mike And The ...	0.0230024904013	8
279292bb36dbfc7f505e36ebf 038c81eb1d1d63e ...	Audience Of One - Rise Against ...	0.0193938463926	9
279292bb36dbfc7f505e36ebf 038c81eb1d1d63e ...	Blame It On The Boogie - The Jacksons ...	0.0189873427153	10

song	similar	score	rank
With Or Without You - U2	I Still Haven't Found What I'm Looking For ...	0.042857170105	1
With Or Without You - U2	Hold Me_ Thrill Me_ Kiss Me_ Kill Me - U2 ...	0.0337349176407	2
With Or Without You - U2	Window In The Skies - U2	0.0328358411789	3
With Or Without You - U2	Vertigo - U2	0.0300751924515	4
With Or Without You - U2	Sunday Bloody Sunday - U2	0.0271317958832	5
With Or Without You - U2	Bad - U2	0.0251798629761	6
With Or Without You - U2	A Day Without Me - U2	0.0237154364586	7
With Or Without You - U2	Another Time Another Place - U2 ...	0.0203251838684	8
With Or Without You - U2	Walk On - U2	0.0202020406723	9
With Or Without You - U2	Get On Your Boots - U2	0.0196850299835	10

song	similar	score	rank
Chan Chan (Live) - Buena Vista Social Club ...	Murmullo - Buena Vista Social Club ...	0.188118815422	1
Chan Chan (Live) - Buena Vista Social Club ...	La Bayamesa - Buena Vista Social Club ...	0.18719214201	2
Chan Chan (Live) - Buena Vista Social Club ...	Amor de Loca Juventud - Buena Vista Social Club ...	0.184834122658	3
Chan Chan (Live) - Buena Vista Social Club ...	Diferente - Gotan Project	0.0214592218399	4
Chan Chan (Live) - Buena Vista Social Club ...	Mistica - Orishas	0.0205761194229	5
Chan Chan (Live) - Buena Vista Social Club ...	Hotel California - Gipsy Kings ...	0.0193049907684	6
Chan Chan (Live) - Buena Vista Social Club ...	Nací Orishas - Orishas	0.0191571116447	7
Chan Chan (Live) - Buena Vista Social Club ...	Gitana - Willie Colon	0.018796980381	8
Chan Chan (Live) - Buena Vista Social Club ...	Le Moulin - Yann Tiersen	0.018796980381	9
Chan Chan (Live) - Buena Vista Social Club ...	Criminal - Gotan Project	0.0187793374062	10