Matrix Factorization Models


In [22]:
%matplotlib inline
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt


def PlotHeatmap(X):
    """Draw a matrix as a tick-less heatmap with the reversed gray colormap.

    The figure is sized from the matrix shape (one third of an inch per
    cell) and the color scale is pinned to the interval [0, 1].

    Parameters
    ----------
    X : array exposing ``.shape`` with at least two axes
        Matrix to display; axis 0 runs vertically, axis 1 horizontally.
    """
    cells_per_inch = 3.
    n_rows, n_cols = X.shape[0], X.shape[1]
    plt.figure(figsize=(n_cols / cells_per_inch, n_rows / cells_per_inch))
    plt.imshow(X, vmin=0, vmax=1, interpolation='nearest')
    # Applied after imshow: set_cmap recolors the image that was just drawn.
    plt.set_cmap('gray_r')
    axes = plt.gca()
    axes.set_xticks([])
    axes.set_yticks([])
    plt.show()


# Sizes: Y is N x M and is built from R components.
N, M, R = 8, 10, 1

# Left factor: squared uniform samples, rescaled so every column sums to one.
A = np.square(np.random.rand(N, R))
A = A / A.sum(axis=0)

# Right factor: squared uniform samples, rescaled so every row sums to one.
B = np.square(np.random.rand(R, M))
B = B / B.sum(axis=1, keepdims=True)

# Product of the two factors.
Y = np.dot(A, B)

PlotHeatmap(A)
PlotHeatmap(B)
PlotHeatmap(Y)


Singular Value Decomposition


In [8]:
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
import scipy as sc
import scipy.linalg as la
import scipy.misc

# Load the "ascent" sample image.
# scipy.misc.ascent was deprecated in SciPy 1.10 and removed in SciPy 1.12;
# on newer installations fall back to its replacement, scipy.datasets.ascent
# (which relies on the optional `pooch` package to fetch the image).
try:
    X = sc.misc.ascent()
except AttributeError:
    from scipy import datasets as _sc_datasets
    X = _sc_datasets.ascent()

fig = plt.figure(figsize=(7,7))
plt.gray()  # grayscale colormap for the image drawn below
plt.imshow(X)
plt.show()



In [9]:
# Factor the image, then display reconstructions that keep only the first
# rnk singular values (rnk = 1, 3, ..., 19).
U, S, Vt = la.svd(X)

M, N = X.shape

for rnk in range(1, 20, 2):
    # Copy of S with every entry from position rnk onward zeroed out.
    Sr = np.where(np.arange(len(S)) < rnk, S, 0.0)
    # Embed the truncated spectrum in an M x N matrix.
    Sig = la.diagsvd(Sr, M, N)

    # Reconstruction U * Sig * Vt.
    A2 = U.dot(Sig).dot(Vt)

    fig = plt.figure(figsize=(5, 5))
    plt.gray()
    plt.imshow(A2)
    plt.show()


Recommendation Systems


In [10]:
import pandas as pd
%matplotlib inline
import matplotlib as mpl
import matplotlib.pylab as plt
import numpy as np

# Load the three MovieLens 100k tables: ratings, movie metadata, user info.
# None of the files has a header row, so column names are supplied here.
rnames = ['user_id', 'movie_id', 'rating', 'timestamp']
ratings = pd.read_table('data/ml-100k/u.data', sep='\t', header=None, names=rnames)

inames = ['movie_id', 'movie_title', 'release_date', 'video_release_date',
        'IMDb_URL','unknown', 'Action', 'Adventure','Animation',
        'Childrens', 'Comedy', 'Crime', 'Documentary', 'Drama', 'Fantasy',
        'Film_Noir', 'Horror', 'Musical', 'Mystery', 'Romance', 'Sci_Fi',
        'Thriller', 'War', 'Western']
# u.item holds accented movie titles stored as Latin-1 bytes; with the default
# UTF-8 decoding on Python 3 the read fails with UnicodeDecodeError, so the
# encoding is given explicitly.
items = pd.read_table('data/ml-100k/u.item', sep='|', header=None, names=inames,
                      encoding='latin-1')

unames = ['user_id','age','gender','occupation', 'zip_code']
users = pd.read_table('data/ml-100k/u.user', sep='|', header=None, names=unames)
users


Out[10]:
user_id age gender occupation zip_code
0 1 24 M technician 85711
1 2 53 F other 94043
2 3 23 M writer 32067
3 4 24 M technician 43537
4 5 33 F other 15213
5 6 42 M executive 98101
6 7 57 M administrator 91344
7 8 36 M administrator 05201
8 9 29 M student 01002
9 10 53 M lawyer 90703
10 11 39 F other 30329
11 12 28 F other 06405
12 13 47 M educator 29206
13 14 45 M scientist 55106
14 15 49 F educator 97301
15 16 21 M entertainment 10309
16 17 30 M programmer 06355
17 18 35 F other 37212
18 19 40 M librarian 02138
19 20 42 F homemaker 95660
20 21 26 M writer 30068
21 22 25 M writer 40206
22 23 30 F artist 48197
23 24 21 F artist 94533
24 25 39 M engineer 55107
25 26 49 M engineer 21044
26 27 40 F librarian 30030
27 28 32 M writer 55369
28 29 41 M programmer 94043
29 30 7 M student 55436
... ... ... ... ... ...
913 914 44 F other 08105
914 915 50 M entertainment 60614
915 916 27 M engineer N2L5N
916 917 22 F student 20006
917 918 40 M scientist 70116
918 919 25 M other 14216
919 920 30 F artist 90008
920 921 20 F student 98801
921 922 29 F administrator 21114
922 923 21 M student E2E3R
923 924 29 M other 11753
924 925 18 F salesman 49036
925 926 49 M entertainment 01701
926 927 23 M programmer 55428
927 928 21 M student 55408
928 929 44 M scientist 53711
929 930 28 F scientist 07310
930 931 60 M educator 33556
931 932 58 M educator 06437
932 933 28 M student 48105
933 934 61 M engineer 22902
934 935 42 M doctor 66221
935 936 24 M other 32789
936 937 48 M educator 98072
937 938 38 F technician 55038
938 939 26 F student 33319
939 940 32 M administrator 02215
940 941 20 M student 97229
941 942 48 F librarian 78209
942 943 22 M student 77841

943 rows × 5 columns


In [2]:
# Dense user x movie rating matrix; NaN marks "not rated".
# Ids are 1-based in the ratings table, so the largest id is also the count
# of rows / columns needed.
NRows = int(ratings['user_id'].max())
NCols = int(ratings['movie_id'].max())

X = np.full((NRows, NCols), np.nan)

# One vectorized assignment replaces the former per-row loop, which relied on
# Series.ix (removed in pandas 1.0) and did ~100k scalar look-ups.
# NOTE(review): assumes each (user, movie) pair occurs at most once; with
# duplicates NumPy does not promise which rating would be kept — confirm.
X[ratings['user_id'].values - 1,
  ratings['movie_id'].values - 1] = ratings['rating'].values

In [4]:
# Small window of the rating matrix: rows 6..15 (users) and columns 3..19 (movies).
Data = X[6:16, 3:20]

# Color scale fixed to 0..5; NaN cells (no rating) are left unpainted by imshow.
plt.imshow(Data, vmin=0, vmax=5, interpolation='nearest')
plt.colorbar()
plt.set_cmap('jet')
plt.gca().set_xlabel('Movies')
plt.gca().set_ylabel('Users')
plt.show()



In [17]:
# Rows labelled 0 through 100 (label slices include the end point, hence 101 rows).
# DataFrame.ix was removed in pandas 1.0; .loc is the label-based equivalent.
users.loc[0:100]


Out[17]:
user_id age gender occupation zip_code
0 1 24 M technician 85711
1 2 53 F other 94043
2 3 23 M writer 32067
3 4 24 M technician 43537
4 5 33 F other 15213
5 6 42 M executive 98101
6 7 57 M administrator 91344
7 8 36 M administrator 05201
8 9 29 M student 01002
9 10 53 M lawyer 90703
10 11 39 F other 30329
11 12 28 F other 06405
12 13 47 M educator 29206
13 14 45 M scientist 55106
14 15 49 F educator 97301
15 16 21 M entertainment 10309
16 17 30 M programmer 06355
17 18 35 F other 37212
18 19 40 M librarian 02138
19 20 42 F homemaker 95660
20 21 26 M writer 30068
21 22 25 M writer 40206
22 23 30 F artist 48197
23 24 21 F artist 94533
24 25 39 M engineer 55107
25 26 49 M engineer 21044
26 27 40 F librarian 30030
27 28 32 M writer 55369
28 29 41 M programmer 94043
29 30 7 M student 55436
... ... ... ... ... ...
71 72 48 F administrator 73034
72 73 24 M student 41850
73 74 39 M scientist T8H1N
74 75 24 M entertainment 08816
75 76 20 M student 02215
76 77 30 M technician 29379
77 78 26 M administrator 61801
78 79 39 F administrator 03755
79 80 34 F administrator 52241
80 81 21 M student 21218
81 82 50 M programmer 22902
82 83 40 M other 44133
83 84 32 M executive 55369
84 85 51 M educator 20003
85 86 26 M administrator 46005
86 87 47 M administrator 89503
87 88 49 F librarian 11701
88 89 43 F administrator 68106
89 90 60 M educator 78155
90 91 55 M marketing 01913
91 92 32 M entertainment 80525
92 93 48 M executive 23112
93 94 26 M student 71457
94 95 31 M administrator 10707
95 96 25 F artist 75206
96 97 43 M artist 98006
97 98 49 F executive 90291
98 99 20 M student 63129
99 100 36 M executive 90254
100 101 15 M student 05146

101 rows × 5 columns


In [5]:
def nmf_kl_multiplicative(D, M, W, H, EPOCH=1):
    """Masked non-negative matrix factorization with multiplicative updates.

    Alternately rescales ``W`` and ``H`` so that ``W.dot(H)`` approximates the
    observed entries of ``D``, using the multiplicative update rules for the
    KL-divergence objective::

        W <- W * ((D / WH) H^T) / (M H^T)
        H <- H * (W^T (D / WH)) / (W^T M)

    Parameters
    ----------
    D : ndarray, shape (Nr, Nc)
        Data matrix. Entries where ``M == 0`` are ignored and may be NaN.
    M : ndarray, shape (Nr, Nc)
        Mask; a zero marks a missing entry of ``D``.
    W : ndarray, shape (Nr, R)
        Initial left factor.
    H : ndarray, shape (R, Nc)
        Initial right factor.
    EPOCH : int, optional
        Number of alternating (W, then H) update sweeps.

    Returns
    -------
    W, H : ndarray
        Updated factors. New arrays are returned; the inputs are not modified.

    Notes
    -----
    If a row (or column) of ``M`` is entirely zero, both the numerator and the
    denominator of the corresponding update are zero. That 0/0 used to yield
    NaN, which then spread to every entry of ``W`` and ``H``. Such rows of
    ``W`` (columns of ``H``) are now simply left unchanged.
    """
    M = np.asarray(M)
    observed = M != 0

    # Work on a copy so the caller's data (including its NaNs) stays intact.
    MD = D.copy()
    MD[~observed] = 0

    def _update_factor(num, den):
        # Element-wise num/den; where den is not positive the factor is 1,
        # i.e. "no information, keep the current value".
        factor = np.ones(num.shape, dtype=float)
        np.divide(num, den, out=factor, where=den > 0)
        return factor

    def _data_over_estimate(estimate):
        # MD / estimate on observed cells, exactly 0 on missing cells.
        quotient = np.zeros(estimate.shape, dtype=float)
        np.divide(MD, estimate, out=quotient, where=observed)
        return quotient

    for e in range(EPOCH):
        Q = _data_over_estimate(W.dot(H))
        W = W * _update_factor(Q.dot(H.T), np.dot(M, H.T))

        # Recompute the estimate with the fresh W before updating H.
        Q = _data_over_estimate(W.dot(H))
        H = H * _update_factor(W.T.dot(Q), np.dot(W.T, M))

    return W, H

In [8]:
#Rank
R = 3

# Data
Nr = Data.shape[0]
Nc = Data.shape[1]

# Initialize
W = np.random.rand(Nr, R)*100
H = np.random.rand(R, Nc)*100

Mask = np.ones_like(Data)
Mask[np.isnan(Data)] = 0

W,H = nmf_kl_multiplicative(Data, Mask, W, H, EPOCH=1000)
Xhat = W.dot(H)

def ShowMatrix(X, title=''):
    """Show a matrix in a new figure with a colorbar and the 'jet' colormap.

    The color scale is fixed to the interval [0, 5]; the axes are labelled
    'Movies' (horizontal) and 'Users' (vertical).

    Parameters
    ----------
    X : array accepted by ``plt.imshow``
        Matrix to display.
    title : str, optional
        Text placed above the plot.
    """
    plt.figure()
    plt.imshow(X, vmin=0, vmax=5, interpolation='nearest')
    plt.colorbar()
    # Applied after imshow so that it recolors the image just drawn.
    plt.set_cmap('jet')
    axes = plt.gca()
    axes.set_xlabel('Movies')
    axes.set_ylabel('Users')
    axes.set_title(title)
    plt.show()
    
# Observed ratings (with gaps) next to the factorization's estimate.
ShowMatrix(Data, title='original')
ShowMatrix(Xhat, title='estimate')


(From the Readme)

Description

This dataset contains social networking, tagging, and music artist listening information 
from a set of 2K users from Last.fm online music system.
http://www.last.fm 

The dataset is released in the framework of the 2nd International Workshop on 
Information Heterogeneity and Fusion in Recommender Systems (HetRec 2011) 
http://ir.ii.uam.es/hetrec2011 
at the 5th ACM Conference on Recommender Systems (RecSys 2011)
http://recsys.acm.org/2011 

Data statistics

* 1892 users
* 17632 artists

* 12717 bi-directional user friend relations, i.e. 25434 (user_i, user_j) pairs
    * avg. 13.443 friend relations per user
* 92834 user-listened artist relations, i.e. tuples [user, artist, listeningCount]
    * avg. 49.067 artists most listened by each user
    * avg. 5.265 users who listened each artist
* 11946 tags  
* 186479 tag assignments (tas), i.e. tuples [user, tag, artist]
    * avg. 98.562 tas per user
    * avg. 14.891 tas per artist
    * avg. 18.930 distinct tags used by each user
    * avg. 8.764 distinct tags used for each artist


In [2]:
import scipy.sparse as sparse
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
%matplotlib inline


/Users/cemgil/anaconda/envs/py27/lib/python2.7/site-packages/matplotlib/font_manager.py:273: UserWarning: Matplotlib is building the font cache using fc-list. This may take a moment.
  warnings.warn('Matplotlib is building the font cache using fc-list. This may take a moment.')

Listening counts per (user, artist) pair


In [3]:
# Create ListeningCount(user, artist) data
df = pd.read_csv('./data/hetrec2011-lastfm-2k/user_artists.dat', sep='\t')
userID2idx = {ids: i for i,ids in enumerate(sorted(df['userID'].unique()))}
idx2userID = {i: ids for i,ids in enumerate(sorted(df['userID'].unique()))}

artistID2idx = {ids: i for i,ids in enumerate(sorted(df['artistID'].unique()))}
idx2artistID = {i: ids for i,ids in enumerate(sorted(df['artistID'].unique()))}

NumOfUsers = len(userID2idx)
NumOfArtists = len(artistID2idx)

X_lil = sparse.lil_matrix((NumOfUsers, NumOfArtists))
for rec in df.itertuples():
    i = userID2idx[int(rec[1])]
    j = artistID2idx[int(rec[2])]
    X_lil[i, j] = float(rec[3])

In [4]:
# Sparsity pattern of the whole user x artist matrix.
plt.figure(figsize=(12, 4))
plt.spy(X_lil, markersize=1)
plt.show()


Artist names


In [5]:
# Artist id <-> artist name lookup tables.
# Note: this rebinds `df`, which earlier held the user/artist listening counts.
df = pd.read_csv('./data/hetrec2011-lastfm-2k/artists.dat', sep='\t')

# DataFrame.ix was removed in pandas 1.0; pairing the two columns directly
# builds the same mappings (for a repeated key the last row still wins).
artistID2artistName = dict(zip(df['id'], df['name']))
artistName2artistID = dict(zip(df['name'], df['id']))

artistName2artistID


Out[5]:
{'Harry Potter Movie Theme': 7254,
 'Alexandra Burke': 915,
 'Los Bunkers': 13603,
 'Juliana Pasha': 6002,
 'Antonio Meliveo': 6030,
 'Back Miller': 7705,
 'Caifanes': 1501,
 'S.O.D.': 4803,
 'Kyte': 4174,
 'The Pretenders': 5550,
 'Esther Phillips': 15772,
 'Everything Is Made in China': 4228,
 'Endstille': 3822,
 'The Duke Spirit': 9484,
 'Foundation': 4148,
 'Empire! Empire! (I Was A Lonely Estate)': 12487,
 'All For Nothing': 10573,
 'Neil Young & Crazy Horse': 9543,
 'Tyra Banks': 4925,
 'And So I Watch You From Afar': 11272,
 'Junior Ferrari': 16072,
 'flood feelings': 18203,
 'Tracktor Bowling': 508,
 'Magda Femme & Iwona W\xc4\x99growska': 9036,
 'Ricki-Lee': 2564,
 'XP8': 1332,
 'SPL': 7958,
 'Malefique': 11365,
 'Bikini Kill': 3744,
 'Peter Horrevorts': 12374,
 'Sister Hazel': 14317,
 'Ania Wyszkoni': 14902,
 'Ceza': 12420,
 'Vibe Tribe': 10544,
 '\xd0\x9a\xd0\xb0\xd1\x81\xd1\x82\xd0\xb0': 2193,
 'Roma Kenga': 11171,
 '\xe5\x9d\x82\xe6\x9c\xac\xe9\xbe\x8d\xe4\xb8\x80': 6075,
 'Noise Ratchet': 15432,
 'Necros Christos': 13684,
 'Eddy Arnold': 12684,
 'Teresa Salgueiro': 13285,
 'Fiorella Mannoia': 5624,
 'Bachir Attar': 17341,
 'Fokus': 5057,
 'Chris Tomlin': 11223,
 'Turma do Pagode': 4464,
 'Handshakes And Highfives': 5962,
 'FC/Kahuna': 15624,
 'Padre Marcelo Rossi': 14574,
 'Max Tannone': 7544,
 'Capella De Ministrers': 13591,
 'Txus Di Fellatio': 9244,
 'Magic Box': 15158,
 'Ottoman Military Band': 17124,
 'Neuromantik': 5703,
 'Burzum': 4271,
 'Kirk Whalum': 9194,
 'Cerys Matthews': 11915,
 'Abominable Putridity': 3819,
 'Terminal Choice': 3188,
 'Dino': 10158,
 'BOB, Hayley Williams and Eminem': 16120,
 'A Wilhelm Scream': 5631,
 'Things People Say': 2283,
 'PMMP': 3915,
 'New Trolls': 14947,
 'Sandy': 335,
 'Twisted Sister': 2341,
 'Rotor': 9925,
 'A Black Rose Burial': 6018,
 'Busta Rhymes': 2919,
 'm-flo': 8408,
 'David Nevue': 1774,
 'Robin Guthrie': 7879,
 'Run Level Zero': 16913,
 'Samantha Newark': 11242,
 'Rod Piazza & The Blues Instigators': 2890,
 'Paula Abdul': 4915,
 'Hockey': 3370,
 'Jason Walker': 8356,
 'Reale Accademia di Musica': 14972,
 "Bonnie 'Prince' Billy": 4532,
 'Shwayze': 3338,
 'Mad Season': 10537,
 '\xeb\xb0\xb0\xec\xb9\x98\xea\xb8\xb0': 12659,
 'Fantan Mojah': 12240,
 'Death in Vegas': 10524,
 'Secret Affair': 17478,
 'William Control': 1102,
 '36 Crazyfists': 818,
 'Juego De Dos': 8357,
 'AaRON': 8957,
 'Nikka Costa': 10016,
 'D.R.I.': 3780,
 'Shining': 7055,
 'Sultan Selim III': 17128,
 'Tulipa Ruiz': 4717,
 'Esthetic Education': 4998,
 'Altered Images': 6780,
 'Olu': 16077,
 'The Pretty Things': 594,
 'Hiroshima Will Burn': 11816,
 'Avi\xc3\xb5es do Forr\xc3\xb3': 13497,
 'stabyouintheheadandeatyourfaceoff': 14176,
 'fLako x Noir': 17303,
 'Bon Iver': 445,
 'Hyper Crush': 5307,
 'An April March': 17374,
 'Jolette': 12559,
 'Brokenkites': 11019,
 'Goodie Mob': 18073,
 'Rachel Stevens': 911,
 'Grzegorz Tomczak': 18745,
 'Rock of Heltah Skeltah': 18065,
 'Alcoholic Faith Mission': 7828,
 'Shantel': 8239,
 'Kings of Leon': 228,
 'Chimarruts': 3384,
 'Cold': 496,
 'Yrrow': 7474,
 '\xd0\x9f\xd0\xbe\xd0\xbb\xd0\xb8\xd1\x82\xd0\x97\xd0\xb5\xd0\xba': 4907,
 'Bad Brains': 3792,
 '\xd0\xa4\xd0\xb0\xd0\xba\xd1\x82\xd0\xbe\xd1\x80 \xd0\xa1\xd1\x82\xd1\x80\xd0\xb0\xd1\x85\xd0\xb0': 4501,
 'Stomper 98': 9623,
 'Michel Polnareff': 10017,
 '{{{Sunset}}}': 12608,
 'City In The Sky': 4010,
 'Nat King Cole': 2458,
 'Cannabis Corpse': 9129,
 'der Gast in der Falle': 16256,
 'Junior Senior': 4389,
 'Cybo': 13036,
 'Javelin': 12748,
 'WET FINGERS': 14899,
 'Surfer Blood': 15172,
 'Wheatus': 12482,
 'Eddie Kendricks': 13821,
 'fIREHOSE': 12063,
 'Stabilo': 15453,
 'Morton Feldman': 6168,
 'Nate57': 10883,
 '(sic)monic': 14872,
 'Fourteen 14': 3979,
 'The Korgis': 9993,
 'Raphael Saadiq': 17606,
 'The Alchemist': 6900,
 'Nikolas Metaxas': 643,
 'Vypsan\xc3\xa1 fiXa': 17840,
 'Marilyn Mansoon': 9409,
 'Revolver': 8062,
 'Gerry Rafferty': 4363,
 'Ruth Ruth': 4374,
 'Marius M\xc3\xbcller-Westernhagen': 10176,
 'Plus-Tech Squeeze Box': 15090,
 'Basia': 74,
 'Cassandra Steen': 347,
 'The Beloved': 7934,
 'Vieriv\xc3\xa4 Viiksiportieeri': 6145,
 'Whitney Houston': 251,
 'Smoke DZA': 14280,
 'Sandy e Junior': 3332,
 'Headscan': 1265,
 'Sebastian Bach': 7117,
 'Shunsuke Kida': 13851,
 'Ricky Martin': 1475,
 'Montell Jordan': 15922,
 'X-Mal Deutschland': 10794,
 'Gonjasufi': 8214,
 'Teenage Cool Kids': 12488,
 'SPK': 2042,
 'Vico C': 14540,
 'Vinterland': 1273,
 'Wintersun': 2969,
 'The Primitives': 3296,
 'Final Sacrifice': 13680,
 'Kyuss': 881,
 'Juan Gabriel': 7851,
 'Alex Ubago': 8375,
 '\xe3\x82\xbe\xe3\x83\xad': 12798,
 'Gustav Mahler': 9552,
 'Bladerunner': 16571,
 'The Broadways': 4200,
 'The Mission': 8668,
 'Baby Rasta': 12592,
 'Carbon Based Lifeforms': 747,
 'When Saints Go Machine': 15717,
 'Hateform': 6822,
 '\xe3\x82\xad\xe3\x83\xa3\xe3\x83\xb3\xe3\x82\xbc\xe3\x83\xab': 12810,
 'Matt Hendricks': 14016,
 'Cdatakill': 15633,
 'Heinrich Ignaz Franz von Biber': 16319,
 'Paloma Faith': 4252,
 'Dash Berlin Feat Solid Sessions': 10701,
 'XXX Maniak': 18206,
 'Paquita La Del Barrio': 14374,
 'Borysewicz-Kukiz': 8165,
 'Replace\xc4\x80': 14555,
 'Wilderness': 8677,
 'Ludacris': 1402,
 'Temple of the Dog': 9346,
 'Uffie': 5149,
 'Abney Park': 16206,
 'The Five Keys': 16303,
 'Novastar': 16503,
 'The Pioneers': 9590,
 'Spongebob Squarepants': 5956,
 'DJ Dean': 8779,
 'Luna': 7155,
 'Holy Terror': 15200,
 'A Radio With Guts': 4192,
 'Sigh': 15302,
 'Mory Kant\xc3\xa9': 7596,
 'AbraSKAdabra': 7339,
 'Soso': 13790,
 'X-Dream': 1255,
 'The Qemists': 3812,
 'Five for Fighting': 3459,
 'Falling Red': 10443,
 'Alex Swings, Oscar Sings!': 15417,
 'Metropolitan Jazz Affair': 16338,
 'Oscar Peterson Trio': 12127,
 'Chase Coy': 6612,
 'Arrowwood': 9994,
 'The Shortwave Set': 4066,
 'Wizards': 933,
 'Nocte Obducta': 1267,
 'Pzychobitch': 11186,
 'Luke Bryan': 3583,
 'Black Widow': 7045,
 'Frente!': 14239,
 'Sleeping at Last': 14184,
 'Syrup16g': 15077,
 'Nature Living': 8269,
 'Specimen': 3448,
 'Plastilina Mosh': 13929,
 'Japanther': 3344,
 'The Jesus and Mary Chain': 3279,
 'Iron & Wine and Calexico': 11801,
 'iron jesus (featuring jahlili)': 13958,
 'Christa Black': 18264,
 'The Boomtown Rats': 15771,
 'Miles Davis': 610,
 'Artie Shaw': 7437,
 'Ratos de Por\xc3\xa3o': 4791,
 'Mathew Jonson': 14774,
 'A': 6923,
 'The Manges': 13246,
 'Teen Idols': 16878,
 'Memphis May Fire': 8473,
 'Raymond Scott': 13951,
 'Hideo Osaka Ensemble': 12458,
 'DE DE MOUSE': 15088,
 'Revolting Cocks': 876,
 'Lake of Tears': 8420,
 '\xd0\x98\xd0\xb3\xd1\x80\xd0\xb0 \xd0\xa1\xd0\xbb\xd0\xbe\xd0\xb2': 4689,
 'L7': 1817,
 'Wael Kfoury': 4984,
 'Frankie Ruiz': 1499,
 'ShamRain': 8953,
 'Seven Wiser': 15978,
 'The Armada': 16446,
 'Little Brother': 1839,
 'The Jelly Jam': 13139,
 'Cesare Basile': 18429,
 'Arashi': 14310,
 'Dirt Nasty': 6530,
 'Peter Baumann': 12828,
 'Dbs E A Quadrilha': 9174,
 "The Kings of Nuthin'": 9603,
 'Erik Hassle': 11728,
 'Huntingtons': 13242,
 'DCUP': 14556,
 'An Obscure Signal': 4018,
 'Zac Efron and Cast': 7392,
 'Astrud Gilberto': 3156,
 'Political Asylum': 14113,
 'Shpongle': 1226,
 'Don Carlos': 3166,
 'Francisco T\xc3\xa1rrega': 11671,
 'Timpuri noi': 15935,
 'Aereda': 9365,
 'Richie Sambora': 6291,
 'Teta Lando': 243,
 'Nando Reis': 4818,
 'Magda W\xc3\xb3jcik': 5725,
 'El Chojin': 17625,
 'Hank Williams III': 4536,
 '\xd0\x91\xd0\xb0\xd1\x87\xd0\xb8\xd0\xbd\xd1\x81\xd0\xba\xd0\xb8\xd0\xb9 \xd0\xb8 \xd0\xa1\xd1\x82\xd0\xb8\xd0\xbb\xd0\xbb\xd0\xb0\xd0\xb2\xd0\xb8\xd0\xbd': 3045,
 'Jerry Goldsmith': 7156,
 'The Sugarplastic': 5410,
 'Nikki Williams': 5293,
 'Carlos Puebla': 18695,
 'Bonde da Stronda': 7061,
 'Raney Shockne Feat. Stella Moon': 5283,
 'Rampkorv': 2011,
 'Sissel Kyrkjeb\xc3\xb8': 13435,
 'The Dead Cool Dropouts': 16924,
 'Kurban': 13510,
 'Toma': 5796,
 'Stacey Kent': 18400,
 'John Buzon Trio': 10373,
 'Tahiti 80': 7667,
 'Rush - Signals': 12382,
 'S. Ice feat. $lick': 16114,
 'Night Ranger': 2856,
 'The Cataracs': 2085,
 'Infant Sorrow': 11256,
 'Les Anges De La Nuit': 1333,
 'Francis Lai': 12674,
 'The Intelligence': 14142,
 'Carburator vs. Dark Clitoris Philosofum': 6992,
 'Don Rimini': 6049,
 'Boubacar Traor\xc3\xa9': 7569,
 '\xed\x83\x9c\xec\x96\x91': 2107,
 'Multipass': 6445,
 'Fall of Efrafa': 11624,
 'Schelmish': 7604,
 'In Strict Confidence': 4876,
 "Yngwie J. Malmsteen's Rising Force": 5838,
 '38th Parallel': 12698,
 'Maigra': 10090,
 'Felix da Housecat': 9860,
 'Reks': 8179,
 'Libera': 1780,
 'Cardo': 14784,
 'Acidwolf': 18149,
 'Herbert': 17525,
 'The Brothers Martin': 15567,
 'Up, Bustle and Out': 8232,
 'Lucio Aquilina': 14758,
 'Ion Dissonance': 4021,
 'Steve Angello & Sebastian Ingrosso': 7308,
 'Duck Sauce': 12897,
 'Catharsis': 16438,
 'Bruce Dickinson & Montserrat Cabelle': 17271,
 'The Dave Clark Five': 5403,
 '\xec\x8a\xb9\xeb\xa6\xac': 12596,
 'Die Happy': 9233,
 'The Pains of Being Pure at Heart': 2538,
 'Jesse Lee': 5291,
 'Erica Dunham': 18150,
 'MP Band/ Tear Down The Lies!': 10112,
 'Liquid Liquid': 16466,
 'Rick Caldwell': 11882,
 'Agoria': 17970,
 'La Vida Cuesta Libertades': 4507,
 'Rymdkraft': 4403,
 'Ligeia': 9798,
 'Stylophonic': 14202,
 'Alex Smoke': 11200,
 'I Hear Sirens': 5808,
 'Subsollo': 18248,
 'Fennesz': 3504,
 'Veil of Maya': 3997,
 'Shakira': 701,
 'Unbroken': 17187,
 'Stanfour': 4263,
 'Tha Blue Herb': 16159,
 'Curse': 9697,
 'Bad English': 12160,
 'Wayne Jarrett': 3165,
 'Roadrunner United': 6133,
 'William Onyeabor': 7597,
 'Arabic Songs': 18452,
 'Elize': 16807,
 'Medicine': 16270,
 'Montrose': 15986,
 'Ghost Brigade': 2374,
 'Pearl Jam': 614,
 'Bar\xc3\xb3n Rojo': 5809,
 'Ghosts': 11793,
 'Paula Toller': 15215,
 'The Jeff Healey Band': 9352,
 'Babes in Toyland': 9468,
 'Aerobitch': 6279,
 'Pan\xe2\x80\xa2American': 6162,
 'Yoko Ono': 11970,
 'Coheed and Cambria': 3621,
 'M Blanket': 12502,
 'Agnetha F\xc3\xa4ltskog': 1465,
 'Oficina G3': 8728,
 'Paulo C\xc3\xa9sar Baruk': 7249,
 'Knxwledge': 10551,
 '\xd0\x92\xd0\xb8\xd0\xbd\xd0\xbe\xd0\xb3\xd1\x80\xd0\xb0\xd0\xb4\xd0\xbd\xd1\x8b\xd0\xb9 \xd0\x94\xd0\xb5\xd0\xbd\xd1\x8c': 17413,
 'Aloha From Hell': 2435,
 'Ravi Shankar': 1219,
 'Necron99': 7993,
 'Preschool Tea Party Massacre': 5066,
 'Discharge': 3540,
 'MC Lars': 5304,
 'Donkeyboy': 5726,
 'Mal\xc3\xb3n': 17178,
 'Willy DeVille': 13268,
 'Haloo Helsinki!': 14070,
 'Ocelot': 5336,
 '\xc3\x81rea restrita': 11158,
 'Sopor Aeternus & The Ensemble of Shadows': 39,
 'Adam Ficek': 8059,
 'Neuro Dubel': 2734,
 'Lil Mama': 15744,
 'MAA': 14242,
 'Helena Paparizou': 637,
 'Sarah Brightman': 2044,
 'Onra': 1953,
 'The (International) Noise Conspiracy': 8470,
 'The Copyrights': 13253,
 'Trouble Over Tokyo': 10736,
 'Steve Lacy With Don Cherry': 4645,
 'Paco Bello': 7629,
 'Egg': 11435,
 'Tim Barry': 13617,
 '\xd0\x91\xd0\xb8-2': 3047,
 'Hora': 15290,
 'Takanashi Yasuharu': 8292,
 'The Urgency': 15195,
 'wecamewithbrokenteeth': 1548,
 'Quatro Por Um': 8725,
 'Kinky': 7073,
 'Hori': 7086,
 'Furious Styles': 10580,
 'Smile Smile': 14798,
 'Agua Bella': 5740,
 'Ash Ra Tempel': 3510,
 'Crucified Barbara': 3093,
 'Petra': 8013,
 'Stone Gossard': 12835,
 'Chat': 15263,
 'Elysion': 16426,
 'miaou': 11483,
 'DJ Ear': 6100,
 'Jeff Tweedy': 13338,
 'Bear in Heaven': 10616,
 'Bjornstad, Darling, Rypdal & Christensen': 18164,
 'Obituary': 4275,
 'Realicide': 3342,
 'Alicia Wiley': 10200,
 'Brendon Urie': 10983,
 '[spunge]': 15182,
 'maniax memori': 12313,
 'Gabriel Ananda': 14761,
 'Page & Plant': 9351,
 "Jack's Mannequin": 1105,
 'Deicide': 4265,
 'SLAPPER': 11794,
 'Bliss': 12890,
 'Access To Arasaka': 11307,
 'Loudon Wainwright III': 11417,
 'Department S': 18603,
 'Franco Levine': 5593,
 'Ten Yard Fight': 17949,
 'Alchemist Project': 8890,
 'Audrey Horne': 2365,
 'Gotan Project': 11765,
 "Vex'd": 18318,
 'New Dawn Foundation': 16018,
 'seis35': 12516,
 'JoMo': 4518,
 'Hubert Kah': 1064,
 'Kaizers Orchestra': 1093,
 'Marco Beltrami': 16956,
 'Young Heretics': 8621,
 'Shaman': 7101,
 'regsgnep': 17402,
 "Who's Who": 7317,
 '\xd0\x90\xd0\xbb\xd0\xbb\xd0\xb0 \xd0\x9f\xd1\x83\xd0\xb3\xd0\xb0\xd1\x87\xd1\x91\xd0\xb2\xd0\xb0': 11147,
 'Tori Amos': 961,
 'Harmonic 313': 16742,
 'Alexander': 337,
 'Catch Side': 4513,
 'Savino': 18254,
 'Tamara Angel': 3932,
 'Nengo Vieira': 8733,
 'Admiral Angry': 14165,
 'D-Bo': 10871,
 'Wiser Time': 4355,
 'Jacques Loussier Trio': 18358,
 'Cazuza': 3741,
 'Negative': 6235,
 '\xe7\x94\xb0\xe6\x9d\x91\xe3\x82\x86\xe3\x81\x8b\xe3\x82\x8a': 10235,
 'Toots and the Maytals': 8121,
 'Gang of Four': 7076,
 'Theory of a Deadman': 6249,
 'Here Comes The Kraken': 8296,
 'seabright': 10388,
 'Zac Brown Band': 5649,
 'Venus Verse': 17140,
 '\xe5\xa0\x80\xe6\xb1\x9f\xe7\x94\xb1\xe8\xa1\xa3': 17720,
 'Modjo': 16735,
 'Aquaria': 4340,
 'Yerzmyey': 10015,
 'Michael McDonald': 11871,
 'Would-Be-Goods': 13428,
 'Provision': 1084,
 'Kasabian': 969,
 'Lake Heartbeat': 16685,
 'Jessy Matador': 655,
 'Gerhard Oppitz, Dmitry Sitkovetzky, David Geringas': 14009,
 'Nina Nastasia': 9971,
 'Fernando Delgadillo': 10541,
 'VV Brown': 901,
 'The Ark': 3266,
 '\xd0\x9e\xd0\xba\xd0\xb5\xd0\xb0\xd0\xbd \xd0\x95\xd0\xbb\xd1\x8c\xd0\xb7\xd0\xb8': 1791,
 'The Dickies': 15917,
 'Bee Gees': 2031,
 'Parkway Drive': 805,
 'A.C. Newman': 9318,
 'Charles Hamilton': 12751,
 'Chicago': 5452,
 'Sessizlik': 10280,
 'Desert Dwellers': 11536,
 '\xd0\x9a\xd0\xb0\xd1\x80\xd0\xb0\xd0\xbd\xd0\xb4\xd0\xb0\xd1\x88': 9788,
 'One Man Army and the Undead Quartet': 10088,
 'Chacda': 17074,
 'Goodnight Sunrise': 6610,
 '\xd0\x9b\xd1\x91\xd1\x85\xd0\xb0 \xd0\x9d\xd0\xb8\xd0\xba\xd0\xbe\xd0\xbd\xd0\xbe\xd0\xb2': 13857,
 'Tony Yayo': 6622,
 'Ass': 10830,
 'Christopher Von Uckermann': 1430,
 'Jimmy Smith': 12117,
 'Noel Rosa': 6638,
 '\xd0\x9f\xd1\x81\xd0\xb8\xd1\x85\xd0\xb5\xd1\x8f': 758,
 'Yma Sumac': 14079,
 'Timbaland ft.Nelly Furtado and Soshy': 9899,
 'Geraldo Pino': 7583,
 'J Roddy Walston and The Business': 17850,
 'Questionmark Asylum': 16082,
 'Bane': 4204,
 'Bang': 7796,
 'The Be Good Tanyas': 11809,
 'Papas da L\xc3\xadngua': 14341,
 'Kevin Johansen': 1765,
 'Blakroc': 16089,
 'Ash': 4742,
 'Great Expectations': 10511,
 'Mordida': 17534,
 'Anya Marina': 7276,
 '\xe8\x97\xa4\xe6\x9c\xac\xe7\xbe\x8e\xe8\xb2\xb4': 17727,
 'Nikki Sixx': 5005,
 'Pifco': 17069,
 'Witch Hunt': 17948,
 'Dynamic Masters': 13195,
 'The Vacation': 8488,
 'Bilal Serbest': 2651,
 'Wisp': 2161,
 'Freezepop': 6544,
 'The Allman Brothers Band': 8111,
 'PMtoday': 3998,
 'Kissey Asplund': 18185,
 'Men Without Hats': 6065,
 'Hadise': 652,
 'Thesis Sahib': 13794,
 'Buhurizade Mustafa Itri Efendi': 17136,
 'Starfucker': 6303,
 'G\xc3\xa5te': 5363,
 'Brainiac': 15386,
 'Tristan Prettyman': 8358,
 'Tyrese': 11067,
 'Dolly Parton, Loretta Lynn & Tammy Wynette': 15401,
 'We vs. Death': 15759,
 'Stare Dobre Ma\xc5\x82\xc5\xbce\xc5\x84stwo': 9928,
 'Brooke Fraser': 14185,
 'Qari Syed Sadaqat Ali': 12451,
 'Stravinsky, Igor (1882-1971)': 16420,
 'Teedra Moses': 8074,
 '\xd0\xae\xd1\x80\xd0\xb8\xd0\xb9 \xd0\xa8\xd0\xb5\xd0\xb2\xd1\x87\xd1\x83\xd0\xba': 6272,
 '\xe4\xbc\x8d\xe4\xbd\xb0': 6514,
 'FACT': 8401,
 'Clueso': 7643,
 'Nechivile': 14575,
 'Turmion K\xc3\xa4til\xc3\xb6t': 4161,
 'Vision Talk': 13203,
 'General Patton vs. The X-Ecutioners': 4360,
 'DJ Drez': 3141,
 'Harvey Danger': 17849,
 'Tinyfolk': 10854,
 'The McClymonts': 8318,
 'A.F.I.': 5935,
 'Pereza': 9777,
 'GreenGender': 11203,
 'Ella Fitzgerald & Louis Armstrong': 12186,
 'MC Hammer': 17437,
 'Norma Loy': 10594,
 'For the Fallen Dreams': 8277,
 'KT Wanderlust': 14787,
 'Borys': 9019,
 'Kill Paradise': 6596,
 'Bonded By Blood': 14304,
 '40 \xd0\xa3\xd0\x9a\xd0\x92': 10482,
 'Chris Salvatore': 12625,
 'Arawak': 16348,
 'Alan Wilder': 1078,
 '\xd0\x9b\xd0\xb5\xd0\xbd\xd0\xb8\xd0\xbd\xd0\xb0 \xd0\x9f\xd0\xb0\xd0\xba\xd0\xb5\xd1\x82': 16188,
 'Capsule': 2101,
 'Gary Jules': 1388,
 'Little Big Town': 2443,
 'Coaxial': 14174,
 'Acrnym': 2680,
 'Fuertes Convicciones': 5145,
 'The Scene Aesthetic': 3033,
 'Tricky': 4316,
 'DJ Premier': 18062,
 'Chris Willis': 14558,
 'Cry of Love': 4373,
 '\xe7\x9c\x9f\xe7\x90\x86\xe7\xb5\xb5': 16092,
 'Marillion': 3264,
 'Double Nation': 8889,
 'Patrick Doyle': 4927,
 'Narziss': 7906,
 'Clancy Eccles': 9611,
 'Anthony Callea': 13549,
 'Kenny Rogers': 11769,
 'Streetlight Manifesto': 4208,
 'Popa Chubby': 13274,
 'La Toya Jackson': 4919,
 'Headplate': 5013,
 'H.U.V.A. Network': 7017,
 'Loretta Lynn': 4535,
 'Paul Kalkbrenner': 2824,
 'Tuck Everlasting Soundtrack': 7257,
 'The Charlottes': 16264,
 'Walls of Jericho': 4346,
 'Master Margherita': 11522,
 'Jay Reatard': 5782,
 'Pit10': 12417,
 'joa': 13301,
 'Alejandro Magno': 6518,
 'I See Stars': 2426,
 '\xd0\x92FI': 2185,
 'A Change of Pace': 4741,
 'Aelp\xc3\xa9acha': 2217,
 'Barry Harris': 5779,
 'skins': 16566,
 'VANILLA BEANS': 13648,
 'Cry Of The Afflicted': 14582,
 'Nautilus Pompilius': 1796,
 'King Missile': 2483,
 'Jill Scott': 263,
 'Leonel Garc\xc3\xada': 3061,
 'Kenickie': 17901,
 'Grouper': 6410,
 'Bauda': 16636,
 'Demon Hunter': 4170,
 'Platinum Blonde': 13012,
 'The Epoxies': 15839,
 'Lights of Euphoria': 1321,
 'Advent': 18543,
 'Edgar Broughton Band': 7882,
 '\xce\x9c\xcf\x8d\xcf\x81\xcf\x89\xce\xbd\xce\xb1\xcf\x82 \xce\xa3\xcf\x84\xcf\x81\xce\xb1\xcf\x84\xce\xae\xcf\x82': 18308,
 'Tina Dickow': 6351,
 'The Haunted': 7941,
 'Buffy the Vampire Slayer Cast': 13551,
 'White Apple Tree': 16619,
 'DMX': 6626,
 'Reverend and The Makers': 7186,
 'Edwin McCain': 14329,
 'I Set My Friends On Fire': 5067,
 'Jarabe de Palo': 7623,
 'Thalles': 12029,
 'Panic! at the Disco feat. Britney Spears and Gwen Stefani': 16719,
 'Guillaume de Machaut': 9149,
 'Kiwa': 5345,
 'Shobaleader One': 7489,
 'Bill Bailey': 15217,
 'The Meters': 8119,
 '\xd0\x9f\xd1\x80\xd0\xb8\xd0\xba\xd0\xbb\xd1\x8e\xd1\x87\xd0\xb5\xd0\xbd\xd0\xb8\xd1\x8f \xd0\xad\xd0\xbb\xd0\xb5\xd0\xba\xd1\x82\xd1\x80\xd0\xbe\xd0\xbd\xd0\xb8\xd0\xba\xd0\xbe\xd0\xb2': 7368,
 'Kari Jobe': 12680,
 'Michele Adamson': 10823,
 'Joe Dassin': 7130,
 'Cheb Zergui': 17362,
 'Cage': 9125,
 'Boytronic': 7344,
 'Los Fastidios': 2034,
 'Doc Watson': 3000,
 'Orphaned Land': 4613,
 '\xd0\x98\xd0\xb2\xd0\xb0\xd0\xbd \xd0\xa3\xd1\x80\xd0\xb3\xd0\xb0\xd0\xbd\xd1\x82': 7996,
 'Deluxe Trio': 10803,
 'Roman Fl\xc3\xbcgel': 15788,
 'Lee UHF & Andy Freestyle': 6093,
 'Youth Group': 11244,
 'The Devin Townsend Band': 2965,
 'In:aviate': 4014,
 'JAMOSA': 11841,
 'WWE & Jim Johnston (feat. Cage 9)': 7608,
 'Fat Mattress': 11429,
 'Kevin McHale': 5292,
 'Discharger': 9967,
 'Misstress Barbara': 17576,
 'The Lonely Island': 3712,
 'Flogging Molly': 7260,
 'Mads Langer': 15724,
 'FGFC820': 6874,
 'Extrabreit': 18607,
 'T.O.K.': 12407,
 'Arcangel y De La Ghetto': 14342,
 'JLS': 2939,
 'Menog': 4593,
 'Latte e Miele': 14964,
 'Ricardo Villalobos': 5871,
 '1200 Micrograms': 731,
 '\xe9\x9b\x85-MIYAVI-': 397,
 'Krynitza': 12286,
 'Guy Clark': 4537,
 'Cut Chemist': 6583,
 'Amy Jo Johnson': 10022,
 'The Fall': 3299,
 'Jeff Buckley': 215,
 'Phillip Boa': 14983,
 'Saez': 17916,
 'Rockamovya': 3170,
 'Inside the Whale': 6359,
 'Logh': 7890,
 'Cecile Chaminade': 13911,
 '\xe8\x8a\xb8\xe8\x83\xbd\xe5\xb1\xb1\xe5\x9f\x8e\xe7\xb5\x84': 9555,
 'The Bill': 13023,
 'Olly Murs': 15144,
 'Aaryn Doyle': 2181,
 'Lazyfish + Alexandroid': 18319,
 'Billy Preston': 14620,
 'Verba': 9889,
 'Autechre': 744,
 'Guilherma e santiago': 12177,
 'Kisschasy': 16352,
 'Jose Vanders': 13997,
 'Cold in May': 15534,
 'Keith Jarrett': 12119,
 'Satou Rina & Inoue Marina & Chihara Minori': 10246,
 'Azam Ali': 2952,
 'Graforr\xc3\xa9ia Xilarm\xc3\xb4nica': 4801,
 'Party Ben': 8015,
 'Fabio Frizzi': 10219,
 'Ten Sharp': 6924,
 'Footnotes': 8460,
 'O Teatro M\xc3\xa1gico': 438,
 'phantom icicles': 16936,
 'Mindless Faith': 9571,
 'Menstrua\xc3\xa7\xc3\xa3o Anarquika': 4804,
 'Ego Likeness': 18446,
 'Nina Simone': 1833,
 'Stephanie': 4431,
 'Bad Religion': 383,
 'Jai Paul': 9733,
 'Skrillex': 15564,
 'Passafire': 16841,
 'U.D.R.': 7389,
 'Claudia Dorei': 7817,
 'Chit\xc3\xa3ozinho & Xoror\xc3\xb3 e Fresno': 12100,
 'Michael Brook': 15597,
 '\xe3\x83\xa0\xe3\x83\x83\xe3\x82\xaf': 1933,
 'Boris Vian': 7208,
 '\xd0\xa1\xd0\xb0\xd0\xbc\xd0\xbe\xd0\xb5 \xd0\x91\xd0\xbe\xd0\xbb\xd1\x8c\xd1\x88\xd0\xbe\xd0\xb5 \xd0\x9f\xd1\x80\xd0\xbe\xd1\x81\xd1\x82\xd0\xbe\xd0\xb5 \xd0\xa7\xd0\xb8\xd1\x81\xd0\xbb\xd0\xbe': 2156,
 '2nd II None': 2258,
 'Daniel Masson': 18043,
 'Neil Sedaka': 10630,
 'In Mourning': 4612,
 'Kalson': 17170,
 'Giuseppe Torelli': 16331,
 'The-Dream f. Kanye West': 13639,
 'Tomate': 3387,
 'Three Dog Night': 9418,
 'The Mooney Suzuki': 10972,
 'Phantom Planet': 3461,
 'Charlie Straight': 17839,
 'Dengue Fever': 10210,
 'Donovan': 2857,
 'bassment': 18249,
 'Rosenstolz': 3983,
 'Gloriana': 1462,
 'Sondre Lerche': 6297,
 '\xd0\x97\xd0\xb0\xd1\x80\xd0\xb0\xd0\xb6\xd0\xb5\xd0\xbd\xd0\xb8\xd0\xb5 \xd0\x9a\xd1\x80\xd0\xbe\xd0\xb2\xd0\xb8': 18636,
 'Rashamba': 6843,
 'BrokeNCYDE': 1678,
 'Jungle Fever': 9806,
 'Stimulator': 6783,
 'Electrelane': 10110,
 'Brakes': 15262,
 '4TOPS': 14328,
 'Muro': 5825,
 'Jacob Golden': 6208,
 'Culture': 16098,
 'grup gen\xc3\xa7': 17106,
 'Holdcut': 8366,
 'Skyhooks': 16974,
 'Carmen McRae': 15774,
 'Joel Goldsmith': 6398,
 'Wasted Land': 414,
 'Chiasm': 12394,
 'Nina Hagen': 12314,
 'Joe Rogan': 16137,
 'April March': 9414,
 'Skye': 3121,
 'The Diane Rehm Show': 16511,
 'T\xc3\xa9l\xc3\xa9phone': 7214,
 'Mutilator': 9224,
 '\xd0\xa2\xd0\x95\xd0\xa5\xd0\x9d\xd0\x98\xd0\x9a\xd0\x90 : \xd0\x9d\xd0\x95\xd0\x98\xd0\xa1\xd0\x9f\xd0\xa0\xd0\x90\xd0\x92\xd0\x9d\xd0\x90': 10032,
 'Dio': 2342,
 'The Blues Brothers': 6363,
 'Psyclon Nine': 17,
 'Envy on the Coast': 6766,
 'Dorival Caymmi': 6642,
 'Chris Willis; David Guetta; Fergie; LMFAO': 14552,
 'Eths': 3236,
 '\xc3\x84lymyst\xc3\xb6': 10173,
 "Lil' Kim": 8178,
 'Hair': 7861,
 'Ysa Ferrer': 13362,
 '6.Cadde': 12421,
 'Comets on Fire': 14212,
 'Brian McFadden': 11024,
 'The Techniques': 9609,
 'Zeigeist': 7885,
 'Necrophobic': 1284,
 'In[Perfektion]': 11199,
 'Oomph! - Defekt (1995)': 17233,
 'Aghora': 8690,
 'G-Dragon': 409,
 'Injected': 6131,
 'Thundertale': 8325,
 'Neutral Milk Hotel': 1947,
 'Merchant Ships': 16791,
 'Erik Gr\xc3\xb6nwall': 15416,
 'Allan Holdsworth': 13848,
 'Uncle Dave Macon': 2996,
 'Disiplin': 8520,
 'The Post-Modern Cliche': 18114,
 'Versant': 14269,
 'Nathan Barr': 10212,
 'Ella Fitzgerald': 2638,
 'Pitch Black Forecast': 8909,
 'The Courteeners': 1514,
 '1974 AD': 18000,
 'Jack Teagarden': 3009,
 'Hello Seahorse!': 15896,
 'The Rocket Summer': 6600,
 'Superjupiter': 14639,
 'Isis Gee': 4571,
 'Halou': 5197,
 'Alai Oli': 14690,
 'The Briefs': 16877,
 '...And The Earth Swarmed With Them': 17386,
 'A Very Potter Musical': 12209,
 'The Recyclers': 4640,
 'Jazzanova': 1818,
 'Los Desterrados': 9072,
 'Sizzla': 9837,
 'Lulu': 13608,
 'Grand Champeen': 17566,
 'EBT': 12463,
 'Asriel': 2109,
 'Kylie Minogue': 55,
 'Lull': 11099,
 'Votchi': 13111,
 'Tsuyoshi Sekito': 16631,
 'OK Go': 2121,
 '\xe6\xa4\x8e\xe5\x90\x8d\xe6\x9e\x97\xe6\xaa\x8e\xc3\x97\xe6\x96\x8e\xe8\x97\xa4\xe3\x83\x8d\xe3\x82\xb3': 17487,
 '\xec\x86\xa1\xec\xa7\x80\xec\x9d\x80': 16165,
 'Lulu Rouge': 13033,
 'Milk Inc.': 2934,
 'Dark Voices': 10636,
 'Ida Maria': 4802,
 'Safura': 670,
 'm\xc3\xbam': 444,
 'Haddaway': 3978,
 'Score Deseos Ocultos': 8413,
 'Eirik Hansen og Ove Randow': 6936,
 'Nouvelle Vague': 2616,
 'Daggermouth': 9800,
 'The Durutti Column': 10111,
 'Refused': 5205,
 'Maverick': 17671,
 'Telefon Tel Aviv': 2691,
 'Cubismo Grafico': 17624,
 'Millie Jackson': 15775,
 'Vitalic': 4728,
 '\xe6\xa1\x9c\xe5\xba\xad\xe7\xb5\xb1': 8736,
 'Sheikh Mishary Rashed Alafasy': 12441,
 'Mathias Grassow': 11109,
 'The Heptones': 16101,
 'Tiger Lou': 12295,
 'Head of David': 3513,
 'Sick Terror': 10595,
 'Dis': 13773,
 'Art of Dying': 6251,
 'Southside Johnny & The Asbury Jukes': 17208,
 'The Adverts': 10761,
 'Ahmet Aslan': 18687,
 'Reza Yazdani': 15706,
 'This Is Seb Clarke': 7735,
 'Sad Lovers and Giants': 5912,
 'Art of Noise': 1836,
 'Schizma': 11851,
 'The Monochrome Set': 10766,
 'Flexe': 13656,
 'Cervello Elettronico': 12845,
 'Sensei & Edinaka': 13768,
 'Texas in July': 5141,
 'Cary Brothers': 5553,
 'KISS': 2343,
 'Sacred Steel': 2739,
 'Vama Veche': 9532,
 'Emilia': 14520,
 'Grass': 6281,
 'Send More Paramedics': 13567,
 'KT Tunstall': 2544,
 'Nitzer Ebb': 880,
 "Cap'n Jazz": 3335,
 'Svarte Greiner': 147,
 'Kosheen': 90,
 'Cerebral Incubation': 5223,
 'Chris Corner': 5893,
 'Babyshambles': 208,
 'Cyberaktif': 12509,
 'Polly Scattergood': 15288,
 'Boy Better Know': 17041,
 'Brodka': 10987,
 'The Persuasions': 8116,
 'Sentinel': 6277,
 'My Dad Is Dead': 6040,
 'Lordi': 2664,
 'Say Anything': 6773,
 'Vaivencida': 14545,
 'Maria del Mar Bonet': 13585,
 'Soul Asylum': 10851,
 'La Chat': 14137,
 'Stuart A. Staples': 14652,
 'Nina Persson': 14060,
 'Be Your Own Pet': 11819,
 'Os Pedrero': 4157,
 'Tear Down The Lies!': 10107,
 'Alpha Quadrant': 1250,
 'Kultur Shock': 6491,
 "Grand Belial's Key": 7915,
 'Bilge Pump': 14356,
 'Reflection Eternal': 10398,
 'Brainstorm': 4609,
 'Monocordio': 13670,
 'Susana': 10844,
 'Miguel Aceves Mej\xc3\xada': 8804,
 'Jessica Andrews': 2979,
 'Tarmvred': 15626,
 'Race': 13694,
 'Mila J': 8092,
 'Integrity Music': 14991,
 'Resgate': 9185,
 'Halestorm': 11581,
 'Starflyer 59': 3773,
 'The Orb': 5503,
 'Kayah': 10988,
 'Bedouin Ascent': 17366,
 'Capoeira Angola': 14689,
 'Babado Novo': 3388,
 'The Bigger Lights': 6603,
 'Bebel Gilberto': 10918,
 'Mini k Bros': 4628,
 'Ananda Project': 17530,
 'Fenomen': 10045,
 'Die Sektor': 6872,
 'Hana Pestle': 11578,
 'Orelsan': 2246,
 'Marilyn Manson & The Spooky Kids': 7680,
 'Jean Grae': 10393,
 'Fantazja': 5705,
 'Korouva': 12317,
 '\xc5\x9ean\xc4\xb1\xc5\x9fer feat. Atakan': 12424,
 'Rie fu': 1922,
 'Dougal & Gammer': 6116,
 'Jenny Lewis with The Watson Twins': 6302,
 'Mason Jennings': 10198,
 'Wilson Paim': 12149,
 'Rey Gordiflon': 17290,
 'The Casualties': 6997,
 'Aereogramme': 10820,
 'Oomph!': 1714,
 'Uh Huh Her': 11155,
 'Wolf Eyes & Black Dice': 4293,
 'Microfilm': 16393,
 'Monster Magnet': 2362,
 '\xd0\x9b\xd1\x8f\xd0\xbf\xd0\xb8\xd1\x81 \xd0\xa2\xd1\x80\xd1\x83\xd0\xb1\xd0\xb5\xd1\x86\xd0\xba\xd0\xbe\xd0\xb9': 2735,
 'Alexander Kowalski': 14765,
 'Amber': 7730,
 'Chick Webb': 2995,
 'Earl Sweatshirt': 8472,
 '\xd7\x90\xd7\x91\xd7\x99\xd7\x91 \xd7\x92\xd7\xa4\xd7\x9f': 160,
 'Sam Taylor-Wood': 15240,
 'Coiffeur': 8410,
 'Klaus Nomi': 6241,
 'Steph Jones': 8094,
 'Love Shop': 8505,
 'Ximena': 12593,
 'Valentine': 5468,
 'Mademoiselle K': 7351,
 'iLiKETRAiNS': 12583,
 '\xe3\x83\xab\xe3\x83\xab\xe3\x83\x86\xe3\x82\xa3\xe3\x82\xa2': 2104,
 'Boby Lapointe': 7228,
 'Squarepusher (aka Chaos A.D.)': 7491,
 'Kany Garc\xc3\xada': 15330,
 'Loveholic': 7174,
 'Slightly Stoopid': 11255,
 'Gandhi': 10534,
 'Sky Limousine': 5865,
 'Evan Taubenfeld': 3931,
 ...}

In [100]:
# Print each artist that one user has listened to, next to the stored listening count.

i = 1860  # row of X_lil to inspect
uID = idx2userID[i]
print('%s %s' % ('User:', uID))

# Column positions that hold a non-zero entry in row i.
a_idx = X_lil[i, :].nonzero()[1]

for j in a_idx:
    artist_label = artistID2artistName[idx2artistID[j]]
    print('%s %s' % (artist_label, X_lil[i, j]))


User: 2064
Gorillaz 23.0
Muse 28.0
Death Cab for Cutie 68.0
Band of Horses 17.0
Interpol 18.0
Kings of Leon 477.0
The Killers 41.0
Green Day 74.0
P!nk 116.0
Avril Lavigne 24.0
Linkin Park 203.0
Metro Station 17.0
30 Seconds to Mars 25.0
Three Days Grace 16.0
Papa Roach 120.0
Paramore 1130.0
Flyleaf 1458.0
Bullet for My Valentine 139.0
Fall Out Boy 406.0
Ellie Goulding 24.0
Glee Cast 141.0
Florence + the Machine 101.0
Marina & the Diamonds 26.0
Sia 19.0
The Pretty Reckless 243.0
Bring Me The Horizon 145.0
Atreyu 40.0
Boys Like Girls 25.0
The Kooks 19.0
Pink 73.0
Editors 21.0
Asking Alexandria 265.0
Black Rebel Motorcycle Club 26.0
Metric 17.0
Breaking Benjamin 177.0
In This Moment 127.0
We Butter The Bread With Butter 214.0
Automatic Loveletter 18.0
Eyes Set to Kill 312.0
Silversun Pickups 30.0
The Bravery 23.0
Sylvester 17.0
Sea Wolf 19.0
Callejón 47.0
Robert Francis 291.0
Bakkushan 49.0
Bon Iver & St. Vincent 49.0
Boy 29.0
Hurricane Bells 19.0
John James 17.0

In [7]:
# Print the ID of every user that has a non-zero entry for one artist.
artistName = 'Morrissey'
artist_id = artistName2artistID[artistName]
j = artistID2idx[artist_id]

# Row positions that hold a non-zero entry in column j.
idx = X_lil[:, j].nonzero()[0]
for i in idx:
    print(idx2userID[i])


94
100
141
142
183
215
237
290
369
428
436
462
540
560
596
622
665
694
728
764
917
925
1075
1087
1202
1210
1232
1240
1244
1255
1276
1303
1316
1337
1354
1375
1389
1424
1427
1463
1486
1496
1498
1506
1518
1541
1594
1653
1685
1732
1746
1815
1852
1861
1905
1922
1929
1958
1992
2014

In [74]:
# Rank artists by their column total in X_lil and print the 300 largest, biggest first.
sm = np.array(np.sum(X_lil, axis=0))[0]  # per-column totals as a flat array
idx_sorted = np.argsort(sm)              # column indices in ascending order of total

# Take the last 300 positions of the ascending order and walk them backwards.
top_desc = idx_sorted[-300:][::-1]
for i in top_desc:
    print('%s %s' % (artistID2artistName[idx2artistID[i]], sm[i]))


Britney Spears 2393140.0
Depeche Mode 1301308.0
Lady Gaga 1291387.0
Christina Aguilera 1058405.0
Paramore 963449.0
Madonna 921198.0
Rihanna 905423.0
Shakira 688529.0
The Beatles 662116.0
Katy Perry 532545.0
Avril Lavigne 525844.0
Taylor Swift 525292.0
Evanescence 513476.0
Glee Cast 506453.0
Beyoncé 499318.0
U2 493024.0
Miley Cyrus 489065.0
30 Seconds to Mars 485532.0
Muse 485076.0
Pink Floyd 466104.0
Kylie Minogue 449292.0
Radiohead 385306.0
Ke$ha 384405.0
Mariah Carey 384307.0
Metallica 371916.0
Iron Maiden 368710.0
Thalía 350035.0
Duran Duran 348919.0
Coldplay 330757.0
Eminem 321011.0
Blur 318221.0
Michael Jackson 296882.0
Arctic Monkeys 288520.0
System of a Down 277397.0
Linkin Park 265362.0
Kelly Clarkson 253027.0
Pearl Jam 251440.0
Placebo 245878.0
In Flames 237148.0
All Time Low 215777.0
Amy Winehouse 213103.0
P!nk 207761.0
a-ha 205195.0
Led Zeppelin 203665.0
Björk 202178.0
Avenged Sevenfold 200949.0
Justin Bieber 200027.0
The Killers 191979.0
Nirvana 190232.0
Black Eyed Peas 188634.0
Daft Punk 176043.0
Ashley Tisdale 166644.0
Oasis 165975.0
Leona Lewis 162288.0
Nine Inch Nails 160317.0
My Chemical Romance 159733.0
Green Day 155321.0
Backstreet Boys 153101.0
The Cure 151103.0
Kings of Leon 148452.0
Jennifer Lopez 144710.0
Demi Lovato 139378.0
The Pretty Reckless 138049.0
Sarah Brightman 137581.0
Sigur Rós 133955.0
Guns N' Roses 133931.0
David Bowie 133511.0
Black Sabbath 131397.0
The Pussycat Dolls 130855.0
The Strokes 130227.0
Rammstein 129745.0
Lily Allen 129352.0
Carrie Underwood 126314.0
Hilary Duff 123682.0
Janet Jackson 120665.0
Panic! At the Disco 120336.0
Alanis Morissette 117229.0
Red Hot Chili Peppers 116768.0
Adam Lambert 115538.0
The Veronicas 114690.0
Queen 114505.0
Bright Eyes 113631.0
Florence + the Machine 113467.0
Nelly Furtado 110218.0
Selena Gomez & the Scene 110123.0
Megadeth 102662.0
blink-182 102334.0
Los Hermanos 100819.0
Death Cab for Cutie 100298.0
倖田來未 99845.0
AC/DC 99820.0
Joy Division 99296.0
Enrique Iglesias 97089.0
50 Cent 96353.0
Marilyn Manson 96201.0
The Doors 95880.0
Cyndi Lauper 94551.0
Kanye West 94501.0
The Kooks 94398.0
3OH!3 94179.0
Ashlee Simpson 93828.0
Pet Shop Boys 91869.0
The Smiths 91740.0
Justin Timberlake 91169.0
Alicia Keys 91066.0
A Day to Remember 90365.0
The Devil Wears Prada 90154.0
Porcupine Tree 88594.0
The Smashing Pumpkins 88270.0
Foo Fighters 88086.0
Wanessa 87617.0
The Rolling Stones 87432.0
David Archuleta 86342.0
MGMT 86195.0
Lindsay Lohan 85160.0
McFly 83954.0
Céline Dion 83373.0
Erasure 83206.0
Nightwish 83061.0
Franz Ferdinand 82999.0
Interpol 81822.0
Girls Aloud 80222.0
Rise Against 80184.0
Opeth 79847.0
Fall Out Boy 79486.0
Bring Me The Horizon 78578.0
Slipknot 78363.0
Within Temptation 78342.0
Enigma 77046.0
Elvis Presley 76175.0
Tool 76012.0
Keane 75804.0
Flyleaf 75634.0
Leighton Meester 75372.0
Adele 75169.0
Brandy 74834.0
Boards of Canada 74039.0
Ramones 73961.0
Nicole Scherzinger 73597.0
Jordin Sparks 72962.0
Matanza 71710.0
HIM 71488.0
Escape The Fate 71434.0
Lifehouse 71250.0
Good Charlotte 70828.0
Fresno 70589.0
John Mayer 70360.0
Children of Bodom 70314.0
Queens of the Stone Age 69833.0
Gorillaz 69629.0
The Birthday Massacre 68917.0
Three Days Grace 68306.0
Ciara 67979.0
Cheryl Cole 67822.0
New Order 67259.0
OneRepublic 67187.0
Mylène Farmer 66688.0
Gwen Stefani 65902.0
Dream Theater 65886.0
Regina Spektor 65641.0
Tokio Hotel 65344.0
Bob Dylan 65145.0
Incubus 64897.0
Jesse McCartney 64596.0
SHINee 64256.0
t.A.T.u. 63661.0
David Guetta 63582.0
Robyn 63510.0
Deftones 62933.0
Pantera 62838.0
Arcade Fire 62735.0
Slayer 62107.0
Jonas Brothers 61849.0
Kerli 61748.0
The Used 61679.0
As I Lay Dying 61163.0
浜崎あゆみ 60167.0
Crystal Castles 59938.0
Elliott Smith 59821.0
Sugababes 59353.0
The Offspring 58965.0
The Who 58438.0
Pixie Lott 58312.0
Korn 58163.0
Snow Patrol 57847.0
Akon 57537.0
Maroon 5 57009.0
Timbaland 56599.0
Tegan and Sara 56006.0
Dave Gahan 55944.0
Sum 41 55683.0
Enter Shikari 55642.0
Cobra Starship 55463.0
R.E.M. 55137.0
The Saturdays 54909.0
Robbie Williams 54825.0
BoA 54663.0
Goldfrapp 54597.0
Mika 53918.0
A Perfect Circle 53915.0
Nicki Minaj 53353.0
Yeah Yeah Yeahs 53054.0
Breaking Benjamin 52706.0
IAMX 52137.0
Underoath 51988.0
Aly & AJ 51641.0
Anahí 51602.0
Bullet for My Valentine 51566.0
Lil' Wayne 50645.0
Pitty 50241.0
ABBA 50132.0
Belle and Sebastian 49881.0
The National 49647.0
Tori Amos 49603.0
Scissor Sisters 49521.0
Natalie Imbruglia 49290.0
Alice in Chains 49014.0
Ellie Goulding 48631.0
Morrissey 48397.0
Röyksopp 47993.0
Kasabian 47200.0
Garbage 46606.0
Atreyu 46466.0
Jeffree Star 46370.0
Lostprophets 46295.0
Damien Rice 46061.0
Kate Nash 45886.0
Tangerine Dream 45769.0
Judas Priest 45672.0
Sonata Arctica 45666.0
The White Stripes 45355.0
Disturbed 44959.0
Aerosmith 44878.0
Moby 44852.0
Usher 44673.0
Simple Plan 44240.0
Air 44230.0
KISS 44187.0
RBD 44182.0
Asking Alexandria 44151.0
Yann Tiersen 43972.0
Portishead 43961.0
The xx 43860.0
nevershoutnever! 43710.0
Apocalyptica 43644.0
Papa Roach 43559.0
Dave Matthews Band 43536.0
Katatonia 43458.0
Cocteau Twins 43440.0
Hadouken! 43303.0
Bon Jovi 43252.0
M.I.A. 42418.0
Miranda Lambert 42363.0
Bat for Lashes 42305.0
Massive Attack 42195.0
Cher 41552.0
Daughtry 41447.0
Tom Waits 41316.0
Nickelback 41094.0
Tarja 41028.0
Vanessa Hudgens 40837.0
Deep Purple 40664.0
Colbie Caillat 40510.0
The Prodigy 40473.0
Killswitch Engage 40428.0
Jason Mraz 40288.0
Animal Collective 40234.0
Modest Mouse 40203.0
Duffy 40042.0
Jack Johnson 39882.0
Martin L. Gore 39658.0
Sufjan Stevens 39491.0
Dido 39172.0
Natasha Bedingfield 39111.0
And One 38949.0
Cansei de Ser Sexy 38781.0
No Doubt 38608.0
Bushido 38392.0
Jessica Simpson 38190.0
Beirut 37812.0
The Fray 37800.0
Brand New 37663.0
Electric Light Orchestra 37655.0
Beck 37576.0
The Libertines 37532.0
Jay-Z 37521.0
Bloc Party 37500.0
Faith No More 37375.0
Silverchair 37287.0
Johnny Cash 37211.0
Boys Like Girls 37136.0
Owl City 36911.0
Manic Street Preachers 36669.0
The All-American Rejects 36370.0
Engenheiros do Hawaii 35978.0
Cat Power 35947.0
Ozzy Osbourne 35924.0
Anathema 35700.0
Cascada 35347.0
Viking Quest 35323.0

Tags


In [8]:
# Load the tag table and build lookups between tag names, tag IDs and dense indices.
df = pd.read_csv('./data/hetrec2011-lastfm-2k/tags.dat', sep='\t')

tagName2tagID = {}
tagID2tagName = {}
for rec in df.itertuples():
    # rec[0] is the frame index; rec[1] and rec[2] are the first two columns.
    tag_id, tag_name = rec[1], rec[2]
    tagName2tagID[tag_name] = tag_id
    tagID2tagName[tag_id] = tag_name

# Dense indices follow the sorted order of the tag IDs.
sorted_tag_ids = sorted(tagID2tagName)
tagID2idx = {tid: num for num, tid in enumerate(sorted_tag_ids)}
idx2tagID = dict(enumerate(sorted_tag_ids))

NumOfTags = len(tagID2tagName)
NumOfTags


Out[8]:
11946

Tag Assignments by user to artist


In [9]:
# Load the tab-separated tag assignments; the frame itself is shown as the cell output.
tag_assignments_path = './data/hetrec2011-lastfm-2k/user_taggedartists-timestamps.dat'
df = pd.read_csv(tag_assignments_path, sep='\t')
df


Out[9]:
userID artistID tagID timestamp
0 2 52 13 1238536800000
1 2 52 15 1238536800000
2 2 52 18 1238536800000
3 2 52 21 1238536800000
4 2 52 41 1238536800000
5 2 63 13 1238536800000
6 2 63 14 1238536800000
7 2 63 23 1238536800000
8 2 63 40 1238536800000
9 2 73 13 1238536800000
10 2 73 14 1238536800000
11 2 73 15 1238536800000
12 2 73 18 1238536800000
13 2 73 20 1238536800000
14 2 73 21 1238536800000
15 2 73 22 1238536800000
16 2 73 26 1238536800000
17 2 94 13 1238536800000
18 2 94 15 1238536800000
19 2 94 20 1238536800000
20 2 94 21 1238536800000
21 2 94 22 1238536800000
22 2 94 36 1238536800000
23 2 94 37 1238536800000
24 2 94 39 1238536800000
25 2 96 19 1238536800000
26 2 96 24 1238536800000
27 2 995 16 1241128800000
28 2 995 17 1241128800000
29 2 995 24 1241128800000
... ... ... ... ...
186449 2099 18715 758 1259622000000
186450 2099 18716 13 1254348000000
186451 2099 18716 15 1254348000000
186452 2099 18716 758 1254348000000
186453 2099 18717 13 1264978800000
186454 2099 18717 15 1264978800000
186455 2099 18717 758 1264978800000
186456 2099 18719 758 1264978800000
186457 2099 18721 15 1230764400000
186458 2099 18721 758 1230764400000
186459 2099 18723 626 1288566000000
186460 2099 18723 2485 1288566000000
186461 2099 18724 13 1264978800000
186462 2099 18724 15 1264978800000
186463 2099 18724 2525 1264978800000
186464 2100 1111 574 1235862000000
186465 2100 3855 3271 1277935200000
186466 2100 6658 4 1254348000000
186467 2100 6658 2087 1254348000000
186468 2100 6658 2088 1254348000000
186469 2100 8322 4 1241128800000
186470 2100 8322 3510 1241128800000
186471 2100 8322 4364 1241128800000
186472 2100 8322 4365 1259622000000
186473 2100 13978 574 1243807200000
186474 2100 16437 4 1277935200000
186475 2100 16437 292 1272664800000
186476 2100 16437 2087 1277935200000
186477 2100 16437 2801 1272664800000
186478 2100 16437 3335 1277935200000

186479 rows × 4 columns


In [10]:
# Register artists that occur in the tag assignments (column 2 of df) but have no
# entry in artistID2idx yet: each gets a placeholder name and a fresh index.
# Membership tests use `in` because dict.has_key() is deprecated and no longer
# exists in Python 3.
new_artists = {r[2] for r in df.itertuples() if r[2] not in artistID2idx}

# Numbering of the added artists starts at NumOfArtists.
# NOTE(review): presumably one past the largest existing index -- confirm where
# NumOfArtists is defined.
nextArtistID = NumOfArtists

for u in new_artists:
    # NOTE(review): an ID that is missing from artistID2idx but already present in
    # artistID2artistName is skipped here and therefore still gets no index.
    if u not in artistID2artistName:
        name = "Unknown_" + str(u)
        print('%s %s' % (name, u))
        artistID2artistName[u] = name
        artistName2artistID[name] = u
        artistID2idx[u] = nextArtistID
        idx2artistID[nextArtistID] = u
        nextArtistID += 1


Unknown_14338 14338
Unknown_14 14
Unknown_12303 12303
Unknown_16400 16400
Unknown_16401 16401
Unknown_12307 12307
Unknown_29 29
Unknown_33 33
Unknown_35 35
Unknown_14377 14377
Unknown_10283 10283
Unknown_10285 10285
Unknown_14382 14382
Unknown_10287 10287
Unknown_16433 16433
Unknown_10290 10290
Unknown_10293 10293
Unknown_18487 18487
Unknown_10298 10298
Unknown_18492 18492
Unknown_18493 18493
Unknown_18496 18496
Unknown_14402 14402
Unknown_18502 18502
Unknown_18503 18503
Unknown_14411 14411
Unknown_18509 18509
Unknown_12302 12302
Unknown_14422 14422
Unknown_14423 14423
Unknown_14426 14426
Unknown_14427 14427
Unknown_14428 14428
Unknown_14429 14429
Unknown_16478 16478
Unknown_16481 16481
Unknown_14434 14434
Unknown_14440 14440
Unknown_6256 6256
Unknown_18565 18565
Unknown_18566 18566
Unknown_18567 18567
Unknown_15553 15553
Unknown_16536 16536
Unknown_16539 16539
Unknown_16540 16540
Unknown_16545 16545
Unknown_16546 16546
Unknown_16547 16547
Unknown_16549 16549
Unknown_18598 18598
Unknown_16551 16551
Unknown_16554 16554
Unknown_16555 16555
Unknown_16556 16556
Unknown_16558 16558
Unknown_17780 17780
Unknown_18623 18623
Unknown_18624 18624
Unknown_18625 18625
Unknown_18627 18627
Unknown_18628 18628
Unknown_18629 18629
Unknown_8391 8391
Unknown_14549 14549
Unknown_8421 8421
Unknown_12541 12541
Unknown_14379 14379
Unknown_18699 18699
Unknown_6413 6413
Unknown_18704 18704
Unknown_14613 14613
Unknown_6423 6423
Unknown_6425 6425
Unknown_18481 18481
Unknown_18732 18732
Unknown_11997 11997
Unknown_14654 14654
Unknown_14660 14660
Unknown_12616 12616
Unknown_12619 12619
Unknown_18490 18490
Unknown_9832 9832
Unknown_14711 14711
Unknown_4472 4472
Unknown_14720 14720
Unknown_14721 14721
Unknown_12354 12354
Unknown_14406 14406
Unknown_10665 10665
Unknown_16814 16814
Unknown_16815 16815
Unknown_16816 16816
Unknown_16818 16818
Unknown_16827 16827
Unknown_16831 16831
Unknown_14809 14809
Unknown_14811 14811
Unknown_14814 14814
Unknown_14815 14815
Unknown_14816 14816
Unknown_14818 14818
Unknown_14822 14822
Unknown_10728 10728
Unknown_8688 8688
Unknown_14848 14848
Unknown_14850 14850
Unknown_14851 14851
Unknown_14852 14852
Unknown_14859 14859
Unknown_14860 14860
Unknown_8717 8717
Unknown_8720 8720
Unknown_14867 14867
Unknown_14868 14868
Unknown_14869 14869
Unknown_14870 14870
Unknown_16474 16474
Unknown_10791 10791
Unknown_10792 10792
Unknown_16945 16945
Unknown_16946 16946
Unknown_4668 4668
Unknown_4673 4673
Unknown_4674 4674
Unknown_10819 10819
Unknown_4676 4676
Unknown_4677 4677
Unknown_16977 16977
Unknown_16987 16987
Unknown_12893 12893
Unknown_12894 12894
Unknown_12895 12895
Unknown_12896 12896
Unknown_12898 12898
Unknown_12903 12903
Unknown_12904 12904
Unknown_11708 11708
Unknown_4714 4714
Unknown_10862 10862
Unknown_10863 10863
Unknown_12912 12912
Unknown_4721 4721
Unknown_3518 3518
Unknown_4729 4729
Unknown_10887 10887
Unknown_12941 12941
Unknown_10896 10896
Unknown_11715 11715
Unknown_8879 8879
Unknown_8880 8880
Unknown_17075 17075
Unknown_10945 10945
Unknown_10947 10947
Unknown_10948 10948
Unknown_10951 10951
Unknown_8922 8922
Unknown_8926 8926
Unknown_8935 8935
Unknown_8940 8940
Unknown_8943 8943
Unknown_8946 8946
Unknown_8949 8949
Unknown_8950 8950
Unknown_11027 11027
Unknown_11028 11028
Unknown_15125 15125
Unknown_15132 15132
Unknown_15133 15133
Unknown_17894 17894
Unknown_11054 11054
Unknown_11058 11058
Unknown_11059 11059
Unknown_11060 11060
Unknown_15177 15177
Unknown_15178 15178
Unknown_15189 15189
Unknown_15190 15190
Unknown_6999 6999
Unknown_7000 7000
Unknown_7001 7001
Unknown_13146 13146
Unknown_7004 7004
Unknown_7005 7005
Unknown_7006 7006
Unknown_10726 10726
Unknown_17308 17308
Unknown_17310 17310
Unknown_17316 17316
Unknown_15271 15271
Unknown_15272 15272
Unknown_6300 6300
Unknown_17325 17325
Unknown_15279 15279
Unknown_15283 15283
Unknown_15284 15284
Unknown_5046 5046
Unknown_5054 5054
Unknown_5056 5056
Unknown_5059 5059
Unknown_11214 11214
Unknown_11216 11216
Unknown_15326 15326
Unknown_18597 18597
Unknown_13288 13288
Unknown_13290 13290
Unknown_13480 13480
Unknown_5108 5108
Unknown_5109 5109
Unknown_11258 11258
Unknown_11259 11259
Unknown_15359 15359
Unknown_15360 15360
Unknown_15363 15363
Unknown_6316 6316
Unknown_17479 17479
Unknown_17480 17480
Unknown_13836 13836
Unknown_11353 11353
Unknown_11354 11354
Unknown_13407 13407
Unknown_11361 11361
Unknown_11371 11371
Unknown_5230 5230
Unknown_15550 15550
Unknown_15552 15552
Unknown_15493 15493
Unknown_15494 15494
Unknown_17601 17601
Unknown_3210 3210
Unknown_3218 3218
Unknown_3219 3219
Unknown_13461 13461
Unknown_17561 17561
Unknown_13467 13467
Unknown_13469 13469
Unknown_13472 13472
Unknown_13474 13474
Unknown_15528 15528
Unknown_13483 13483
Unknown_13485 13485
Unknown_15539 15539
Unknown_15541 15541
Unknown_15546 15546
Unknown_15547 15547
Unknown_17598 17598
Unknown_7359 7359
Unknown_7360 7360
Unknown_7361 7361
Unknown_17604 17604
Unknown_15557 15557
Unknown_15559 15559
Unknown_15560 15560
Unknown_15561 15561
Unknown_6315 6315
Unknown_11486 11486
Unknown_11488 11488
Unknown_11493 11493
Unknown_17643 17643
Unknown_13570 13570
Unknown_13576 13576
Unknown_13578 13578
Unknown_17695 17695
Unknown_17696 17696
Unknown_9778 9778
Unknown_3522 3522
Unknown_11585 11585
Unknown_11600 11600
Unknown_7508 7508
Unknown_13658 13658
Unknown_13661 13661
Unknown_13662 13662
Unknown_13663 13663
Unknown_15730 15730
Unknown_15732 15732
Unknown_15733 15733
Unknown_15736 15736
Unknown_17810 17810
Unknown_5527 5527
Unknown_17819 17819
Unknown_17820 17820
Unknown_17821 17821
Unknown_17822 17822
Unknown_17823 17823
Unknown_17824 17824
Unknown_17825 17825
Unknown_17828 17828
Unknown_17829 17829
Unknown_17830 17830
Unknown_17832 17832
Unknown_13896 13896
Unknown_11704 11704
Unknown_3515 3515
Unknown_3516 3516
Unknown_11710 11710
Unknown_3519 3519
Unknown_11714 11714
Unknown_7755 7755
Unknown_3525 3525
Unknown_3533 3533
Unknown_13785 13785
Unknown_13787 13787
Unknown_15841 15841
Unknown_15842 15842
Unknown_15843 15843
Unknown_15844 15844
Unknown_15846 15846
Unknown_17895 17895
Unknown_9714 9714
Unknown_17919 17919
Unknown_17920 17920
Unknown_17924 17924
Unknown_13833 13833
Unknown_13835 13835
Unknown_17932 17932
Unknown_17933 17933
Unknown_5037 5037
Unknown_3626 3626
Unknown_3629 3629
Unknown_3634 3634
Unknown_16990 16990
Unknown_3639 3639
Unknown_3641 3641
Unknown_16992 16992
Unknown_13894 13894
Unknown_13895 13895
Unknown_3656 3656
Unknown_13899 13899
Unknown_7768 7768
Unknown_11867 11867
Unknown_18016 18016
Unknown_18021 18021
Unknown_7784 7784
Unknown_9835 9835
Unknown_7788 7788
Unknown_9841 9841
Unknown_9842 9842
Unknown_8131 8131
Unknown_13972 13972
Unknown_14617 14617
Unknown_13976 13976
Unknown_10180 10180
Unknown_16032 16032
Unknown_10183 10183
Unknown_11973 11973
Unknown_11974 11974
Unknown_9945 9945
Unknown_9949 9949
Unknown_18142 18142
Unknown_18144 18144
Unknown_16109 16109
Unknown_16110 16110
Unknown_12041 12041
Unknown_12043 12043
Unknown_14098 14098
Unknown_14103 14103
Unknown_14104 14104
Unknown_18222 18222
Unknown_7358 7358
Unknown_8010 8010
Unknown_16212 16212
Unknown_16216 16216
Unknown_16221 16221
Unknown_16222 16222
Unknown_10079 10079
Unknown_16224 16224
Unknown_10082 10082
Unknown_16228 16228
Unknown_1903 1903
Unknown_12148 12148
Unknown_1911 1911
Unknown_1916 1916
Unknown_1926 1926
Unknown_4083 4083
Unknown_10175 10175
Unknown_14272 14272
Unknown_14273 14273
Unknown_10178 10178
Unknown_10179 10179
Unknown_8132 8132
Unknown_8133 8133
Unknown_8135 8135
Unknown_8138 8138
Unknown_8139 8139
Unknown_18419 18419
Unknown_4085 4085
Unknown_18422 18422
Unknown_4088 4088
Unknown_4090 4090
Unknown_4093 4093

In [83]:
# Build ArtistTimesTagged: entry (artist index, tag index) counts the distinct
# users that assigned that tag to that artist.
# NOTE(review): expects `df` to still hold the tag-assignment table (columns 1-3
# are read as user, artist and tag IDs); another cell rebinds `df`, so confirm
# the execution order.
NumOfArtistsExtended = len(artistID2artistName)

# Keys are (user index, artist index, tag index); repeated rows collapse into one key.
TG = {(userID2idx[r[1]], artistID2idx[r[2]], tagID2idx[r[3]]): 1 for r in df.itertuples()}

ArtistTimesTagged = sparse.lil_matrix((NumOfArtistsExtended, NumOfTags))

# Iterating a dict yields its keys on Python 2 and 3 alike; dict.iterkeys()
# was removed in Python 3.
for k in TG:
    i, j = k[1], k[2]
    ArtistTimesTagged[i, j] += 1

In [87]:
# Sparsity pattern of ArtistTimesTagged: one marker per non-zero (artist, tag) entry.
fig, ax = plt.subplots(figsize=(12, 20))
ax.spy(ArtistTimesTagged[0:, 0:], markersize=1)
ax.set_ylabel('Artists')
ax.set_xlabel('Tags')
plt.show()


Tags of an artist


In [122]:
# Print every tag attached to one artist together with its count in ArtistTimesTagged.
# Other artist names that were tried here: 'Brad Mehldau', 'Baba Zula',
# 'Britney Spears', 'Kurban'.
i = artistID2idx[artistName2artistID['Nancy Sinatra']]
aID = idx2artistID[i]
print('%s %s' % ('Artist:', artistID2artistName[aID]))

# Column positions that hold a non-zero entry in row i.
tag_idx = ArtistTimesTagged[i, :].nonzero()[1]
for j in tag_idx:
    tag_label = tagID2tagName[idx2tagID[j]]
    print('%s %s' % (tag_label, ArtistTimesTagged[i, j]))


Artist: Nancy Sinatra
pop 3.0
80s 1.0
female vocalist 1.0
rock 2.0
female vocalists 6.0
soundtrack 2.0
classic rock 4.0
country 1.0
amazing 1.0
60s 7.0
gay 1.0
oldies 2.0
70s 1.0
rain 1.0
the beatles 1.0
cool 1.0
sixties 1.0
politeconomist realife soundtrack 1.0
james bond 1.0
kill bill 1.0
full metal jacket 1.0
1966 1.0
1968 1.0
i love my dad taste 1.0
q sexy 1.0
girlamazing 1.0

In [76]:
# Print NumOfArtists, then show the current size of the artist-name table as the cell output.
n_artist_names = len(artistID2artistName)
print(NumOfArtists)
n_artist_names


17632
Out[76]:
18022

Social Network


In [12]:
# Load user_friends.dat and record each row as a pair of user indices (columns 1 and 2).
df = pd.read_csv('./data/hetrec2011-lastfm-2k/user_friends.dat', sep='\t')
SN = dict.fromkeys(
    ((userID2idx[r[1]], userID2idx[r[2]]) for r in df.itertuples()),
    1,
)

In [150]:
import time

# Render one of the millisecond timestamps from the tag-assignment table as readable text.
timestamp = 1241128800000  # milliseconds; divided by 1000 below to get seconds

# Floor division keeps the seconds value an integer on both Python 2 and Python 3
# (plain `/` yields a float under Python 3). time.gmtime() returns UTC fields,
# which the format string labels as GMT.
timestamp_text = time.strftime("%a %d %b %Y %H:%M:%S GMT", time.gmtime(timestamp // 1000))
timestamp_text


Out[150]:
'Thu 30 Apr 2009 22:00:00 GMT'