Investigating MSD genre mapping datasets

Is there one better than the one I hacked together by connecting musicXconnect ids with spotify genre information?


In [4]:
import pandas as pd

In [2]:
import pickle

with open('train_track_info.pickle','r') as f:
	track_info = pickle.load(f)

print "Number of songs in my genre mapping dataset: " + str(len(track_info))


Number of songs in my genre mapping dataset: 27133

In [46]:
tagtraum = pd.read_csv('msd_tagtraum_cd1.cls',sep='\t',comment='#',names=['id','genre1','genre2'],na_values='',dtype=str)

print "Number of songs in tagtraum dataset: " + str(len(tagtraum))


Number of songs in tagtraum dataset: 133676

In [56]:
num_rap = np.sum(tagtraum.genre1 == 'Rap') + np.sum(tagtraum.genre2 == 'Rap')
print "Number of rap songs: %d Percentage rap: %.2f" % (num_rap, 100*float(num_rap)/len(tagtraum))


Number of rap songs: 7363 Percentage rap: 5.51

In [49]:
magd = pd.read_csv('msd_tagtraum_cd1.cls',sep='\t',comment='#',names=['id','genre'],na_values='',dtype=str)

In [51]:
print "Number of songs in magd dataset: " + str(len(magd))


Number of songs in magd dataset: 133676

In [54]:
num_rap = np.sum(magd.genre == 'Rap')
print "Number of rap songs: %d Percentage rap: %.2f" % (num_rap, 100*float(num_rap)/len(magd))


Number of rap songs: 6151 Percentage rap: 4.60

In [ ]: