In [ ]:
%matplotlib inline
import os, sys, time
import pickle as pkl
import numpy as np
#from scipy import stats
import matplotlib.pyplot as plt
import seaborn as sns
In [ ]:
# Add 'src' to the module search path so that hdf5_getters (presumably
# located there -- confirm) can be imported.
sys.path.append('src')
import hdf5_getters as h5getters
In [ ]:
# Root data directory and the two pickle files that live directly in it:
# the songID -> trackIDs mapping (read) and the songID -> features map (written).
data_dir = 'data/msd'
fsong2track, ffeatures_msd = (
    os.path.join(data_dir, fname)
    for fname in ('songID2TrackIDs.pkl', 'songID2Features.pkl')
)
Load playlists.
In [ ]:
#playlists_aotm = pkl.load(open(faotm, 'rb'))
In [ ]:
#print('#Playlists: %d' % len(playlists_aotm))
In [ ]:
#playlists_aotm[0]
In [ ]:
#song_set = sorted({songID for p in playlists_aotm for songID in p})
In [ ]:
#print('#Songs: %d' % len(song_set))
In [ ]:
#lengths = [len(p) for p in playlists_aotm]
#plt.hist(lengths, bins=20)
#print('Average playlist length: %.1f' % np.mean(lengths))
Song_id --> Song_name mapping.
In [ ]:
#songID2Name = {s[1]: s[0] for p in playlists_aotm for s in p['playlist']}
Load the song_id --> track_id mapping: a song may correspond to multiple tracks.
In [ ]:
#song2TrackID = pkl.load(open(fmap, 'rb'))
In [ ]:
#len(song2TrackID)
In [ ]:
#{ k : song2TrackID[k] for k in list(song2TrackID.keys())[:10] }
In [ ]:
# Load the songID -> trackIDs mapping. The context manager closes the file
# handle as soon as unpickling finishes (the bare open() call leaked it).
# NOTE: pickle can execute arbitrary code on load -- only use trusted files.
with open(fsong2track, 'rb') as fd:
    song2tracks = pkl.load(fd)
In [ ]:
# Number of entries (song IDs) in the songID -> trackIDs mapping.
len(song2tracks)
In [ ]:
#song2tracks['SOAABRB12A58A792A3']
Load the audio features for a given songID; if a song has more than one trackID, simply use the first available track.
In [ ]:
def gen_h5dir(data_dir, trackID):
    """Return the directory under `data_dir` that holds a track's file.

    The sub-path is made of the 3rd, 4th and 5th characters of `trackID`,
    one directory level per character (e.g. 'TRARPDM...' -> 'A/R/P').

    Raises IndexError if `trackID` has fewer than five characters.
    """
    level1, level2, level3 = trackID[2], trackID[3], trackID[4]
    sub_path = '/'.join((level1, level2, level3))
    return os.path.join(data_dir, sub_path)
In [ ]:
# Sanity check on one example track: open its HDF5 file and print the shape
# of the per-segment pitch matrix.
# NOTE: `msd_h5dir` defined here is also used as a default argument value by
# gen_song_features further down, so this cell must run before that one.
trackID = 'TRARPDM128F14AE1CC'
msd_h5dir = gen_h5dir(data_dir, trackID)
h5 = h5getters.open_h5_file_read(os.path.join(msd_h5dir, trackID + '.h5'))
try:
    # Catalogue of the available getters; uncomment a line to inspect a field.
    # The trailing annotations are the original author's notes.
    #print(h5getters.get_num_songs(h5))
    #print(h5getters.get_artist_mbid(h5))
    #print(h5getters.get_artist_mbtags(h5)) # SS: song specific
    #print(h5getters.get_artist_mbtags_count(h5)) # SS array
    #print(h5getters.get_artist_name(h5))
    #print(h5getters.get_artist_playmeid(h5))
    #print(h5getters.get_artist_terms(h5).shape) # SS, Y, text
    #print(h5getters.get_artist_terms_freq(h5).shape) # SS, Y, numerical
    #print(h5getters.get_artist_terms_weight(h5).shape) # SS, Y, numerical
    #print(h5getters.get_audio_md5(h5))
    #print(h5getters.get_bars_confidence(h5).shape) # SS, Y, numerical
    #print(h5getters.get_bars_start(h5).shape) # SS, Y, numerical
    #print(h5getters.get_beats_confidence(h5).shape) # SS, Y, numerical
    #print(h5getters.get_beats_start(h5).shape) # SS, Y, numerical
    #print(h5getters.get_danceability(h5)) # Y
    #print(h5getters.get_duration(h5)) # Y, seconds
    #print(h5getters.get_end_of_fade_in(h5)) # Y, seconds
    #print(h5getters.get_energy(h5)) # Y
    #print(h5getters.get_key(h5)) # Y
    #print(h5getters.get_key_confidence(h5)) # Y
    #print(h5getters.get_loudness(h5)) # Y
    #print(h5getters.get_mode(h5)) # Y
    #print(h5getters.get_mode_confidence(h5)) # Y
    #print(h5getters.get_release(h5)) # album name
    #print(h5getters.get_release_7digitalid(h5))
    #print(h5getters.get_sections_confidence(h5).shape) # SS, Y, numerical
    #print(h5getters.get_sections_start(h5).shape) # SS, Y, numerical
    #print(h5getters.get_segments_confidence(h5).shape) # SS, Y, numerical
    #print(h5getters.get_segments_loudness_max(h5).shape) # SS, Y, numerical
    #print(h5getters.get_segments_loudness_max_time(h5).shape) # SS, Y, numerical
    #print(h5getters.get_segments_loudness_start(h5).shape) # SS, Y, numerical
    print(h5getters.get_segments_pitches(h5).shape) # SS, Y, numerical, matrix with 12 cols
    #print(h5getters.get_segments_start(h5).shape) # SS, Y, numerical
    #print(h5getters.get_segments_timbre(h5).shape) # SS, Y, numerical, matrix with 12 cols
    #print(h5getters.get_similar_artists(h5).shape) # artist IDs
    #print(h5getters.get_song_hotttnesss(h5)) # Y
    #print(h5getters.get_song_id(h5)) # song ID
    #print(h5getters.get_start_of_fade_out(h5)) # Y, seconds
    #print(h5getters.get_tatums_confidence(h5).shape) # SS, Y, numerical
    #print(h5getters.get_tatums_start(h5).shape) # SS, Y, numerical
    #print(h5getters.get_tempo(h5)) # Y
    #print(h5getters.get_time_signature(h5)) # Y, usual number of beats per bar
    #print(h5getters.get_time_signature_confidence(h5)) # Y
    #print(h5getters.get_title(h5)) # song title
    #print(h5getters.get_track_7digitalid(h5))
    #print(h5getters.get_track_id(h5))
    #print(h5getters.get_year(h5)) # Y, year of release
    #print('age:', time.gmtime().tm_year - h5getters.get_year(h5))
finally:
    # Always release the file handle, even if a getter raises; previously
    # the handle was left open for the rest of the session.
    h5.close()
In [ ]:
def extract_msd_track_features(ftrack):
    """Build a fixed-order numeric feature vector from one track HDF5 file.

    Array-valued fields are summarised by five percentiles (min, 25th,
    median, 75th, max); scalar fields are appended as they are. Three derived
    values are added: end-of-fade-in / duration, start-of-fade-out / duration,
    and the track age in years (current UTC year minus the release year).

    Parameters
    ----------
    ftrack : str
        Path to an existing track file ending in '.h5' or '.H5'.

    Returns
    -------
    numpy.ndarray
        1-D feature array after `np.nan_to_num` (so NaN/inf entries, e.g. a
        NaN song hotttnesss, are replaced by finite numbers). With 12-column
        pitch and timbre matrices the layout is the 202 entries given by the
        index comments in the body.

    Raises
    ------
    AssertionError
        If `ftrack` does not exist or does not have an HDF5 extension.
    """
    assert os.path.exists(ftrack)
    assert ftrack.endswith(('.h5', '.H5'))

    # use a few percentiles to approximate the distribution:
    # min, 25th, median, 75th, max
    percentiles = [0, 25, 50, 75, 100]

    def stats_features(ndarray):
        """Return the percentiles of a 1-D array, or of each column of a 2-D one.

        For a 2-D input the result is grouped by column (all percentiles of
        column 0, then column 1, ...). Empty input yields zeros of the same
        length a non-empty input of that shape would produce, so the overall
        feature layout never shifts.
        """
        if len(ndarray) == 0:
            # An empty 2-D matrix must still contribute one group of
            # percentiles per column; returning only 5 zeros would shorten
            # the vector and misalign every later feature.
            n_cols = np.shape(ndarray)[1] if np.ndim(ndarray) == 2 else 1
            return np.zeros(len(percentiles) * n_cols).tolist()
        assert ndarray.ndim in [1, 2]
        # NOTE(review): `interpolation=` was renamed to `method=` in
        # NumPy 1.22; kept as-is for compatibility with older NumPy.
        res = np.percentile(ndarray, q=percentiles, axis=0, interpolation='nearest')
        # Column-major flattening keeps each column's percentiles together.
        return res.reshape(-1, order='F').tolist()

    # Artist-term frequency/weight statistics (mean/variance) were tried in
    # an earlier version and are intentionally left out; those arrays can be
    # empty.

    features = []
    h5 = h5getters.open_h5_file_read(ftrack)
    try:
        # 0-4 (can be empty)
        features += stats_features(h5getters.get_bars_confidence(h5))
        # 5-9 (can be empty)
        features += stats_features(h5getters.get_bars_start(h5))
        # 10-14 (can be empty)
        features += stats_features(h5getters.get_beats_confidence(h5))
        # 15-19 (can be empty)
        features += stats_features(h5getters.get_beats_start(h5))

        # 20 ### AF
        features.append(h5getters.get_danceability(h5))

        # 21 ### AF -- duration in seconds, reused for the two ratios below
        duration = h5getters.get_duration(h5)
        features.append(duration)

        # 22-23 ### AF -- absolute (seconds) and relative to the duration
        end_of_fade_in = h5getters.get_end_of_fade_in(h5)
        features.append(end_of_fade_in)
        features.append(end_of_fade_in / duration)

        # 24 ### AF
        features.append(h5getters.get_energy(h5))
        # 25 ### AF
        features.append(h5getters.get_key(h5))
        # 26 ### AF
        features.append(h5getters.get_key_confidence(h5))
        # 27 ### AF
        features.append(h5getters.get_loudness(h5))
        # 28 ### AF
        features.append(h5getters.get_mode(h5))
        # 29 ### AF
        features.append(h5getters.get_mode_confidence(h5))

        # 30-34 (can be empty)
        features += stats_features(h5getters.get_sections_confidence(h5))
        # 35-39 (can be empty)
        features += stats_features(h5getters.get_sections_start(h5))
        # 40-44
        features += stats_features(h5getters.get_segments_confidence(h5))
        # 45-49
        features += stats_features(h5getters.get_segments_loudness_max(h5))
        # 50-54
        features += stats_features(h5getters.get_segments_loudness_max_time(h5))
        # 55-59
        features += stats_features(h5getters.get_segments_loudness_start(h5))
        # 60-119 -- matrix with 12 cols
        features += stats_features(h5getters.get_segments_pitches(h5))
        # 120-124
        features += stats_features(h5getters.get_segments_start(h5))
        # 125-184 -- matrix with 12 cols
        features += stats_features(h5getters.get_segments_timbre(h5))

        # 185 ### AF -- can be NaN; handled by nan_to_num at the end
        features.append(h5getters.get_song_hotttnesss(h5))

        # 186-187 ### AF -- absolute (seconds) and relative to the duration
        start_of_fade_out = h5getters.get_start_of_fade_out(h5)
        features.append(start_of_fade_out)
        features.append(start_of_fade_out / duration)

        # 188-192 (can be empty)
        features += stats_features(h5getters.get_tatums_confidence(h5))
        # 193-197 (can be empty)
        features += stats_features(h5getters.get_tatums_start(h5))

        # 198 ### AF
        features.append(h5getters.get_tempo(h5))
        # 199 ### AF -- usual number of beats per bar
        features.append(h5getters.get_time_signature(h5))
        # 200 ### AF
        features.append(h5getters.get_time_signature_confidence(h5))

        # 201 ### AF -- age in years relative to the current UTC year.
        # NOTE(review): if a missing release year is stored as 0, the age
        # equals the current year -- confirm against the dataset.
        year_of_release = h5getters.get_year(h5)
        features.append(time.gmtime().tm_year - year_of_release)
    finally:
        # Close the file even when a getter or a statistic raises.
        h5.close()

    return np.nan_to_num(np.asarray(features), copy=False)
In [ ]:
#np.nan_to_num?
In [ ]:
#trackID = 'TRQVPBD128F1458060'
#trackID = 'TRZARKN128F92DE096'
#trackID = 'TRZEXLQ128F1491D17'
#gen_h5dir(data_dir, trackID)
#extract_msd_track_features(os.path.join(msd_h5dir, trackID + '.h5')).shape
In [ ]:
def gen_song_features(songID, msd_h5dir = msd_h5dir, song2TrackID = song2tracks):
    """Return the feature vector of the first available track of a song.

    Parameters
    ----------
    songID : str
        Song identifier; must be a key of `song2TrackID`.
    msd_h5dir : str, optional
        Unused. Kept only so existing calls keep working: the directory is
        derived per track from the module-level `data_dir`.
    song2TrackID : mapping, optional
        Maps a song ID to an iterable of its track IDs.

    Returns
    -------
    numpy.ndarray or None
        Result of `extract_msd_track_features` for the first track whose
        '.h5' file exists on disk, or None when no track file is available.

    Raises
    ------
    AssertionError
        If `songID` is not in `song2TrackID`.
    """
    assert songID in song2TrackID
    for trackID in song2TrackID[songID]:
        # gen_h5dir needs the data root as its first argument; the previous
        # one-argument call raised TypeError on the very first track.
        track_dir = gen_h5dir(data_dir, trackID)
        h5f = os.path.join(track_dir, trackID + '.h5')
        if os.path.exists(h5f):
            return extract_msd_track_features(h5f)
    # no track available
    return None
In [ ]:
#songID = 'SOFDPDC12A58A7D198'
#songID = 'SOKMCJK12A6D4F6105'
#songID = 'SOGTGJR12A6310E08D'
#songID = song_set_msd[139]
#songID = song_set_msd[443]
#songID = song_set_msd[518]
#gen_song_features(songID)
In [ ]:
# Sorted list of all song IDs (iterating the mapping yields its keys).
song_set_msd = sorted(song2tracks)
In [ ]:
# Number of song IDs to process.
len(song_set_msd)
In [ ]:
#ffeatures = os.path.join(data_dir, 'features.pkl')
# Extract features for every song; songs without any track file on disk
# (gen_song_features returns None) are left out of the result.
song2Feature = {}
n_songs = len(song_set_msd)
cnt = 0
for cnt, songID in enumerate(song_set_msd, 1):
    # Refresh the single-line progress indicator every 1000 songs.
    if cnt % 1000 == 0:
        sys.stdout.write('\r%d / %d' % (cnt, n_songs))
        sys.stdout.flush()
    features = gen_song_features(songID)
    if features is None:
        continue
    song2Feature[songID] = features
In [ ]:
# Number of songs for which features were actually extracted.
len(song2Feature)
In [ ]:
#pkl.dump(song2Feature, open(ffeatures, 'wb'))
In [ ]:
# Persist the songID -> feature-vector map. The context manager guarantees
# the file is flushed and closed (the bare open() call never closed it).
with open(ffeatures_msd, 'wb') as fd:
    pkl.dump(song2Feature, fd)