In [6]:
import numpy as np
import numpy
import os
class TopRelated(object):
    """Nearest-neighbour lookup over a matrix of latent factors.

    Each row of the factor matrix is scaled to unit L2 norm once, at
    construction time, so that a plain dot product between two rows is
    their cosine similarity.
    """

    def __init__(self, track_factors):
        """Normalize and store the factor matrix.

        Parameters
        ----------
        track_factors : array-like, 2-D
            One factor vector per row.

        Notes
        -----
        Rows whose norm is exactly zero are left as all-zero rows instead of
        being divided by zero (which would turn them into NaN and poison
        every score they take part in).
        """
        track_factors = numpy.asarray(track_factors)
        # Fully normalize track_factors, so rows can be compared with only
        # the dot product.
        norms = numpy.linalg.norm(track_factors, axis=-1)
        # Divide zero-norm rows by 1 so they stay zero rather than NaN.
        safe_norms = numpy.where(norms == 0, 1.0, norms)
        self.factors = track_factors / safe_norms[:, numpy.newaxis]

    def get_related(self, matrix_index, N=10):
        """Return the rows most similar to row ``matrix_index``.

        Parameters
        ----------
        matrix_index : int
            Row of the factor matrix to use as the query.
        N : int, optional
            Maximum number of results. Values larger than the number of rows
            are clamped; values <= 0 yield an empty list.

        Returns
        -------
        list of (index, score) tuples
            Sorted by descending score. The query row itself is not
            excluded, so it normally appears in the result.
        """
        scores = self.factors.dot(self.factors[matrix_index])
        # argpartition rejects kth values outside the array, so never ask
        # for more rows than exist.
        count = min(N, scores.shape[0])
        if count <= 0:
            # Without this guard, a `[-0:]` slice would select every row.
            return []
        # argpartition finds the top `count` entries without a full sort;
        # only those few are then sorted.
        best = numpy.argpartition(scores, -count)[-count:]
        return sorted(zip(best, scores[best]), key=lambda pair: -pair[1])
In [12]:
# Input files for the MSD <-> LFM-1b matching. The two files are consumed in
# lockstep below: line i of the first is paired with line i of the second.
msd_artists_tracks_fname = '../../matchings/msd_lfm-1b/matched_artists_tracks.txt'
msd_track_ids_fname = '../../matchings/msd_lfm-1b/artist_trackname_to_msd_track_ids.txt'

# Read inside `with` blocks so the file handles are closed deterministically
# instead of being left for the garbage collector.
with open(msd_artists_tracks_fname) as artists_tracks_file:
    msd_artist_tracks = [line.strip() for line in artists_tracks_file]
with open(msd_track_ids_fname) as track_ids_file:
    # Each line is a tab-separated list of MSD track ids.
    msd_track_ids = [line.strip().split('\t') for line in track_ids_file]

# Every MSD track id on line i maps to the artist/track-name on line i.
# NOTE(review): zip() stops at the shorter list, so a length mismatch between
# the two files would be silently ignored -- confirm they are line-aligned.
msd_track_id_to_artists_trackname = {
    msd_track_id: artist_trackname
    for msd_track_ids_list, artist_trackname in zip(msd_track_ids, msd_artist_tracks)
    for msd_track_id in msd_track_ids_list
}

# NOTE(review): the leading '/' makes this an absolute path, unlike the two
# relative '../../matchings/...' paths above -- confirm this is intended.
matrix_artist_tracknames_fname = '/../../matchings/both/matched_artists_tracks.txt'
with open(matrix_artist_tracknames_fname) as matrix_tracknames_file:
    matrix_artist_tracknames = [line.strip() for line in matrix_tracknames_file]

# Line position in the file above serves as the index; build the lookup in
# both directions. Presumably this index is the row of the factor matrix
# loaded below -- TODO confirm.
artist_trackname_to_matrix_index = {
    artist_trackname: index
    for index, artist_trackname in enumerate(matrix_artist_tracknames)
}
matrix_index_to_artist_trackname = {
    index: artist_trackname
    for index, artist_trackname in enumerate(matrix_artist_tracknames)
}

# Compose the two mappings: MSD track id -> artist/track-name -> index.
# Raises KeyError if an MSD-side name is missing from the matrix-side file.
msd_track_id_to_matrix_index = {
    msd_track_id: artist_trackname_to_matrix_index[artist_trackname]
    for msd_track_ids_list, artist_trackname in zip(msd_track_ids, msd_artist_tracks)
    for msd_track_id in msd_track_ids_list
}

# NOTE(review): hardcoded, machine-specific absolute path; consider deriving
# it from a configurable data directory.
song_factors_fname = '/home/devin/git/ms-thesis/latent_factors/output/factors_merged_v.npy'
song_factors = np.load(song_factors_fname)
song_factors.shape
Out[12]:
In [7]:
# Build the similarity index over the loaded factor matrix. Both
# `TopRelated` and `song_factors` are defined in earlier cells; normalization
# of the factors happens once here, inside the constructor.
tr = TopRelated(song_factors)
In [8]:
# Choose the query track by its MSD track id and resolve it through the two
# lookup dicts built in the data-loading cell. Either lookup raises KeyError
# if the id is not present in the matching files.
msd_track_id = 'TRCOOYB128E078ED95'
# Label stored for this id (a line from the matched artists/tracks file).
artist_trackname = msd_track_id_to_artists_trackname[msd_track_id]
print(artist_trackname)
# Index used to query the similarity model in the next cell.
matrix_index = msd_track_id_to_matrix_index[msd_track_id]
print(matrix_index)
In [23]:
# Fetch the 20 highest-scoring rows for the query index and print one line
# per result: the score with two decimals, a tab, then the label looked up
# for that row's index.
rel = tr.get_related(matrix_index, N=20)
for related_index, similarity in rel:
    label = matrix_index_to_artist_trackname[related_index]
    print('{0:.2f}\t'.format(similarity) + label)
In [ ]: