In [6]:
import numpy as np
import numpy
import os

class TopRelated(object):
    """Cosine-similarity lookup over a matrix of per-track latent factors.

    Every row of ``track_factors`` is scaled to unit length once, at
    construction time, so that the similarity between two tracks is simply
    the dot product of their rows.
    """

    def __init__(self, track_factors):
        """Normalize ``track_factors`` row-wise and keep the result.

        Args:
            track_factors: 2-D array-like with one row of factors per track.
        """
        # fully normalize track_factors, so can compare with only the dot product
        norms = numpy.linalg.norm(track_factors, axis=-1)
        # An all-zero row has norm 0; dividing by it would yield NaN, and NaN
        # scores are ordered last by argpartition, i.e. they would be reported
        # as the *best* matches. Dividing by 1 keeps such rows at zero instead
        # (similarity 0 to everything). `norms` is a fresh array, so editing it
        # in place does not touch the caller's data.
        norms[norms == 0] = 1
        self.factors = track_factors / norms[:, numpy.newaxis]

    def get_related(self, matrix_index, N=10):
        """Return the ``N`` rows most similar to row ``matrix_index``.

        Args:
            matrix_index: row index of the query track in the factor matrix.
            N: maximum number of results. Values larger than the number of
                rows are clamped; values <= 0 give an empty list.

        Returns:
            A list of ``(row_index, score)`` pairs sorted by descending score.
            The query row itself is included in the candidates.
        """
        scores = self.factors.dot(self.factors[matrix_index])
        # argpartition raises when asked for more elements than exist, and
        # `[-0:]` would select everything, so clamp and special-case first.
        count = min(N, len(scores))
        if count <= 0:
            return []
        best = numpy.argpartition(scores, -count)[-count:]
        return sorted(zip(best, scores[best]), key=lambda x: -x[1])

In [12]:
# Two parallel MSD <-> LFM-1b matching files: line i of the track-id file is
# paired (via zip below) with line i of the artist/track-name file.
msd_artists_tracks_fname = '../../matchings/msd_lfm-1b/matched_artists_tracks.txt'
msd_track_ids_fname = '../../matchings/msd_lfm-1b/artist_trackname_to_msd_track_ids.txt'

# Read through context managers so the file handles are closed right away
# instead of being left to the garbage collector.
with open(msd_artists_tracks_fname) as msd_artists_tracks_file:
    msd_artist_tracks = [line.strip() for line in msd_artists_tracks_file]
with open(msd_track_ids_fname) as msd_track_ids_file:
    # Each line holds one or more tab-separated MSD track ids.
    msd_track_ids = [line.strip().split('\t') for line in msd_track_ids_file]

# MSD track id -> artist/track-name line.
# NOTE(review): zip stops at the shorter list, so a length mismatch between
# the two files would go unnoticed -- confirm they always have equal length.
msd_track_id_to_artists_trackname = {
    msd_track_id: artist_trackname
    for msd_track_ids_list, artist_trackname in zip(msd_track_ids, msd_artist_tracks)
    for msd_track_id in msd_track_ids_list
}

# NOTE(review): the leading '/' makes this an absolute path (it resolves to
# '/matchings/both/...'), unlike the relative paths above -- confirm this is
# intended and not a typo for '../../matchings/both/...'.
matrix_artist_tracknames_fname = '/../../matchings/both/matched_artists_tracks.txt'
with open(matrix_artist_tracknames_fname) as matrix_artist_tracknames_file:
    matrix_artist_tracknames = [line.strip() for line in matrix_artist_tracknames_file]

# The position of a line in this file is used as the track's matrix index;
# build the mapping in both directions.
artist_trackname_to_matrix_index = {
    artist_trackname: index
    for index, artist_trackname in enumerate(matrix_artist_tracknames)
}
matrix_index_to_artist_trackname = {
    index: artist_trackname
    for index, artist_trackname in enumerate(matrix_artist_tracknames)
}

# MSD track id -> matrix index. Raises KeyError if an MSD artist/track-name
# line is absent from the matrix file.
msd_track_id_to_matrix_index = {
    msd_track_id: artist_trackname_to_matrix_index[artist_trackname]
    for msd_track_ids_list, artist_trackname in zip(msd_track_ids, msd_artist_tracks)
    for msd_track_id in msd_track_ids_list
}

# NOTE(review): hard-coded, machine-specific absolute path.
song_factors_fname = '/home/devin/git/ms-thesis/latent_factors/output/factors_merged_v.npy'
song_factors = np.load(song_factors_fname)
song_factors.shape


Out[12]:
(661392, 80)

In [7]:
# Build the similarity index over the loaded factors; rows are normalized once here.
tr = TopRelated(track_factors=song_factors)

In [8]:
# Seed track for the similarity query, identified by its MSD track id.
msd_track_id = 'TRCOOYB128E078ED95'
# Resolve the id to its artist/track-name line and show it.
artist_trackname = msd_track_id_to_artists_trackname[msd_track_id]
print(artist_trackname)
# Resolve the same id to its matrix index (passed to tr.get_related in a later cell).
matrix_index = msd_track_id_to_matrix_index[msd_track_id]
print(matrix_index)


nirvana	come as you are
229

In [23]:
# Fetch the 20 highest-scoring tracks for the seed track and print one
# "score<TAB>artist/track-name" line per result, best match first.
rel = tr.get_related(matrix_index, N=20)
for related_index, score in rel:
    label = matrix_index_to_artist_trackname[related_index]
    print('{0:.2f}\t'.format(score) + label)


1.00	nirvana	come as you are
0.99	nirvana	lithium
0.98	nirvana	smells like teen spirit
0.97	nirvana	heart-shaped box
0.97	nirvana	rape me
0.97	nirvana	the man who sold the world
0.97	nirvana	about a girl
0.96	nirvana	polly
0.95	nirvana	all apologies
0.94	nirvana	dumb
0.94	nirvana	something in the way
0.94	nirvana	breed
0.93	nirvana	on a plain
0.93	nirvana	drain you
0.92	nirvana	where did you sleep last night
0.91	nirvana	territorial pissings
0.91	nirvana	lounge act
0.91	nirvana	you know you're right
0.90	nirvana	stay away
0.88	rage against the machine	killing in the name

In [ ]: