In [15]:
import os

import numpy as np
from sklearn.decomposition import PCA

import doremus_data
In [2]:
# Folders handed to doremus_data.init. Each one can be overridden through an
# environment variable so the notebook is not tied to a single machine; when the
# variable is unset, the original hardcoded path is used unchanged.
training_data_folder = os.environ.get(
    'DOREMUS_TRAINING_DATA', '/Users/pasquale/git/recommender/training_data')
emb_folder = os.environ.get(
    'DOREMUS_EMBEDDINGS', '/Users/pasquale/git/music-embeddings')
doremus_data.init(training_data_folder, emb_folder)
In [22]:
def compute_avg_dist(what, n_components=3, seed=None):
    """Measure the per-component spread of an embedding set after PCA.

    The embeddings selected by ``what`` are loaded with
    ``doremus_data.get_embeddings``, projected onto ``n_components`` principal
    components, and the standard deviation along each component is computed
    for the whole set and for two random subsets drawn without replacement
    (at most 100 rows and at most 1000 rows).

    Note: despite the function name, the returned values are standard
    deviations, not average distances.

    Parameters
    ----------
    what
        Selector passed unchanged to ``doremus_data.get_embeddings``
        (this notebook calls it with 'key', 'genre' and 'mop').
    n_components : int, optional
        Number of principal components to keep. Defaults to 3.
    seed : int or None, optional
        Seed for the subsampling. With ``None`` (the default) the global
        NumPy random state is used, so results vary between calls unless
        ``np.random.seed`` was set by the caller. Pass an int for
        reproducible subsets.

    Returns
    -------
    tuple
        ``(std_all, std_100, std_1000)``: the result of ``np.std(..., axis=0)``
        on the reduced vectors for the full set, the <=100-row subset and the
        <=1000-row subset respectively.
    """
    # Only the vectors are used here; the other values returned by the loader
    # are deliberately ignored.
    vectors, _uris, _labels, _head_dim, _heads_print = doremus_data.get_embeddings(what)

    reduced = PCA(n_components=n_components).fit(vectors).transform(vectors)
    population = reduced.shape[0]

    # A dedicated generator is used only when a seed is requested, so the
    # default call keeps consuming the global NumPy random state.
    rng = np.random if seed is None else np.random.RandomState(seed)

    def _subset_std(size):
        # Cap the subset size at the population: sampling without replacement
        # cannot draw more rows than exist.
        rows = rng.choice(population, min(size, population), replace=False)
        return np.std(reduced[rows, :], axis=0)

    # Draw the 100-row subset before the 1000-row one so a seeded run always
    # consumes the random stream in the same order.
    std_100 = _subset_std(100)
    std_1000 = _subset_std(1000)
    return np.std(reduced, axis=0), std_100, std_1000
In [23]:
# Standard deviation per PCA component for the 'key' embeddings:
# (all vectors, random subset of <=100, random subset of <=1000).
compute_avg_dist('key')
Out[23]:
In [44]:
# Standard deviation per PCA component for the 'genre' embeddings:
# (all vectors, random subset of <=100, random subset of <=1000).
compute_avg_dist('genre')
Out[44]:
In [41]:
# Standard deviation per PCA component for the 'mop' embeddings:
# (all vectors, random subset of <=100, random subset of <=1000).
compute_avg_dist('mop')
Out[41]:
In [ ]: