In [ ]:
%matplotlib inline
%load_ext autoreload
%autoreload 2
import os, sys, time, gzip
import pickle as pkl
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
In [ ]:
# Names of the playlist corpora this notebook can process; the selected
# name is interpolated into the data/<name>/setting2 paths further down.
datasets = [
    "aotm2011",
    "30music",
]
In [ ]:
# Choose the corpus to work on by its position in `datasets`.
dix = 1
dataset_name = datasets[dix]
dataset_name  # echo the selected name as the cell output
In [ ]:
# Directory holding the "setting2" split of the selected dataset.
data_dir = 'data/%s/setting2' % dataset_name

# NOTE: unpickling can execute arbitrary code; only point these paths at
# files produced by this project's own preprocessing.
# Each archive is opened in a `with` block so the gzip handle is closed as
# soon as the object has been loaded, instead of staying open for the
# lifetime of the kernel.

# Train/dev/test playlist split; indexed below by 'train_playlists' and
# 'dev_playlists'.
with gzip.open(os.path.join(data_dir, 'playlists_train_dev_test_s2_1.pkl.gz'), 'rb') as fd:
    playlists2 = pkl.load(fd)

# Lookup from song id to artist id, used to turn playlists into artist
# sequences.
with gzip.open('data/msd/song2artist.pkl.gz', 'rb') as fd:
    song2artist = pkl.load(fd)
In [ ]:
# Concatenate the train and dev playlists; this combined collection is what
# the artist sequences (and hence the artist embedding) are built from.
playlists4train = playlists2['train_playlists'] + playlists2['dev_playlists']
len(playlists4train)  # display how many playlists were combined
In [ ]:
# Translate every training playlist from song ids to artist ids.
# Each entry of `playlists4train` is unpacked as a pair whose first element
# is the sequence of song ids (the second element is ignored). Songs that
# have no entry in `song2artist` become the '$UNK$' placeholder token.
artist_playlist = [
    [song2artist.get(sid, '$UNK$') for sid in songs]
    for songs, _ in playlists4train
]
In [ ]:
# Sanity check: one artist sequence was built per training playlist.
len(artist_playlist)
In [ ]:
# Path of the plain-text corpus file that receives the artist sequences and
# is later handed to the word2vec tool via its -train option.
fartist_playlist = 'data/%s/setting2/artist_seq_playlist.txt' % dataset_name
In [ ]:
# Write the training corpus: one playlist per line, artist tokens separated
# by single spaces.
# The encoding is pinned to UTF-8 so the file content does not depend on the
# platform's locale default; gensim decodes the resulting vocabulary as
# UTF-8 by default when the trained vectors are loaded.
with open(fartist_playlist, 'w', encoding='utf-8') as fd:
    fd.writelines(' '.join(artists) + '\n' for artists in artist_playlist)
In [ ]:
# Path where the word2vec tool stores the trained artist vectors (passed to
# it via -output, together with -binary 1).
fartist2vec_bin = 'data/%s/setting2/artist2vec.bin' % dataset_name
In [ ]:
# Invoke the word2vec executable without arguments via an IPython shell
# escape. Presumably this prints the tool's usage text and doubles as a
# check that the binary has been built at this relative path — confirm.
!word2vec/bin/word2vec
In [ ]:
# Train the artist embedding with the word2vec command-line tool.
# IPython expands $fartist_playlist and $fartist2vec_bin to the values of the
# Python variables before running the command.
# Options: -size 50 (vector dimensionality), -window 10 (context window),
# -iter 10 (training iterations), -binary 1 (write the output in binary
# format).
# NOTE(review): -min-count is not given; the tool's default presumably drops
# infrequent tokens, so rare artists may be missing from the output — confirm.
!word2vec/bin/word2vec -train $fartist_playlist -output $fartist2vec_bin -binary 1 -window 10 -size 50 -iter 10
In [ ]:
import gensim
# The vectors were written by the word2vec tool with -binary 1, so they are
# read with load_word2vec_format(binary=True). Word2Vec.load (kept below for
# reference) is for models saved by gensim itself and does not apply to this
# file.
#artist2vec = gensim.models.Word2Vec.load(fartist2vec_bin)
artist2vec = gensim.models.KeyedVectors.load_word2vec_format(fartist2vec_bin, binary=True)
In [ ]:
# Spot check: fetch the embedding of one artist taken from the corpus
# (the 4th entry of the first playlist).
# NOTE(review): raises IndexError if the first playlist has fewer than four
# entries, and presumably KeyError if that artist did not make it into the
# trained vocabulary — confirm.
aid = artist_playlist[0][3]
artist2vec.get_vector(aid)
In [ ]: