In [5]:
import math
import os
import random
import re
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from datetime import datetime
from scipy import spatial
from scipy import stats
from sklearn.metrics.pairwise import cosine_similarity

plt.rcParams["figure.figsize"] = (20,5)

import doremus_data

Load data.


In [6]:
training_data_folder = '/Users/pasquale/git/recommender/training_data'
emb_folder = '/Users/pasquale/git/music-embeddings'

doremus_data.init(training_data_folder, emb_folder)
vectors, uris, lbs, heads, heads_print = doremus_data.get_embeddings('expression')
pd.DataFrame(heads_print)


Out[6]:
         0         1      2    3                 4
0  casting  composer  genre  key  composition_date
1        3         3      3    3                 1

In [7]:
all_training = doremus_data.all_training('expression')

def training_stats(t):
    num_playlists = len(t['playlists'])
    num_track = [len(p['data']) for p in t['playlists']]
    distinct = len(np.unique(np.concatenate([p['data'] for p in t['playlists']])))
    return num_playlists, np.sum(num_track), np.mean(num_track), distinct

names = [t['name'] for t in all_training]
# for t in all_training:
#     num_playlists, num_track = training_stats(t)
#     print('%s\t\t%d' % (t['name'], num_playlists))
pd.DataFrame([training_stats(t) for t in all_training], index=names, columns='playlists,tracks,tracks per pl,distinct tracks'.split(','))


Out[7]:
                 playlists  tracks  tracks per pl  distinct tracks
pp_concerts            223    2561      11.484305             2417
itema3_concerts        414    4748      11.468599             4748
web-radio              105    1934      18.419048             1407
spotify_pl              65    1836      28.246154             1425

In [8]:
_l = 5

for t in all_training:
    temp_playlists = []
    for pl in t['playlists']:
        for i in np.arange(len(pl['data']) - _l):
            temp_playlists.append(pl['data'][i:i+_l])
    t['groups'] = [{'name':str(index), 'data': pl} for index, pl in enumerate(temp_playlists)]
    print('%s\t\t%d' % (t['name'], len(temp_playlists)))


pp_concerts		1446
itema3_concerts		2678
web-radio		1409
spotify_pl		1511
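
As a sanity check, here is the same windowing applied to a toy playlist (illustrative only). A playlist of n tracks yields n - _l windows; note that, as written, the window ending on the last track is skipped (np.arange(len(...) - _l + 1) would include it).

In [ ]:
# illustrative sketch of the _l-sized sliding window above
toy = ['t0', 't1', 't2', 't3', 't4', 't5', 't6']
for i in np.arange(len(toy) - _l):
    print(toy[i:i + _l])
# 7 tracks -> 2 windows: t0..t4 and t1..t5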

Data pre-processing


In [9]:
negVector = -2. * np.ones_like(vectors[0], dtype=np.float32)

def get_embs(x, masked=False):
    # uri to embedding
    v = vectors[np.argwhere(uris == x)]
    if v.size == 0:
        print('missing embedding: %s' % x)
        result = np.array(negVector)
    else:
        result = np.array(v[0][0])
    if masked:
        result = np.ma.array(result, mask=result < -1.)
    return result

def get_label(x):
    l = lbs[np.argwhere(uris == x)]
    return l[0][0] if l.size > 0 else 'none'
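
A minimal usage sketch of the two lookups (the URI is one of the expressions queried later in this notebook, and is assumed to be present in the loaded embeddings):

In [ ]:
# look up one expression: label plus masked embedding (masked entries = missing features)
uri = 'http://data.doremus.org/expression/edfbf89b-1464-32d5-b0e0-3b8db4d80ef1'
print(get_label(uri))
emb = get_embs(uri, masked=True)
print(emb.shape, '| missing dims:', emb.mask.sum())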

In [10]:
np.set_printoptions(2)
def compute_playlist_stats(playlist, to_print=False):
    pl = playlist['data']
    embeddings = np.array([get_embs(xi) for xi in pl])
    emb_len = len(embeddings[0])
    
    ma_embeddings = np.ma.array(embeddings, mask=embeddings < -1.)

    # mask dimensions that have at most one observed value, so they do not skew the stats
    mul_values = np.where(np.sum(embeddings >= -1., axis=0) > 1, False, True)
    mul_values = np.repeat([mul_values], len(pl), axis=0)
    ma_embeddings = np.ma.array(ma_embeddings, mask=mul_values)

    _mean = ma_embeddings.mean(axis=0)
    _median = np.ma.median(ma_embeddings, axis=0)
    _min = np.ma.min(ma_embeddings, axis=0)
    _max = np.ma.max(ma_embeddings, axis=0)
    _std = np.ma.std(ma_embeddings, axis=0)
    
    if to_print:
        plt.errorbar(np.arange(len(_mean)), _mean, _std, fmt='ok',
                     ecolor='black', elinewidth=1.5, lw=3, capsize=2)
        plt.errorbar(np.arange(len(_mean)), _mean, [_mean - _min, _max - _mean],
                 fmt='.k', ecolor='gray', elinewidth=0.5, lw=1, capsize=1)

        plt.errorbar(np.arange(len(_mean)), _median, fmt='_g',  lw=1)

        plt.xticks(range(len(heads)), heads, rotation=40)
        plt.show()

    return _mean, _median, _std

Sample playlist


In [11]:
base = all_training[3]
print('Base: ' + base['name'])
pl = base['groups'][1023]
print('Playlist: ' + pl['name'])

m, md, s = compute_playlist_stats(pl, True)

for d in pl['data']:
    print(d.replace('data', 'overture'))
    print(get_label(d)) 
    print(np.ma.array(get_embs(d), mask=get_embs(d) < -1.))


Base: spotify_pl
Playlist: 1023
http://overture.doremus.org/expression/ca951b03-f84b-3a7a-aef0-7023af6e8b35
Symhonies
[0.03590903803706169 0.007821599952876568 0.09708871692419052
 -0.006353070493787527 0.008631796576082706 0.015063508413732052
 0.03582938387989998 -0.015889763832092285 0.02758117951452732
 -0.0902123674750328 0.3212423026561737 0.09681284427642822
 0.8038095235824585]
http://overture.doremus.org/expression/41ace8c4-a1b7-3b9d-8339-026e14ed1b54
Symphonies
[-- -- -- -0.003346540266647935 0.012496697716414928 0.012410242110490799
 0.03334466740489006 -0.001616666791960597 0.0029975688084959984
 -0.1695733219385147 0.1698441505432129 -0.1706843227148056
 0.7371428608894348]
http://overture.doremus.org/expression/b53f42ba-9ec1-3e3d-9aa2-1fbff4405093
Requiem
[-0.05006037652492523 0.05418456718325615 0.05414140969514847
 -0.003299245610833168 0.012398893013596535 0.012594932690262794
 0.0664895549416542 0.014949981123209 -0.04941561073064804
 -0.1695733219385147 0.1698441505432129 -0.1706843227148056
 0.7057142853736877]
http://overture.doremus.org/expression/4baa576b-dfd0-3400-a5cf-656fc67ab960
The inextinguible
[-0.0017500189132988453 0.047637034207582474 0.0068437689915299416
 -0.0063615478575229645 0.008760612457990646 0.014870624989271164
 0.03334466740489006 -0.001616666791960597 0.0029975688084959984 -- -- --
 0.8238095045089722]
http://overture.doremus.org/expression/f9932fa6-d54d-32d0-8280-cef23d7adb1b
3 grandes études
[0.03590903803706169 0.007821599952876568 0.09708871692419052
 -0.00629316084086895 0.00841981079429388 0.014844994992017746
 0.030921481549739838 -0.0332709364593029 0.021514253690838814 -- -- --
 0.7504761815071106]

In [ ]:

Standard Deviation among all playlists


In [12]:
population_tot = np.sum([len(pl['data']) for tr in all_training for pl in tr['groups'] ])
std = {}
population = {}
mean = {}
stdw = {} # std within
stdb = {} # std between
                      
for tr in all_training:
    name = tr['name']
    std[name] = []
    population[name] = []
    mean[name] = []
    for pl in tr['groups']:
        _mean, _median, _std = compute_playlist_stats(pl, False)
        pop = len(pl['data'])
        population[name].append(pop)
        mean[name].append(_mean)
        ww = (pop - 1) / (population_tot - 1)
        std[name].append((_std**2)*ww)

    stdw[name] = np.ma.sum(std[name], axis=0).filled(0)

In [13]:
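# grand mean of each dimension over all windows, weighted by dataset populations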
weighted_means = [np.ma.mean(mean[name], axis=0)*np.sum(population[name]) for name in mean]

mtot = np.ma.sum(weighted_means, axis=0)
mtot /= population_tot

In [35]:
fig, ax = plt.subplots(figsize=(20,5))
width = 0.2
pos = np.arange(len(vectors[0]))
colors = ['#3668C9', '#DA3B21', '#FD9827', '#1D9424']


for index, tr in enumerate(all_training):
    name = tr['name']
    
    ww = np.array([pop / (population_tot - 1) for pop in population[name]])
    mg = np.ma.power(np.ma.array(mean[name]).filled(mtot) - mtot, 2)
    stdb[name] = np.ma.sum(mg * ww.reshape(len(ww),1), axis=0)

    plt.bar([p + index * width for p in pos],
        stdb[name],
        width,
        alpha=0.5,
        color=colors[index],
        label=name + ' (between)')
    
    plt.bar([p + index * width for p in pos],
        stdw[name] ,
        width,
        edgecolor='#000000',
        alpha=.5,
        color=colors[index],
        label=name + ' (within)')
    

    
flat_std = [stdw[name] for name in stdw]
mstd = np.ma.mean(flat_std, axis=0)
sstd = np.ma.std(flat_std, axis=0)
smin = np.ma.min(flat_std, axis=0)
smax = np.ma.max(flat_std, axis=0)

# ax.plot(pos, mstd, '--', label='mean')

heads[-1] = 'date'
plt.xticks(range(len(heads)), heads, rotation=40)
ax.set_ylabel('Variance')
ax.set_xticks([p + 1.5 * width for p in pos])

handles, labels = ax.get_legend_handles_labels()
ax.legend(handles, labels, loc='upper left')
plt.show()

print('Standard Deviation: mean' )
print(np.array(mstd))
print('Standard Deviation: minimum' )
print(np.array(smin))

flat_std_b = [stdb[name] for name in stdb]
mstd_b = np.ma.mean(flat_std_b, axis=0)
print('Standard Deviation between: mean' )
print(np.array(mstd_b))


Standard Deviation: mean
[2.33e-04 3.75e-05 7.31e-05 6.52e-07 2.19e-06 4.60e-06 1.36e-05 3.53e-05
 6.20e-05 5.18e-04 5.67e-04 6.83e-04 3.62e-04]
Standard Deviation: minimum
[0.00e+00 0.00e+00 0.00e+00 2.12e-07 1.47e-06 2.89e-06 3.69e-06 8.53e-06
 1.10e-05 0.00e+00 0.00e+00 0.00e+00 8.17e-05]
Standard Deviation between: mean
[1.61e-04 5.63e-05 8.80e-05 5.31e-06 4.64e-06 1.09e-05 3.99e-05 3.65e-04
 1.12e-04 5.11e-04 7.17e-04 6.09e-04 1.51e-03]

Explanation

When the standard deviation within (black border) is smaller than the standard deviation between (no border), the values of that dimension are more homogeneous inside the group than across groups. If this inside/outside difference in homogeneity is large, I can state that this dimension drives the playlist generation.
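
A toy, single-dimension sketch of this decomposition (hypothetical numbers, mirroring the (pop - 1)/(population_tot - 1) and pop/(population_tot - 1) weights used above):

In [ ]:
# toy example: two internally tight playlists that sit far apart from each other
pl_a = np.array([0.10, 0.11, 0.09])
pl_b = np.array([0.90, 0.91, 0.89])
n_tot = len(pl_a) + len(pl_b)
grand_mean = np.concatenate([pl_a, pl_b]).mean()
var_within = sum(p.var() * (len(p) - 1) / (n_tot - 1) for p in (pl_a, pl_b))
var_between = sum((p.mean() - grand_mean) ** 2 * len(p) / (n_tot - 1) for p in (pl_a, pl_b))
print('within: %.5f | between: %.5f' % (var_within, var_between))
# within << between: this dimension would be flagged as driving the grouping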

Graphs property by property


In [15]:
def display_graph(feat):
    pos = np.where(np.array(heads) == feat)[0]
    pos_slide = np.arange(len(pos))
    fig, ax = plt.subplots(figsize=(20,5))

    for index, tr in enumerate(all_training):
        name = tr['name']
        plt.bar([p + index * width for p in pos_slide],
            stdb[name][pos],
            width,
            alpha=0.5,
            color=colors[index],
            label=name + ' (std between)')

        plt.bar([p + index * width for p in pos_slide],
            stdw[name][pos] ,
            width,
            alpha=0.5,
            edgecolor='#000000',
            color=colors[index],
            label=name + ' (std within)')

    ax.plot(pos_slide, mstd[pos], '--', label='mean')

    plt.xticks(pos_slide, np.array(heads)[pos], rotation=40)
    ax.set_ylabel('Standard Deviation')
    ax.set_xticks([p + 1.5 * width for p in pos_slide])
#     handles, labels = ax.get_legend_handles_labels()
#     ax.legend(handles, labels, loc='upper left')
    plt.show()
    
    
for _f in heads_print[0]:
    display_graph(_f)


For concerts


In [16]:
flat_std = [s for name in ['pp_concerts','itema3_concerts'] for s in std[name]]
mstd = np.ma.mean(flat_std, axis=0)
sstd = np.ma.std(flat_std, axis=0)
smin = np.ma.min(flat_std, axis=0)
smax = np.ma.max(flat_std, axis=0)

print('Standard Deviation: mean' )
print(np.array(mstd))
print('Standard Deviation: minimum' )
print(np.array(smin))
print('Standard Deviation: maximum' )
print(np.array(smax))


Standard Deviation: mean
[5.75e-08 2.07e-08 3.08e-08 6.18e-10 1.32e-09 2.55e-09 5.15e-09 9.59e-09
 8.84e-09 1.76e-06 1.45e-06 2.51e-06 1.69e-07]
Standard Deviation: minimum
[0.00e+00 0.00e+00 0.00e+00 0.00e+00 0.00e+00 0.00e+00 0.00e+00 0.00e+00
 0.00e+00 3.32e-08 3.55e-09 3.28e-08 0.00e+00]
Standard Deviation: maximum
[4.34e-07 2.16e-07 2.62e-07 7.64e-09 1.45e-08 1.61e-08 1.66e-07 2.10e-07
 2.68e-07 6.89e-06 5.72e-06 8.69e-06 4.12e-05]

For playlists


In [17]:
flat_std = [s for name in ['web-radio','spotify_pl'] for s in std[name]]
mstd = np.ma.mean(flat_std, axis=0)
sstd = np.ma.std(flat_std, axis=0)
smin = np.ma.min(flat_std, axis=0)
smax = np.ma.max(flat_std, axis=0)


print('Standard Deviation: mean' )
print(np.array(mstd))
print('Standard Deviation: minimum' )
print(np.array(smin))
print('Standard Deviation: maximum' )
print(np.array(smax))


Standard Deviation: mean
[3.11e-07 4.85e-08 9.58e-08 1.72e-10 1.46e-09 3.33e-09 1.33e-08 3.83e-08
 7.59e-08 1.34e-06 1.50e-06 1.76e-06 3.87e-07]
Standard Deviation: minimum
[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
Standard Deviation: maximum
[1.58e-06 3.18e-07 5.52e-07 1.71e-09 1.02e-08 1.33e-08 1.61e-07 3.38e-07
 6.62e-07 7.71e-06 8.56e-06 8.69e-06 7.55e-06]

In [ ]:

Computing gaps


In [18]:
def get_std_gap(_chosen):
#     return (stdb[_chosen] - stdw[_chosen]) / ((stdw[_chosen] + stdb[_chosen]) / 2 )
    return  stdb[_chosen] / stdw[_chosen]

def display_distances(_chosen):
    better = (stdw[_chosen] <= stdb[_chosen]).tolist()
    distance = get_std_gap(_chosen)

    pd.set_option('precision', 3)
    return pd.DataFrame([better, distance.tolist(), stdw[_chosen].tolist(), stdb[_chosen].tolist()], 
                        index=['homogeneous', 'between/within ratio', 'std within', 'std between'], columns=heads)
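
To read the tables below: a ratio above 1 on a dimension means the values are more homogeneous within playlists than across them. As a spot check, recomputing the genre[1] ratio of spotify_pl from the values reported in the first table:

In [ ]:
# spot check of the between/within ratio (values taken from the spotify_pl table below)
stdw_dim, stdb_dim = 5.09e-05, 2.80e-04
print('ratio: %.2f' % (stdb_dim / stdw_dim))  # ~5.50, matching the reported value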

In [19]:
display_distances('spotify_pl')


Out[19]:
casting casting casting composer composer composer genre genre genre key key key composition_date
homogeneous False True True True True True False True False False True False True
between/within ratio 0.5 1.45 1.14 4.15 1.47 1.37 0.999 5.5 0.728 0.886 1.12 0.843 2.65
std within 0.000475 7.24e-05 0.000144 2.9e-07 2.72e-06 5.75e-06 2.05e-05 5.09e-05 9.42e-05 0.00105 0.00125 0.00136 0.000646
std between 0.000237 0.000105 0.000164 1.21e-06 4e-06 7.87e-06 2.05e-05 0.00028 6.86e-05 0.000931 0.00139 0.00115 0.00171

In [20]:
display_distances('web-radio')


Out[20]:
casting casting casting composer composer composer genre genre genre key key key composition_date
homogeneous False True True True True True True True False True True True True
between/within ratio 0.444 1.32 1.01 5.41 1.73 1.88 1.16 3.91 0.495 1.19 1.47 1.04 1.26
std within 0.000432 6.88e-05 0.000135 2.12e-07 1.55e-06 3.96e-06 1.81e-05 6.05e-05 0.000127 0.000868 0.000894 0.00115 0.000478
std between 0.000192 9.09e-05 0.000137 1.15e-06 2.68e-06 7.46e-06 2.1e-05 0.000236 6.27e-05 0.00103 0.00132 0.0012 0.000604

In [21]:
display_distances('pp_concerts')


Out[21]:
casting casting casting composer composer composer genre genre genre key key key composition_date
homogeneous True True True True True True True True True False True False True
between/within ratio 8.67 3.29 3.82 14.9 2.49 5.88 3.26 5.07 9.85 0.536 1.23 0.42 26.5
std within 2.49e-05 8.95e-06 1.33e-05 1.13e-06 1.47e-06 2.89e-06 3.69e-06 8.53e-06 1.1e-05 0.000155 0.000127 0.000221 8.17e-05
std between 0.000216 2.94e-05 5.09e-05 1.68e-05 3.64e-06 1.7e-05 1.21e-05 4.33e-05 0.000108 8.31e-05 0.000157 9.26e-05 0.00216

In [22]:
display_distances('itema3_concerts')


Out[22]:
casting casting casting composer composer composer genre genre genre key key key composition_date
homogeneous True True True True True True True True True True True True True
between/within ratio None None None 2.14 2.74 1.98 8.68 42.7 12.7 None None None 6.37
std within 0 0 0 9.76e-07 3.02e-06 5.79e-06 1.22e-05 2.11e-05 1.63e-05 0 0 0 0.000244
std between 0 0 0 2.09e-06 8.26e-06 1.14e-05 0.000106 0.0009 0.000208 0 0 0 0.00155

Two different tendencies emerge between concerts and playlists.

In concerts, all the dimensions (where they exist) are more homogeneous. This holds in particular for the casting (which, conversely, should not be used for playlists), and this is reasonable.

The 1st dimension of composer is also one to take into account. In Itema3 this is probably not visible because of bad interlinking of artists.

For the keys, the values are less relevant, and their sign is not stable.

Always positive (some by a large margin):

  • composer[0]
  • composer[1,2]
  • genre[2]
  • composition_date
  • key[2]
  • casting[2]

Positive only in concerts:

  • casting[all]
  • genre[all]
  • composition_date

Tuning the recommender system


In [23]:
def compute_weights(threshold=1.3, fallback=.6, datasets=['spotify_pl']):
    dist = [get_std_gap(_chosen) for _chosen in datasets]
    dist = np.ma.mean(dist, axis=0).filled()

    return np.where(dist > threshold, dist, fallback)

w = compute_weights()
pd.DataFrame([w], columns=heads)


Out[23]:
casting casting casting composer composer composer genre genre genre key key key composition_date
0 0.6 1.45 0.6 4.155 1.467 1.368 0.6 5.505 0.6 0.6 0.6 0.6 2.653

In [24]:
def get_pool_from_datasets(datasets=['web-radio','spotify_pl']):
    all_song = []
    for t in all_training:
        if t['name'] in datasets:
            for pl in t['playlists']:
                 all_song = np.concatenate([all_song, pl['data']])
    all_song = np.unique(all_song)
    print('Pool size: %d' % len(all_song))
    all_song_vec = np.ma.array([get_embs(xi, masked=True) for xi in all_song])
#     all_song_vec = np.ma.array(all_song_vec, mask=all_song_vec < -1.)
    all_song_labels = np.array([get_label(xi) for xi in all_song])
    return all_song, all_song_vec, all_song_labels

In [25]:
def computeSimilarity(seed, target, w):
    b1 = np.where(seed.mask==True)[0]
    b2 = np.where(target.mask==True)[0]
    bad_pos = np.unique(np.concatenate([b1, b2]))

    _seed = np.delete(seed, bad_pos, axis=0)
    _target = np.delete(target, bad_pos, axis=0)
    _w = np.delete(w, bad_pos, axis=0)

    if len(_seed) == 0:
        return 0

    # distance
    d = weightedL2(_seed, _target, _w)

    # penalty: fraction of the seed's dimensions that are missing in the target
    penalty = len([x for x in b2 if x not in b1]) / len(seed)

    # score
    s = (max_distance - d) / max_distance
    return s * (1 - penalty)


def weightedL2(a, b, w=1):
#     return distance.cosine(a,b)
    # https://stackoverflow.com/a/8861999/1218213
    q = a - b
    return np.sqrt((w * q * q).sum())
#     return (w * q * q).sum()


_ones = np.ones(vectors[0].shape)
max_distance = weightedL2(_ones,-_ones, _ones)
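
A small check of the score behaviour on toy masked vectors (a sketch, not real embeddings): identical vectors score 1, and a dimension present in the seed but missing in the target lowers the score through the penalty term.

In [ ]:
# toy check of computeSimilarity: identical vectors vs. a target with a masked dimension
a = np.ma.array([0.1, 0.2, 0.3], mask=[False, False, False])
b = np.ma.array([0.1, 0.2, -2.], mask=[False, False, True])
w3 = np.ones(3)
max_distance = weightedL2(np.ones(3), -np.ones(3), w3)
print(computeSimilarity(a, a, w3))  # 1.0
print(computeSimilarity(a, b, w3))  # ~0.667: penalty of 1/3 for the missing dimension
max_distance = weightedL2(_ones, -_ones, _ones)  # restore the full-length default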

In [26]:
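# note: the pool default argument is evaluated once, at definition time,
# which is what triggers the "Pool size" print under this cell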
def find(seed, n=4, w=None, _print=True, pool=get_pool_from_datasets()):
    global max_distance
    
    _uris = pool[0]
    _vectors = pool[1]
    _lbs = pool[2]

    
    f_length = len(seed)
    
    _seed = seed
    
    if w is None:
        w = np.ones(len(_seed))
        w = w / w.sum()
    else:
        w = np.array(w)
#         temp = [np.ones(f_length[k]) * w[k] for k in range(len(w))]
#         w = np.array([item for sublist in temp for item in sublist])

    max_distance = weightedL2(np.ones(len(_seed)), np.ones(len(_seed)) * -1, w)

    if _print: print('computing scores')
    scores = np.array([[computeSimilarity(_seed, x.astype(float), w) for x in _vectors]])
    full = np.concatenate([_uris.reshape(len(_uris), 1), scores.transpose(), _lbs.reshape(len(_uris), 1)], axis=1)

    # remove the seed from the list
#     full = np.delete(full, pos, 0)

    # sort
    full_sorted = sorted(full, key=lambda _x: float(_x[1]), reverse=True)
    most_similar = full_sorted[:n]
    if _print: print('\n'.join('%s %s\n%s' % (f[0], f[1], f[2]) for f in most_similar))

    return [{'uri': _a[0], 'score': float(_a[1])} for _a in most_similar]


Pool size: 2563

In [33]:
find(get_embs('http://data.doremus.org/expression/edfbf89b-1464-32d5-b0e0-3b8db4d80ef1', masked=True))


computing scores
http://data.doremus.org/expression/edfbf89b-1464-32d5-b0e0-3b8db4d80ef1 1.0
Lettre à Elise
http://data.doremus.org/expression/700d8ae4-10b4-323a-91a7-e7721fcc461a 0.9963845712577681
Songs without words
http://data.doremus.org/expression/6b78257e-d9ac-3731-a87d-c69d1a7fce85 0.990682769984646
Rondos. Guitare. La mineur. Op. 2, no 3
http://data.doremus.org/expression/ca3a3163-830f-33f2-94a7-22eda0b9ae7d 0.987704539913521
Toccatas. Piano. Do majeur. Op. 7
Out[33]:
[{'score': 1.0,
  'uri': 'http://data.doremus.org/expression/edfbf89b-1464-32d5-b0e0-3b8db4d80ef1'},
 {'score': 0.9963845712577681,
  'uri': 'http://data.doremus.org/expression/700d8ae4-10b4-323a-91a7-e7721fcc461a'},
 {'score': 0.990682769984646,
  'uri': 'http://data.doremus.org/expression/6b78257e-d9ac-3731-a87d-c69d1a7fce85'},
 {'score': 0.987704539913521,
  'uri': 'http://data.doremus.org/expression/ca3a3163-830f-33f2-94a7-22eda0b9ae7d'}]

In [28]:
find(get_embs('http://data.doremus.org/expression/73e749e6-b727-3bfb-bcea-d895c86ec46c', masked=True))


computing scores
http://data.doremus.org/expression/73e749e6-b727-3bfb-bcea-d895c86ec46c 1.0
La Walkyrie
http://data.doremus.org/expression/4edab563-b6d7-38d5-b5b4-8cab4ce81ba6 0.9997358537588898
L'or du Rhin
http://data.doremus.org/expression/e2ed5a11-300b-35d6-bf3e-3044d00c642e 0.9987594389083904
Les Troyens
http://data.doremus.org/expression/784e4581-dd8e-3110-b052-4526f9c0025c 0.9982900558315764
Lohengrin
Out[28]:
[{'score': 1.0,
  'uri': 'http://data.doremus.org/expression/73e749e6-b727-3bfb-bcea-d895c86ec46c'},
 {'score': 0.9997358537588898,
  'uri': 'http://data.doremus.org/expression/4edab563-b6d7-38d5-b5b4-8cab4ce81ba6'},
 {'score': 0.9987594389083904,
  'uri': 'http://data.doremus.org/expression/e2ed5a11-300b-35d6-bf3e-3044d00c642e'},
 {'score': 0.9982900558315764,
  'uri': 'http://data.doremus.org/expression/784e4581-dd8e-3110-b052-4526f9c0025c'}]

In [34]:
find(get_embs('http://data.doremus.org/expression/6dd59267-fef0-392d-911f-7abbe676e289', masked=True))


computing scores
http://data.doremus.org/expression/6dd59267-fef0-392d-911f-7abbe676e289 1.0
Les quatre saisons
http://data.doremus.org/expression/f57d703c-276b-3fbf-bcdb-cfdc3e56ea59 0.9985493985051243
12 concertos
http://data.doremus.org/expression/6b5881ec-2df3-368c-9c1d-fc542c955c4e 0.9981870395360538
Concertos. Clavier, flûtes à bec (2), orchestre à cordes, basse continue. Fa majeur. BWV 1057
http://data.doremus.org/expression/d9b19dff-b0eb-3221-b4f1-31fb7e651b91 0.9978019939605289
6 concertos
Out[34]:
[{'score': 1.0,
  'uri': 'http://data.doremus.org/expression/6dd59267-fef0-392d-911f-7abbe676e289'},
 {'score': 0.9985493985051243,
  'uri': 'http://data.doremus.org/expression/f57d703c-276b-3fbf-bcdb-cfdc3e56ea59'},
 {'score': 0.9981870395360538,
  'uri': 'http://data.doremus.org/expression/6b5881ec-2df3-368c-9c1d-fc542c955c4e'},
 {'score': 0.9978019939605289,
  'uri': 'http://data.doremus.org/expression/d9b19dff-b0eb-3221-b4f1-31fb7e651b91'}]

In [46]:
def recommend_compare(playlist, w, pos=-1, num_candidates=[100, 200, 500],
                      verbose=True, pool=get_pool_from_datasets(), overture=True):
    pl_data = playlist['data']
    pl_population = len(pl_data)
    if verbose: print('%d items | %s' % (pl_population, playlist['name'].split('/')[-1]))
    
    _replcm = 'overture.' if overture else 'data.'
    
    if pos < 0:
        pos = random.randrange(pl_population)
   
    chosen = pl_data[pos]
    targets = pl_data
#     [max(pos-7,0):min(pos+7, len(pl_data))]
    targets_pop = len(targets)
#     print(max(pos-5,0))
#     print(min(pos+5, len(pl_data)))
#     print(targets_pop)

    if verbose: 
        print('seed: %d) %s' % (pos, get_label(chosen)))
        print('\t '+ chosen.replace('data.', _replcm))

    
    first = get_embs(chosen, masked=True)

    candidates = find(first, n=np.max(num_candidates), _print=False, pool=pool )
    candidates_2 = find(first, n=np.max(num_candidates), _print=False, w = w, pool=pool )
    
    results = np.zeros((len(num_candidates), 3))
    
    for qi, q in enumerate(num_candidates):
        trues_flat = 0
        for index, c in enumerate(candidates[:q]):
            if c['uri'] == chosen: continue
            if c['uri'] in targets: trues_flat+=1
#             if verbose:
#                 _sig = '  X  ' if c['uri'] in targets else '   '
#                 print('%d \t %.5f'% (index, c['score']) + '\t' + _sig + '\t' + get_label(c['uri']))
#                 print('\t\t\t\t'+ c['uri'].replace('data.', 'overture.'))
        #     display(pd.DataFrame(np.ma.array( candidates[c]).reshape(1, 13)))


        trues = 0
        for index, c in enumerate(candidates_2[:q]):
            if c['uri'] == chosen: continue
            if c['uri'] in targets: trues+=1
#             if verbose:
#                 _sig = '  X  ' if c['uri'] in pl_data else '   '
#                 print('%d \t %.5f'% (index, c['score']) + '\t' + _sig + '\t' + get_label(c['uri']))
#                 print('\t\t\t\t'+ c['uri'].replace('data.', 'overture.'))
#                 # display(pd.DataFrame(np.ma.array( candidates[c]).reshape(1, 13)))

        if verbose: print('%d | flat %d | weighted %d | diff %d' % (q, trues_flat, trues, trues-trues_flat))
        results[qi] = [trues / targets_pop, trues_flat / targets_pop, (trues-trues_flat) / targets_pop]
    return results


Pool size: 2563

In [47]:
pl = all_training[3]['playlists'][55]
recommend_compare(pl, w, 9)


47 items | 37i9dQZF1DXaSipEWiHbyL.Classical World: USA.expression.txt
seed: 9) Electric counterpoint
	 http://overture.doremus.org/expression/358ec945-c4a0-381e-921e-8fba13266007
100 | flat 11 | weighted 14 | diff 3
200 | flat 16 | weighted 18 | diff 2
500 | flat 24 | weighted 28 | diff 4
Out[47]:
array([[0.3 , 0.23, 0.06],
       [0.38, 0.34, 0.04],
       [0.6 , 0.51, 0.09]])

Computation on all playlists


In [48]:
out_path = './out'

def ensure_dir(file_path):
    directory = os.path.dirname(file_path)
    if not os.path.exists(directory):
        os.makedirs(directory)
    return file_path

from tqdm import tqdm_notebook as tqdm

In [49]:
def test_recommendation(pool, playlist, pos=-1, mode='random', w=None, name='', verbose=0, overture=False, write=False):
    pl_data = playlist['data']
    pl_population = len(pl_data)
    pl_name = playlist['name'].split('/')[-1].replace('.json', '')
    
    _replcm = 'overture.' if overture else 'data.'

    if pos < 0:
        pos = random.randrange(pl_population)
    chosen = pl_data[pos]
    pl_data = np.delete(pl_data, pos, axis=0)

    if verbose > 0: 
        print('%d items | %s' % (pl_population, pl_name))
        print('seed: %d) %s' % (pos, get_label(chosen)))
        print('\t '+ chosen.replace('data.', _replcm))

    
    first = get_embs(chosen, masked=True)
    num_candidates=[100, 200, 500]
    max_candidates = np.max(num_candidates)

    if mode == 'flat':
        candidates = find(first, n=max_candidates, _print=False, pool=pool)
    elif mode == 'weighted':
        candidates = find(first, n=max_candidates, _print=False, w = w, pool=pool)
    else : # random
        candidates =  list(map(lambda x: {'uri': x, 'score': 0}, random.sample(pool[0].tolist(), 500)))

    results = np.zeros(len(num_candidates))
    candidates_uri =  list(map(lambda x: x['uri'], candidates))

    for qi, q in enumerate(num_candidates):
        trues = len(set(candidates_uri[:q]).intersection(set(pl_data)))
        
        if verbose > 0:
            print('%d | positive %d | population %d' % (q, trues, pl_population))
        results[qi] = trues

    if verbose > 1:
        for index, c in enumerate(candidates[:max_candidates]):
            _sig = '  X  ' if c['uri'] in pl_data else '   '
            print('%d \t %.5f'% (index, c['score']) + '\t' + _sig + '\t' + get_label(c['uri']))
            print('\t\t\t\t'+ c['uri'].replace('data.', _replcm))
            # display(pd.DataFrame(np.ma.array( candidates[c]).reshape(1, 13)))

    if write:
        to_write =[';'.join([
                    str(index),
                    str(c['score']), 
                    '1' if c['uri'] in pl_data else '0',
                    get_label(c['uri']),
                    re.sub(r'[;\n"]', ' ', c['uri'])  # str.replace would treat this pattern literally
                   ])
            for index, c in enumerate(candidates[:max_candidates])]

        filename = pl_name + '.' + str(pos) + '.csv'
        with open(ensure_dir(os.path.join(out_path, 'detail', name, filename)), 'w') as file:
            file.write('index;score;predicted;label;uri\n')
            file.write('\n'.join(to_write))

    return results

In [50]:
def run_for_dataset(id_dataset, pool, mode='random', w=None, name=''):
    with open(ensure_dir(os.path.join(out_path, 'summary', name + '.csv')), 'w') as file:
        file.write('index;playlist;population;predicted100;predicted200;predicted500\n')
        testset = all_training[id_dataset]['playlists']
        
        pbar = tqdm(total=len(testset))

        for index, pl in enumerate(testset):
            population = len(pl['data'])
            pl_name = pl['name'].split('/')[-1].replace('.json', '').replace('"','')

            results = [test_recommendation(pool=pool, playlist=pl, pos=pos,  
                                    mode=mode, w=w, write=False, name=name)
                       for pos, work in enumerate(pl['data'])]
            results = np.mean(results,axis=0)
            
            file.write(';'.join([str(index), pl_name, str(population), 
                                 str(results[0]), str(results[1]), str(results[2])]))
            file.write('\n')
            pbar.update(1)
    pbar.close()

In [423]:
# test_recommendation(pp_pool, playlist=all_training[0]['playlists'][4], mode='weighted', name='pp.w5-06', w=_wpp, verbose=2 )

In [363]:
# itema3_pool = get_pool_from_datasets(['itema3_concerts'])

In [369]:
run_for_dataset(1, itema3_pool, mode='random', name='itema3.rand')

In [371]:
run_for_dataset(1, itema3_pool, mode='flat', name='itema3.flat')

In [378]:
_wi3 = compute_weights(threshold=5, datasets=['itema3_concerts'])
run_for_dataset(1, itema3_pool, mode='weighted', name='itema3.w5-06', w= _wi3)

In [437]:
_wi3 = compute_weights(threshold=8, datasets=['itema3_concerts'])
run_for_dataset(1, itema3_pool, mode='weighted', name='itema3.w8-06', w= _wi3)

In [444]:
_wi3 = compute_weights(threshold=10, datasets=['itema3_concerts'])
run_for_dataset(1, itema3_pool, mode='weighted', name='itema3.w10-06', w= _wi3)

In [445]:
_wi3 = compute_weights(threshold=8, fallback=1., datasets=['itema3_concerts'])
run_for_dataset(1, itema3_pool, mode='weighted', name='itema3.w8-1', w= _wi3)

In [ ]:
_wi3 = compute_weights(threshold=5, datasets=['itema3_concerts', 'pp_concerts'])
run_for_dataset(1, itema3_pool, mode='weighted', name='itema3.wp5-06', w= _wi3)

In [ ]:
pp_pool = get_pool_from_datasets(['pp_concerts'])

In [ ]:
run_for_dataset(0, pp_pool, mode='random', name='pp.rand')

In [ ]:
run_for_dataset(0, pp_pool, mode='flat', name='pp.flat')

In [ ]:
_wpp = compute_weights(threshold=5, datasets=['pp_concerts'])
# pd.DataFrame([_wpp], columns=heads)

In [ ]:
run_for_dataset(0, pp_pool, mode='weighted', name='pp.w5-06', w=_wpp)

In [ ]:
spo_pool = get_pool_from_datasets(['spotify_pl'])

In [ ]:
run_for_dataset(3, spo_pool, mode='random', name='spotify.rand')

In [ ]:
run_for_dataset(3, spo_pool, mode='flat', name='spotify.flat')

In [ ]:
_wspo = compute_weights(threshold=1.3, datasets=['spotify_pl'])
run_for_dataset(3, spo_pool, mode='weighted', name='spotify.w13-06', w=_wspo)

In [54]:
radio_pool = get_pool_from_datasets(['web-radio'])


Pool size: 1407

In [ ]:
run_for_dataset(2, radio_pool, mode='random', name='web-radio.rand')

In [ ]:
run_for_dataset(2, radio_pool, mode='flat', name='web-radio.flat')

In [ ]:
_wradio = compute_weights(threshold=1.4, datasets=['web-radio'])
run_for_dataset(2, radio_pool, mode='weighted', name='web-radio.w14-06', w=_wradio)

In [59]:
_wradio = compute_weights(threshold=1.4, datasets=['web-radio'])
_wradio[-1] *= 2
run_for_dataset(2, radio_pool, mode='weighted', name='web-radio.wd14-06', w=_wradio)

In [56]:
_wradio = compute_weights(threshold=1.5, datasets=['web-radio'])
run_for_dataset(2, radio_pool, mode='weighted', name='web-radio.w15-06', w=_wradio)

In [ ]:
_wradio = compute_weights(threshold=1.4, datasets=['web-radio', 'spotify_pl'])
run_for_dataset(2, radio_pool, mode='weighted', name='web-radio.ws14-06', w=_wradio)

In [ ]:


In [61]:
summary_path = os.path.join(out_path, 'summary')
columns = ['name', 'r100', 'r200', 'r500']
summary = pd.DataFrame(columns=columns)

for index, filename in enumerate(sorted(os.listdir(summary_path))):
    table = pd.read_csv(os.path.join(summary_path,filename), sep=';')
    table['r100'] = table.apply(lambda row: row['predicted100']/row['population'], axis=1)
    table['r200'] = table.apply(lambda row: row['predicted200']/row['population'], axis=1)
    table['r500'] = table.apply(lambda row: row['predicted500']/row['population'], axis=1)
    r100 = table['r100'].mean()
    r200 = table['r200'].mean()
    r500 = table['r500'].mean()
    summary.loc[index] = [filename, r100, r200, r500]
summary


Out[61]:
name r100 r200 r500
0 itema3.flat.csv 0.363 0.423 0.506
1 itema3.rand.csv 0.018 0.039 0.095
2 itema3.w10-06.csv 0.367 0.417 0.491
3 itema3.w5-06.csv 0.370 0.425 0.502
4 itema3.w8-06.csv 0.381 0.431 0.498
5 itema3.w8-1.csv 0.378 0.430 0.500
6 itema3.wp5-06.csv 0.362 0.422 0.498
7 pp.flat.csv 0.475 0.536 0.634
8 pp.rand.csv 0.037 0.076 0.188
9 pp.w5-06.csv 0.479 0.546 0.640
10 spotify.flat.csv 0.170 0.258 0.472
11 spotify.rand.csv 0.067 0.131 0.336
12 spotify.w13-06.csv 0.174 0.264 0.487
13 web-radio.flat.csv 0.121 0.195 0.388
14 web-radio.rand.csv 0.058 0.117 0.301
15 web-radio.w14-06.csv 0.119 0.194 0.385
16 web-radio.w15-06.csv 0.120 0.193 0.385
17 web-radio.wd14-06.csv 0.122 0.202 0.396
18 web-radio.ws14-06.csv 0.125 0.202 0.401
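
A quick way to read this table is to normalise each run by its dataset's random baseline (a hypothetical post-processing sketch on the summary DataFrame above):

In [ ]:
# hypothetical sketch: recall@500 lift over the random baseline, per dataset
s = summary.copy()
s['dataset'] = s['name'].str.split('.').str[0]
rand_r500 = s[s['name'].str.contains('.rand', regex=False)].set_index('dataset')['r500']
s['lift500'] = s.apply(lambda row: row['r500'] / rand_r500[row['dataset']], axis=1)
s[['name', 'r500', 'lift500']]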

In [ ]:
rc = {}
quantities = [100, 200, 500]
_headers = np.array([['weighted %d' % q, 'flat %d' % q, 'diff %d' % q] for q in quantities]).flatten()

def compute_for_dataset(dataset, w):
    playlists = dataset['playlists']
    current = np.zeros((len(playlists), 3*len(quantities)))
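    # log_progress is assumed to be a notebook progress helper available in the
    # session (it is not imported above), yielding (item, index) pairs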
    for pl, idxp in log_progress(playlists, name=dataset['name']):
        rcomp = [recommend_compare(pl, w, elem, num_candidates=quantities, verbose=False).flatten()
                 for elem in np.arange(len(pl['data']))]
        current[idxp] = np.mean(rcomp, axis=0)
    return current

In [ ]:
def evaluate(w, toPrint=False):
    for dataset, idxd in log_progress(all_training[2:4], name='datasets'):
        rc[dataset['name']] = compute_for_dataset(dataset, w)
    
    _data = np.array([np.mean(rc[ds], axis=0) for ds in rc])

    if toPrint:    
        return pd.DataFrame(
            _data,
            index = [ds for ds in rc],
            columns = _headers
        )
    else:
        return _data

In [ ]:
evaluate(w, True)

I try with different threshold values


In [ ]:
def try_with(threshold):
    w = compute_weights(threshold=threshold)

    for dataset, idxd in log_progress(all_training[2:4], name='datasets'):
        rc[dataset['name']] = compute_for_dataset(dataset,w)

    _data = np.array([np.mean(rc[ds], axis=0) for ds in rc])

    return pd.DataFrame(
        _data,
        index = [ds for ds in rc],
        columns = _headers
    )

In [ ]:
from IPython.display import display, HTML

for dataset in all_training[2:4]:
    playlists = dataset['playlists']
    for pl in playlists:
        print(pl['name'].split('/')[-1].replace('.expression.txt', ''))
        missing = [len(np.where(get_embs(item) < -1.)[0]) for item in pl['data']]
        _counts = sorted(pd.Series(missing).value_counts().reset_index().values.tolist())
        _pt = ['%d (%d)' % (item[0], item[1]) for item in _counts]
        print('\t\t\t'+'\t'.join(_pt))

In [ ]:
try_with(1.4)

In [ ]:
try_with(1.5)

In [ ]:
try_with(2.)

In [ ]:
try_with(1.2)

In [ ]:
try_with(1.1)

In [ ]:
try_with(1.)

In [ ]:
for dataset in all_training[3:4]:
    playlists = dataset['playlists']
    current = np.zeros((len(playlists), 3*len(quantities)))
    for idxp, pl in enumerate(playlists):
        rcomp = [recommend_compare(pl, w, elem, num_candidates=quantities, verbose=False).flatten()
                 for elem in np.arange(len(pl['data']))]
        print(pl['name'].split('/')[-1].replace('.expression.txt', ''))
        print(len(pl['data']))
        current[idxp] = np.mean(rcomp, axis=0)
        _c = current[idxp]
        print('%.3f | %.3f | %.3f' %(_c[2],_c[5], _c[8]))
    print(dataset['name'])
    print(np.mean(current, axis=0))
    print("==========================")

In [ ]:
0.00030769230769230765 | 0.007507692307692307 | 0.013246153846153847

In [ ]:
for dataset in all_training[2:3]:
    playlists = dataset['playlists']
    current = np.zeros((len(playlists), 3*len(quantities)))
    for idxp, pl in enumerate(playlists):
        rcomp = [recommend_compare(pl, w, elem, num_candidates=quantities, verbose=False).flatten()
                 for elem in np.arange(len(pl['data']))]
        print(pl['name'].split('/')[-1].replace('.expression.txt', ''))
        print(len(pl['data']))
        current[idxp] = np.mean(rcomp, axis=0)
        _c = current[idxp]
        print('%.3f | %.3f | %.3f' %(_c[2],_c[5], _c[8]))
    print(dataset['name'])
    print(np.mean(current, axis=0))
    print("==========================")

In [ ]:
0.00006349206349206348 | 0.0015492063492063492 | 0.0027333333333333333

In [ ]:
for dataset in all_training[3:4]:
    playlists = dataset['playlists']
    current = np.zeros((len(playlists), 3*len(quantities)))
    for idxp, pl in enumerate(playlists):
        rcomp = [recommend_compare(pl, compute_weights(threshold=1.4), elem, num_candidates=quantities, verbose=False).flatten()
                 for elem in np.arange(len(pl['data']))]
        print(pl['name'].split('/')[-1].replace('.expression.txt', ''))
        print(len(pl['data']))
        current[idxp] = np.mean(rcomp, axis=0)
        _c = current[idxp]
        print('%.3f | %.3f | %.3f' %(_c[2],_c[5], _c[8]))
    print(dataset['name'])
    print(np.mean(current, axis=0))
    print("==========================")

In [ ]:
for dataset in all_training[2:3]:
    playlists = dataset['playlists']
    current = np.zeros((len(playlists), 3*len(quantities)))
    for idxp, pl in enumerate(playlists):
        rcomp = [recommend_compare(pl, compute_weights(threshold=1.4), elem, num_candidates=quantities, verbose=False).flatten()
                 for elem in np.arange(len(pl['data']))]
        print(pl['name'].split('/')[-1].replace('.expression.txt', ''))
        print(len(pl['data']))
        current[idxp] = np.mean(rcomp, axis=0)
        _c = current[idxp]
        print('%.3f | %.3f | %.3f' %(_c[2],_c[5], _c[8]))
    print(dataset['name'])
    print(np.mean(current, axis=0))
    print("==========================")

In [ ]:
for dataset in all_training[3:4]:
    playlists = dataset['playlists']
    current = np.zeros((len(playlists), 3*len(quantities)))
    for idxp, pl in enumerate(playlists):
        rcomp = [recommend_compare(pl, compute_weights(threshold=1.5), elem, num_candidates=quantities, verbose=False).flatten()
                 for elem in np.arange(len(pl['data']))]
        print(pl['name'].split('/')[-1].replace('.expression.txt', ''))
        print(len(pl['data']))
        current[idxp] = np.mean(rcomp, axis=0)
        _c = current[idxp]
        print('%.3f | %.3f | %.3f' %(_c[2],_c[5], _c[8]))
    print(dataset['name'])
    print(np.mean(current, axis=0))
    print("==========================")

In [ ]:
for dataset in all_training[2:3]:
    playlists = dataset['playlists']
    current = np.zeros((len(playlists), 3*len(quantities)))
    for idxp, pl in enumerate(playlists):
        rcomp = [recommend_compare(pl, compute_weights(threshold=1.5), elem, num_candidates=quantities, verbose=False).flatten()
                 for elem in np.arange(len(pl['data']))]
        print(pl['name'].split('/')[-1].replace('.expression.txt', ''))
        print(len(pl['data']))
        current[idxp] = np.mean(rcomp, axis=0)
        _c = current[idxp]
        print('%.3f | %.3f | %.3f' %(_c[2],_c[5], _c[8]))
    print(dataset['name'])
    print(np.mean(current, axis=0))
    print("==========================")

In [ ]:
def display_playlist(name, virtuoso=True):
    for base in all_training:
        for index, pl in enumerate(base['playlists']):
            _p = pl['name'].split('/')[-1].replace('.expression.txt', '').replace('.json', '')
            
            if _p == name:
                print('Playlist %d: %s' % (index, pl['name']))

                m, md, s = compute_playlist_stats(pl, True)

                for d in pl['data']:
                    if virtuoso :
                        print(d)
                    else :
                        print(d.replace('data', 'overture'))
                    print(get_label(d)) 
                    print(np.ma.array(get_embs(d), mask=get_embs(d) < -1.))
                return

In [ ]:
display_playlist('37i9dQZF1DXaSipEWiHbyL.Classical World: USA')

In [ ]:
display_playlist('FM-401_20171030_00-23')

In [ ]:
pl = all_training[3]['playlists'][63]
recommend_compare(pl, w, 15)

In [ ]:
w = compute_weights(datasets=['spotify_pl', 'web-radio'])
pd.DataFrame([w], columns=heads)

In [ ]:
def try_with(threshold):
    w = compute_weights(threshold=threshold, datasets=['spotify_pl', 'web-radio'])

    for dataset, idxd in log_progress(all_training[2:4], name='datasets'):
        rc[dataset['name']] = compute_for_dataset(dataset,w)

    _data = np.array([np.mean(rc[ds], axis=0) for ds in rc])

    return pd.DataFrame(
        _data,
        index = [ds for ds in rc],
        columns = _headers
    )

In [ ]:
try_with(1.3)

In [ ]:
try_with(1.5)

In [ ]:
def try_with(threshold):
    w = compute_weights(threshold=threshold, datasets=['web-radio'])

    for dataset, idxd in log_progress(all_training[2:4], name='datasets'):
        rc[dataset['name']] = compute_for_dataset(dataset,w)

    _data = np.array([np.mean(rc[ds], axis=0) for ds in rc])

    return pd.DataFrame(
        _data,
        index = [ds for ds in rc],
        columns = _headers
    )

In [ ]:
try_with(1.4)

In [ ]:
try_with(1.5)

In [ ]:
def try_with(threshold):
    w = compute_weights(threshold=threshold, datasets=['pp_concerts', 'itema3_concerts'])

    for dataset, idxd in log_progress(all_training[2:4], name='datasets'):
        rc[dataset['name']] = compute_for_dataset(dataset,w)

    _data = np.array([np.mean(rc[ds], axis=0) for ds in rc])

    return pd.DataFrame(
        _data,
        index = [ds for ds in rc],
        columns = _headers
    )

In [ ]:
try_with(3.)

In [ ]:
try_with(5.)

In [ ]:
try_with(8.)

In [ ]:
def try_with(threshold):
    w = compute_weights(threshold=threshold, datasets=['pp_concerts', 'itema3_concerts'])

    for dataset, idxd in log_progress(all_training[0:2], name='datasets'):
        rc[dataset['name']] = compute_for_dataset(dataset,w)

    _data = np.array([np.mean(rc[ds], axis=0) for ds in rc])

    return pd.DataFrame(
        _data,
        index = [ds for ds in rc],
        columns = _headers
    )

In [ ]:
try_with(3.)

In [ ]:
try_with(5.)

In [ ]:
try_with(8.)

In [ ]:
def try_with(threshold):
    w = compute_weights(threshold=threshold, datasets=['itema3_concerts'])

    for dataset, idxd in log_progress(all_training[0:2], name='datasets'):
        rc[dataset['name']] = compute_for_dataset(dataset,w)

    _data = np.array([np.mean(rc[ds], axis=0) for ds in rc])

    return pd.DataFrame(
        _data,
        index = [ds for ds in rc],
        columns = _headers
    )

In [ ]:
try_with(5.)

In [ ]:
try_with(10.)

In [ ]:
pl = all_training[0]['playlists'][18]
recommend_compare(pl, w, 5, verbose=True, pool=get_pool_from_datasets(['pp_concerts']))

In [ ]:
display_playlist('163468dd-41cc-3818-a874-c867959fe603', virtuoso=False)

In [ ]:
_seed = get_embs('http://data.doremus.org/expression/79456370-2eb3-3abe-91d6-326bc59180a5', masked=True)
_target = get_embs('http://data.doremus.org/expression/2dec45a4-545a-352d-b974-d5b5d3fd6a69', masked=True)
computeSimilarity(_seed, _target, w)

In [ ]:
all_f = find(_seed, _print=False, n=3000)

In [ ]:
for a in all_f:
    if a['uri'] == 'http://data.doremus.org/expression/2dec45a4-545a-352d-b974-d5b5d3fd6a69':
        print('found')

In [ ]:
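# note: all_song is local to get_pool_from_datasets; this cell assumes it was
# re-created in the session, e.g. all_song, _, _ = get_pool_from_datasets()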
for a in all_song:
    if a == 'http://data.doremus.org/expression/2dec45a4-545a-352d-b974-d5b5d3fd6a69':
        print('found')

In [ ]:
display_playlist('0a2e8bab-a762-3e13-8a1e-236c31976b75', virtuoso=True)

In [ ]:
display_playlist('37i9dQZF1DXaSipEWiHbyL.Classical World: USA')

In [ ]: