In [ ]:
import os, sys, time
import sqlite3
import pickle as pkl
import numpy as np

In [ ]:
data_dir = 'data'
fdb = os.path.join(data_dir, 'msd/track_metadata.db')
faotm = os.path.join(data_dir, 'aotm-2011/aotm-2011-subset.pkl')

Load (a subset of) AotM playlist data.


In [ ]:
playlists = pkl.load(open(faotm, 'rb'))

In [ ]:
playlists[0]

In [ ]:
len(playlists)

Connect the SQLite MSD metadata DB.


In [ ]:
conn = sqlite3.connect(fdb)

Get table names.


In [ ]:
res = conn.execute("SELECT name FROM sqlite_master WHERE type='table'")
data = res.fetchall()
conn.commit()

In [ ]:
data

Get column names.


In [ ]:
res = conn.execute('PRAGMA table_info(songs)')
data = res.fetchall()
conn.commit()

In [ ]:
data

Map song_id to track_id.


In [ ]:
song_id_set = {sid for pl in playlists for sid in pl}

In [ ]:
len(song_id_set)

In [ ]:
songID2TrackID = dict()  # song_id --> track_id

In [ ]:
cnt = 0
for sid in sorted(song_id_set):
    res = conn.execute("SELECT track_id FROM songs WHERE song_id='" + sid + "'")
    data = res.fetchall()
    conn.commit()
    tids = [t[0] for t in data]
    songID2TrackID[sid] = tids
    cnt += 1
    if cnt % 10 == 0: 
        print(cnt); sys.stdout.flush()

In [ ]:
len(songID2TrackID)

In [ ]:
sorted(songID2TrackID.items())[:10]

Close SQLite DB connection.


In [ ]:
conn.close()

Save mapping.


In [ ]:
pkl.dump(songID2TrackID, open(os.path.join(data_dir, 'aotm-2011/songID2TrackID.pkl'), 'wb'))