In [ ]:
import os, sys, time
import sqlite3
import pickle as pkl
import numpy as np
In [ ]:
data_dir = 'data'
fdb = os.path.join(data_dir, 'msd/track_metadata.db')
faotm = os.path.join(data_dir, 'aotm-2011/aotm-2011-subset.pkl')
Load (a subset of) AotM playlist data.
In [ ]:
playlists = pkl.load(open(faotm, 'rb'))
In [ ]:
playlists[0]
In [ ]:
len(playlists)
Connect the SQLite MSD metadata DB.
In [ ]:
conn = sqlite3.connect(fdb)
Get table names.
In [ ]:
res = conn.execute("SELECT name FROM sqlite_master WHERE type='table'")
data = res.fetchall()
conn.commit()
In [ ]:
data
Get column names.
In [ ]:
res = conn.execute('PRAGMA table_info(songs)')
data = res.fetchall()
conn.commit()
In [ ]:
data
Map song_id
to track_id
.
In [ ]:
song_id_set = {sid for pl in playlists for sid in pl}
In [ ]:
len(song_id_set)
In [ ]:
songID2TrackID = dict() # song_id --> track_id
In [ ]:
cnt = 0
for sid in sorted(song_id_set):
res = conn.execute("SELECT track_id FROM songs WHERE song_id='" + sid + "'")
data = res.fetchall()
conn.commit()
tids = [t[0] for t in data]
songID2TrackID[sid] = tids
cnt += 1
if cnt % 10 == 0:
print(cnt); sys.stdout.flush()
In [ ]:
len(songID2TrackID)
In [ ]:
sorted(songID2TrackID.items())[:10]
Close SQLite DB connection.
In [ ]:
conn.close()
Save mapping.
In [ ]:
pkl.dump(songID2TrackID, open(os.path.join(data_dir, 'aotm-2011/songID2TrackID.pkl'), 'wb'))