In [5]:
import sqlite3
conn = sqlite3.connect("../results/results.sqlite")
c = conn.cursor()
params = (.2, .6)
c.execute('SELECT * FROM foote_bounds WHERE F05=? AND F3=?', params)
print c.fetchall()
conn.close()
In [11]:
import sqlite3
import numpy as np  # was missing: this cell uses np but never imported it
import matplotlib.pyplot as plt

conn = sqlite3.connect("../results/results.sqlite")
c = conn.cursor()
trim = False

# Structured dtype shared by every *_bounds table in the results database
# (was duplicated verbatim for each query below).
BOUNDS_DTYPE = [('track_id', '<U400'), ('F05', float),
                ('P05', float), ('R05', float), ('F3', float),
                ('P3', float), ('R3', float), ('D', float),
                ('DevA2E', float), ('DevE2A', float),
                ('annot_beat', int), ('feature', 'S10'),
                ('add_params', 'S10'), ('trim', int)]

# Human annotators' results.
c.execute('SELECT * FROM mma_bounds WHERE trim=?', (trim,))
mma_results = np.asarray(c.fetchall(), dtype=BOUNDS_DTYPE)

# Machine results: average the evaluation metrics across five algorithms.
algo_ids = ["serra", "levy", "foote", "siplca", "olda"]
feat_dict = {
    'serra' : 'mix',
    'levy'  : 'hpcp',
    'foote' : 'hpcp',
    'siplca': '',
    'olda'  : ''
}
metrics = ('F05', 'F3', 'D', 'DevA2E', 'DevE2A')
algo_results = []
for algo_id in algo_ids:
    # Table name comes from the fixed whitelist above, so the
    # %-interpolation is not an injection risk.
    c.execute('SELECT * FROM %s_bounds WHERE feature=? AND trim=?' % algo_id,
              (feat_dict[algo_id], trim))
    algo_results.append(np.asarray(c.fetchall(), dtype=BOUNDS_DTYPE))

# Use the first algorithm's rows as the template, then overwrite the metric
# columns with the *unweighted* per-track mean across all algorithms.
# (The original computed a running mean-of-means, which weighted the
# earlier algorithms exponentially less than the later ones.)
mgp_results = algo_results[0].copy()
for field in metrics:
    mgp_results[field] = np.mean(
        np.vstack([res[field] for res in algo_results]), axis=0)

# Close SQL connection
conn.close()

# Sort both result sets by track id so the rows line up pairwise.
mma_results = np.sort(mma_results, order='track_id')
mgp_results = np.sort(mgp_results, order='track_id')
def linear_regression(x, y):
    """Fit y = w0*x + w1 by ordinary least squares.

    Parameters:
        x, y: 1-D numeric arrays of equal length.

    Returns:
        line: ndarray of predicted values w0*x + w1 at each x.
        r2: float, coefficient of determination (R^2) of the fit.
    """
    # Design matrix [x, 1].  The original used bare `array`/`ones`, which
    # are undefined without a star-import of numpy/pylab; also dropped the
    # leftover debug print of the shapes.
    A = np.array([x, np.ones(len(x))])
    w = np.linalg.lstsq(A.T, y)[0]  # obtaining the parameters
    line = w[0] * x + w[1]          # regression line
    # Compute the r2 goodness of fit test
    ssres = np.sum((y - line) ** 2)        # residual sum of squares
    sstot = np.sum((y - np.mean(y)) ** 2)  # total sum of squares
    r2 = 1 - ssres / float(sstot)
    return line, r2
def moving_average(a, n=3):
    """Row-wise moving average of width `n` over the columns of a 2-D array.

    Returns a float array with the same number of rows and
    (a.shape[1] - n + 1) columns, where each entry is the mean of `n`
    consecutive columns of `a`.
    """
    cumulative = np.cumsum(a, dtype=float, axis=1)
    # Windowed sums via the cumulative-sum trick, then divide by the width.
    cumulative[:, n:] -= cumulative[:, :-n]
    return cumulative[:, n - 1:] / n
def histogram(x, y):
    """One histogram column per track, smoothed across neighboring tracks.

    Sorts `y` by ascending `x`, normalizes it to [0, 1], builds an N-bin
    one-hot histogram per value, and smooths the columns with a width-9
    moving average.

    Returns an (N, len(x) - 8) array.
    """
    N = 10
    # Bin count was hard-coded as 10 here, silently duplicating N.
    H = np.zeros((N, len(x)))
    idx_x = np.argsort(x)
    # Reorder y by ascending x and normalize into [0, 1].
    # (Dropped the leftover debug print of y.max().)
    y = y[idx_x] / y.max()
    for i, y_i in enumerate(y):
        # Each column is the histogram of a single normalized value
        # (the original indexed y[i], ignoring the y_i it already had).
        H[:, i] = np.histogram(y_i, bins=np.arange(0, 1.1, 1 / float(N)))[0]
    H = moving_average(H, n=9)
    return H
def evaluation(x, y, metric, title='', invert=False):
    """Compare human (MMA) vs. machine (MGP) scores for one metric.

    Produces two figures: a scatter plot of x vs. y with a least-squares
    fit (fit line plotting is currently commented out), and a smoothed
    per-track histogram image saved to ../paper/plots/.

    Parameters:
        x: MMA (human) scores, one per track.
        y: MGP (machine) scores, one per track.
        metric: metric label used in axis labels; also mapped to a file
            name via the "{R2E}"/"{E2R}" checks below.
        title: only used in the printed R^2 line.
        invert: unused in this body -- TODO confirm and remove.

    Side effects: shows two matplotlib figures, saves a PDF, prints the
    R^2 value, and reads the notebook-global `trim` flag.
    """
    H = histogram(x, y)
    line, r2 = linear_regression(x, y)
    # Plotting
    figsize = (3, 1.5)
    bottom_margin = 0.3
    left_margin = 0.2
    plt.rc('text', usetex=True)
    plt.rc('font', family='serif')
    plt.rc('font',**{'family':'sans-serif','sans-serif':['Helvetica']})
    # Map the LaTeX-ish metric label to a filesystem-safe name for savefig.
    metric_str = metric
    if "{R2E}" in metric:
        metric_str = "DevA2E"
    if "{E2R}" in metric:
        metric_str = "DevE2A"
    if trim:
        metric_str += "-trim"
    # Plot linear regression
    plt.figure(1, figsize=figsize, dpi=160, facecolor='w', edgecolor='k')
    plt.scatter(x, y, s=1)
    # plt.plot(x, line, 'b-')
    #plt.title(title)
    # Axes span at least [0, 1] so F-measure plots are comparable.
    plt.gca().set_xlim(0, max(1,x.max()))
    plt.gca().set_ylim(0, max(1,y.max()))
    plt.xlabel(r"MMA_{%s}" % metric)
    plt.ylabel(r"MGP_{%s}" % metric)
    plt.gcf().subplots_adjust(bottom=bottom_margin, left=left_margin)
    #plt.savefig('../paper/plots/correl-%s.pdf' % metric_str, bbox_inches='tight')
    plt.show()
    print "R2 of %s is: %.2f" % (title, r2*100)
    # Plot Histogram
    plt.figure(2, figsize=figsize, dpi=160, facecolor='w', edgecolor='k')
    # Deviation metrics are "lower is better": flip both axes of the image.
    if "{R2E}" in metric or "{E2R}" in metric:
        H = H[::-1, ::-1]
    plt.imshow(H, interpolation="nearest", aspect="auto", cmap="binary")
    plt.gca().invert_yaxis()
    # Label the 10 histogram bins as percentages (0, 50, 100).
    plt.gca().set_yticks(np.arange(0,11,5))
    plt.gca().set_yticklabels(np.arange(0,11,5)*10)
    #plt.title(title)
    plt.xlabel(r"Tracks sorted by MMA_{%s}" % metric)
    plt.ylabel(r"Histo bins (\%)")
    plt.gcf().subplots_adjust(bottom=bottom_margin, left=left_margin)
    plt.savefig('../paper/plots/histo-human-%s.pdf' % metric_str, bbox_inches='tight')
    plt.show()
# Plot with linear regression
# Sanity check: both result sets should contain the same number of tracks.
print "End", len(mma_results["F05"]), len(mgp_results['F05'])
# Uncomment to regenerate the per-metric correlation/histogram figures:
# evaluation(mma_results['F05'], mgp_results['F05'], 'F05', 'F-measure 0.5 sec')
# evaluation(mma_results['F3'], mgp_results['F3'], 'F3', 'F-measure 3 sec')
# evaluation(mma_results['D'], mgp_results['D'], 'D', 'Information Gain')
# evaluation(mma_results['DevA2E'], mgp_results['DevA2E'], '$\sigma$_{R2E}', 'Median Deviation: Annotation to Estimation')
# evaluation(mma_results['DevE2A'], mgp_results['DevE2A'], '$\sigma$_{E2R}', 'Median Deviation: Estimation to Annotation')
In [18]:
# HUMANS
import pickle
import jams2
import os

# Open the pickles in binary mode and close the handles: the original
# used text mode (which can corrupt pickle streams on some platforms)
# and leaked two open file objects.
with open("../experiment/mma_experiment_humans.pk", "rb") as fh:
    mma_results = pickle.load(fh)
with open("../experiment/mgp_experiment_humans.pk", "rb") as fh:
    mgp_results = pickle.load(fh)
# print mgp_results

# Sort tracks by the machine F-measure (3 s window) and collect the
# annotated duration of each track.
mgp_results = np.sort(mgp_results, order="F3")
durations = []
for mgp_res in mgp_results:
    # NOTE(review): hard-coded absolute path -- parameterize for portability.
    # track_id's last 5 chars are stripped (presumably a ".json" suffix --
    # TODO confirm) to derive the JAMS file name.
    jam_file = ("/Users/uri/datasets/Segments/annotations/" +
                os.path.basename(mgp_res["track_id"][:-5]) + ".jams")
    jam = jams2.load(jam_file)
    durations.append(jam.metadata.duration)

plt.figure(1, figsize=(5, 1.9), dpi=160, facecolor='w', edgecolor='k')
plt.plot(np.arange(len(durations)), durations)
plt.gca().set_xlim([0,50])
plt.gca().set_ylim([0,650])
plt.xlabel(r"Tracks sorted by MGP$_{F3}$")
plt.ylabel(r"Duration (seconds)")
plt.gcf().subplots_adjust(bottom=0.23, left=0.14)
plt.show()
#evaluation(mma_results['F05'], mgp_results['F05'], 'F05', 'F-measure 0.5 sec')
# evaluation(mma_results['F3'], mgp_results['F3'], 'F3', 'F-measure 3 sec')
# evaluation(mma_results['D'], mgp_results['D'], 'D', 'Information Gain')
# evaluation(mma_results['DevA2E'], mgp_results['DevA2E'], '$\sigma$_{R2E}', 'Median Deviation: Annotation to Estimation')
# evaluation(mma_results['DevE2A'], mgp_results['DevE2A'], '$\sigma$_{E2R}', 'Median Deviation: Estimation to Annotation')
In [54]:
# Find the 45 "worst" and 5 "best" tracks.
import os
import jams2
total_secs = 0  # running sum of track durations, updated by is_longer()
def is_longer(est_file, secs=600):
    """Checks whether this track is longer than 'secs' seconds.

    Side effect: adds the track's annotated duration to the notebook
    global `total_secs` (consumed by the commented-out analysis below).

    Args:
        est_file: estimation file name; its last 5 characters (presumably
            a ".json" extension -- TODO confirm) are stripped to build the
            matching JAMS annotation path.
        secs: duration threshold in seconds.

    Returns:
        True if the annotated duration exceeds `secs`, else False.
    """
    # NOTE(review): hard-coded absolute path; parameterize for portability.
    jam_file = "/Users/uri/datasets/Segments/annotations/" + est_file[:-5] + ".jams"
    jam = jams2.load(jam_file)
    global total_secs
    total_secs += jam.metadata.duration
    if jam.metadata.duration > secs:
        return True
    return False
# Plot duration
# mgp_results = np.sort(mgp_results, order="F3")
# durations = []
# for mgp_res in mgp_results:
# jam_file = "/Users/uri/datasets/Segments/annotations/" + mgp_res["track_id"][:-5] + ".jams"
# jam = jams2.load(jam_file)
# durations.append(jam.metadata.duration)
# plt.figure(1, figsize=(5, 1.9), dpi=160, facecolor='w', edgecolor='k')
# plt.plot(np.arange(len(durations)), durations)
# plt.gca().set_xlim([0,2154])
# plt.gca().set_ylim([0,1850])
# plt.xlabel(r"Tracks sorted by MGP$_{F3}$")
# plt.ylabel(r"Duration (seconds)")
# plt.gcf().subplots_adjust(bottom=0.23, left=0.14)
# plt.show()
# Find bad results
# bad = ['SALAMI_718.json', 'SALAMI_714.json', 'SALAMI_1376.json', 'SALAMI_720.json', 'SALAMI_378.json', 'SALAMI_724.json', 'SALAMI_710.json']
# for mgp_res in mgp_results:
# if is_longer(mgp_res['track_id'], secs=600):
# bad.append(mgp_res['track_id'])
# print "Total Seconds in dataset", total_secs, total_secs/float(len(mgp_results)), len(mgp_results)
# metric = 'F3'
# # mgp_results = np.sort(mgp_results, order=metric)
# # for mgp_res in mgp_results[:45]:
# # if is_longer(mgp_res['track_id'], secs=600):
# # print "Longer inside"
# # if mgp_res['track_id'] in bad:
# # print "SALAMI speech"
# res = filter(lambda x: x["track_id"] not in bad, mgp_results)
# filtered_good = np.sort(res, order=metric)[-5:]
# filtered = np.sort(res, order=metric)[:45]
# #print np.sort(filtered, order='track_id')
# subset_secs = 0
# for filt in filtered:
# jam_file = "/Users/uri/datasets/Segments/annotations/" + filt["track_id"][:-5] + ".jams"
# jam = jams2.load(jam_file)
# subset_secs += jam.metadata.duration
# print "Total Seconds in subset", subset_secs, subset_secs/float(len(filtered)), len(filtered)
In [69]:
# Clean up mma table (there are SALAMI tracks without ground truth)
import sqlite3
conn = sqlite3.connect("../results/results.sqlite")
c = conn.cursor()
# Feature used by each algorithm.
# NOTE(review): 'levy' is 'mfcc' here but 'hpcp' in the earlier cell --
# confirm which feature the levy results were actually computed with.
feat_dict = {
    'serra' : 'mix',
    'levy' : 'mfcc',
    'foote' : 'hpcp',
    'siplca': '',
    'olda' : ''
}
tracks = []
trim = True
# Fetch track ids from two algorithms' tables; the table name comes from
# the fixed list below, so the %-interpolation is not an injection risk.
for algo_id in ["serra", "levy"]:
    c.execute('SELECT track_id FROM %s_bounds WHERE feature=? AND trim=?' % algo_id, (feat_dict[algo_id],trim))
    tracks.append(c.fetchall())
serra = set(tracks[0])
levy = set(tracks[1])
# Symmetric difference: ids present in exactly one of the two result sets
# (expected to be empty if both algorithms covered the same tracks).
print serra ^ levy
c.execute('SELECT track_id FROM mma_bounds')
mma = set(c.fetchall())
# Ids in only one of mma/serra.  DELETEs for ids that are in serra but
# not in mma simply match no rows, so only the mma-only tracks go away.
to_delete = mma ^ serra
for track in to_delete:
    # `track` is a 1-tuple (track_id,) straight from fetchall(), which
    # matches the single '?' placeholder.
    c.execute('DELETE FROM mma_bounds WHERE track_id=?', track)
    print track
conn.commit()
conn.close()
In [82]:
# Binary entropy
score = 0.5
scores = np.asarray([score, 1-score])
entropy = 0
for s in scores:
entropy += s*np.log2(s)
entropy *= -1
print entropy
print filtered["track_id"], len(filtered["track_id"])
In [6]:
# Copy the subdataset to ~/datasets/SubSegments
import shutil
import os
import jams
import json
def copy_files(files):
    """Copy each track's audio and annotation into ~/datasets/SubSegments.

    For every record in `files` (structured rows with a 'track_id' field,
    whose last 5 characters -- presumably ".json", TODO confirm -- are
    stripped):
      * copies the mp3 from Segments/audio to SubSegments/audio;
      * loads the matching JAMS annotation, renames every section
        annotator to "GT", and writes it to SubSegments/annotations.
    """
    # Loop variable renamed from `f`: the original reused `f` as the
    # output file handle in the `with` block below, shadowing the track
    # record inside each iteration.
    for track in files:
        # Audio
        src = "/Users/uri/datasets/Segments/audio/" + track["track_id"][:-5] + ".mp3"
        dest = "/Users/uri/datasets/SubSegments/audio/" + os.path.basename(src)
        shutil.copy(src, dest)
        # Annotations
        src = "/Users/uri/datasets/Segments/annotations/" + track["track_id"][:-5] + ".jams"
        jam = jams.load(src)
        #jam.sections = [] # Remove all section annotations (we only care about the metadata) # UPDATE: Not true!
        for annot in jam.sections:
            annot.annotation_metadata.annotator.name = "GT" # Call the annotator "GT"
        dest = "/Users/uri/datasets/SubSegments/annotations/" + os.path.basename(src)
        with open(dest, "w") as out_fh:
            json.dump(jam, out_fh, indent=2)
# NOTE(review): `filtered` / `filtered_good` are defined only in the
# commented-out selection cell above -- these calls fail on a fresh
# Restart & Run All.
copy_files(filtered)
copy_files(filtered_good)
In [83]:
# NOTE(review): `filtered` / `filtered_good` come from the commented-out
# selection cell above (hidden kernel state); this cell fails on a fresh
# Restart & Run All.
print np.mean(filtered["F3"]), np.mean(filtered["F05"]), filtered.shape, filtered_good.shape
# Concatenate the "worst" and "best" tracks into one record array.
merged = np.append(filtered, filtered_good, axis=0)
for filt in merged:
    print filt["track_id"], filt["F3"], filt["DevA2E"]
import pickle
#pickle.dump(merged, open("mgp_experiment_machine.pk", "w"))
In [ ]: