In [5]:
import sqlite3
conn = sqlite3.connect("../results/results.sqlite")
c = conn.cursor()
params = (.2, .6)
c.execute('SELECT * FROM foote_bounds WHERE F05=? AND F3=?', params)
print c.fetchall()
conn.close()
In [11]:
import sqlite3
import numpy as np  # was missing: this cell uses np but never imported it
import matplotlib.pyplot as plt

conn = sqlite3.connect("../results/results.sqlite")
c = conn.cursor()
trim = False

# Structured dtype shared by every *_bounds table in the results database
# (was duplicated verbatim for each query below).
BOUNDS_DTYPE = [('track_id', '<U400'), ('F05', float),
                ('P05', float), ('R05', float), ('F3', float),
                ('P3', float), ('R3', float), ('D', float),
                ('DevA2E', float), ('DevE2A', float),
                ('annot_beat', int), ('feature', 'S10'),
                ('add_params', 'S10'), ('trim', int)]

# Human annotators' results.
c.execute('SELECT * FROM mma_bounds WHERE trim=?', (trim,))
mma_results = np.asarray(c.fetchall(), dtype=BOUNDS_DTYPE)

# Machine results: average the evaluation metrics across five algorithms.
algo_ids = ["serra", "levy", "foote", "siplca", "olda"]
feat_dict = {
    'serra' : 'mix',
    'levy'  : 'hpcp',
    'foote' : 'hpcp',
    'siplca': '',
    'olda'  : ''
}
metrics = ('F05', 'F3', 'D', 'DevA2E', 'DevE2A')
algo_results = []
for algo_id in algo_ids:
    # Table name comes from the fixed whitelist above, so the
    # %-interpolation is not an injection risk.
    c.execute('SELECT * FROM %s_bounds WHERE feature=? AND trim=?' % algo_id,
              (feat_dict[algo_id], trim))
    algo_results.append(np.asarray(c.fetchall(), dtype=BOUNDS_DTYPE))

# Use the first algorithm's rows as the template, then overwrite the metric
# columns with the *unweighted* per-track mean across all algorithms.
# (The original computed a running mean-of-means, which weighted the
# earlier algorithms exponentially less than the later ones.)
mgp_results = algo_results[0].copy()
for field in metrics:
    mgp_results[field] = np.mean(
        np.vstack([res[field] for res in algo_results]), axis=0)

# Close SQL connection
conn.close()

# Sort both result sets by track id so the rows line up pairwise.
mma_results = np.sort(mma_results, order='track_id')
mgp_results = np.sort(mgp_results, order='track_id')
def linear_regression(x, y):
    """Fit y = w0*x + w1 by ordinary least squares.

    Parameters:
        x, y: 1-D numeric arrays of equal length.

    Returns:
        line: ndarray of predicted values w0*x + w1 at each x.
        r2: float, coefficient of determination (R^2) of the fit.
    """
    # Design matrix [x, 1].  The original used bare `array`/`ones`, which
    # are undefined without a star-import of numpy/pylab; also dropped the
    # leftover debug print of the shapes.
    A = np.array([x, np.ones(len(x))])
    w = np.linalg.lstsq(A.T, y)[0]  # obtaining the parameters
    line = w[0] * x + w[1]          # regression line
    # Compute the r2 goodness of fit test
    ssres = np.sum((y - line) ** 2)        # residual sum of squares
    sstot = np.sum((y - np.mean(y)) ** 2)  # total sum of squares
    r2 = 1 - ssres / float(sstot)
    return line, r2
def moving_average(a, n=3):
    """Row-wise moving average of width `n` over the columns of a 2-D array.

    Returns a float array with the same number of rows and
    (a.shape[1] - n + 1) columns, where each entry is the mean of `n`
    consecutive columns of `a`.
    """
    cumulative = np.cumsum(a, dtype=float, axis=1)
    # Windowed sums via the cumulative-sum trick, then divide by the width.
    cumulative[:, n:] -= cumulative[:, :-n]
    return cumulative[:, n - 1:] / n
def histogram(x, y):
    """One histogram column per track, smoothed across neighboring tracks.

    Sorts `y` by ascending `x`, normalizes it to [0, 1], builds an N-bin
    one-hot histogram per value, and smooths the columns with a width-9
    moving average.

    Returns an (N, len(x) - 8) array.
    """
    N = 10
    # Bin count was hard-coded as 10 here, silently duplicating N.
    H = np.zeros((N, len(x)))
    idx_x = np.argsort(x)
    # Reorder y by ascending x and normalize into [0, 1].
    # (Dropped the leftover debug print of y.max().)
    y = y[idx_x] / y.max()
    for i, y_i in enumerate(y):
        # Each column is the histogram of a single normalized value
        # (the original indexed y[i], ignoring the y_i it already had).
        H[:, i] = np.histogram(y_i, bins=np.arange(0, 1.1, 1 / float(N)))[0]
    H = moving_average(H, n=9)
    return H
def evaluation(x, y, metric, title='', invert=False):
    """Compare human (MMA) vs. machine (MGP) scores for one metric.

    Produces two figures: a scatter plot of x vs. y with a least-squares
    fit (fit line plotting is currently commented out), and a smoothed
    per-track histogram image saved to ../paper/plots/.

    Parameters:
        x: MMA (human) scores, one per track.
        y: MGP (machine) scores, one per track.
        metric: metric label used in axis labels; also mapped to a file
            name via the "{R2E}"/"{E2R}" checks below.
        title: only used in the printed R^2 line.
        invert: unused in this body -- TODO confirm and remove.

    Side effects: shows two matplotlib figures, saves a PDF, prints the
    R^2 value, and reads the notebook-global `trim` flag.
    """
    H = histogram(x, y)
    line, r2 = linear_regression(x, y)
    # Plotting
    figsize = (3, 1.5)
    bottom_margin = 0.3
    left_margin = 0.2
    plt.rc('text', usetex=True)
    plt.rc('font', family='serif')
    plt.rc('font',**{'family':'sans-serif','sans-serif':['Helvetica']})
    # Map the LaTeX-ish metric label to a filesystem-safe name for savefig.
    metric_str = metric
    if "{R2E}" in metric:
        metric_str = "DevA2E"
    if "{E2R}" in metric:
        metric_str = "DevE2A"
    if trim:
        metric_str += "-trim"
    # Plot linear regression
    plt.figure(1, figsize=figsize, dpi=160, facecolor='w', edgecolor='k')
    plt.scatter(x, y, s=1)
    # plt.plot(x, line, 'b-')
    #plt.title(title)
    # Axes span at least [0, 1] so F-measure plots are comparable.
    plt.gca().set_xlim(0, max(1,x.max()))
    plt.gca().set_ylim(0, max(1,y.max()))
    plt.xlabel(r"MMA_{%s}" % metric)
    plt.ylabel(r"MGP_{%s}" % metric)
    plt.gcf().subplots_adjust(bottom=bottom_margin, left=left_margin)
    #plt.savefig('../paper/plots/correl-%s.pdf' % metric_str, bbox_inches='tight')
    plt.show()
    print "R2 of %s is: %.2f" % (title, r2*100)
    # Plot Histogram
    plt.figure(2, figsize=figsize, dpi=160, facecolor='w', edgecolor='k')
    # Deviation metrics are "lower is better": flip both axes of the image.
    if "{R2E}" in metric or "{E2R}" in metric:
        H = H[::-1, ::-1]
    plt.imshow(H, interpolation="nearest", aspect="auto", cmap="binary")
    plt.gca().invert_yaxis()
    # Label the 10 histogram bins as percentages (0, 50, 100).
    plt.gca().set_yticks(np.arange(0,11,5))
    plt.gca().set_yticklabels(np.arange(0,11,5)*10)
    #plt.title(title)
    plt.xlabel(r"Tracks sorted by MMA_{%s}" % metric)
    plt.ylabel(r"Histo bins (\%)")
    plt.gcf().subplots_adjust(bottom=bottom_margin, left=left_margin)
    plt.savefig('../paper/plots/histo-human-%s.pdf' % metric_str, bbox_inches='tight')
    plt.show()
# Plot with linear regression
# Sanity check: both result sets should contain the same number of tracks.
print "End", len(mma_results["F05"]), len(mgp_results['F05'])
# Uncomment to regenerate the per-metric correlation/histogram figures:
# evaluation(mma_results['F05'], mgp_results['F05'], 'F05', 'F-measure 0.5 sec')
# evaluation(mma_results['F3'], mgp_results['F3'], 'F3', 'F-measure 3 sec')
# evaluation(mma_results['D'], mgp_results['D'], 'D', 'Information Gain')
# evaluation(mma_results['DevA2E'], mgp_results['DevA2E'], '$\sigma$_{R2E}', 'Median Deviation: Annotation to Estimation')
# evaluation(mma_results['DevE2A'], mgp_results['DevE2A'], '$\sigma$_{E2R}', 'Median Deviation: Estimation to Annotation')
In [18]:
# HUMANS
import pickle
import jams2
import os

# Open the pickles in binary mode and close the handles: the original
# used text mode (which can corrupt pickle streams on some platforms)
# and leaked two open file objects.
with open("../experiment/mma_experiment_humans.pk", "rb") as fh:
    mma_results = pickle.load(fh)
with open("../experiment/mgp_experiment_humans.pk", "rb") as fh:
    mgp_results = pickle.load(fh)
# print mgp_results

# Sort tracks by the machine F-measure (3 s window) and collect the
# annotated duration of each track.
mgp_results = np.sort(mgp_results, order="F3")
durations = []
for mgp_res in mgp_results:
    # NOTE(review): hard-coded absolute path -- parameterize for portability.
    # track_id's last 5 chars are stripped (presumably a ".json" suffix --
    # TODO confirm) to derive the JAMS file name.
    jam_file = ("/Users/uri/datasets/Segments/annotations/" +
                os.path.basename(mgp_res["track_id"][:-5]) + ".jams")
    jam = jams2.load(jam_file)
    durations.append(jam.metadata.duration)

plt.figure(1, figsize=(5, 1.9), dpi=160, facecolor='w', edgecolor='k')
plt.plot(np.arange(len(durations)), durations)
plt.gca().set_xlim([0,50])
plt.gca().set_ylim([0,650])
plt.xlabel(r"Tracks sorted by MGP$_{F3}$")
plt.ylabel(r"Duration (seconds)")
plt.gcf().subplots_adjust(bottom=0.23, left=0.14)
plt.show()
#evaluation(mma_results['F05'], mgp_results['F05'], 'F05', 'F-measure 0.5 sec')
# evaluation(mma_results['F3'], mgp_results['F3'], 'F3', 'F-measure 3 sec')
# evaluation(mma_results['D'], mgp_results['D'], 'D', 'Information Gain')
# evaluation(mma_results['DevA2E'], mgp_results['DevA2E'], '$\sigma$_{R2E}', 'Median Deviation: Annotation to Estimation')
# evaluation(mma_results['DevE2A'], mgp_results['DevE2A'], '$\sigma$_{E2R}', 'Median Deviation: Estimation to Annotation')
In [54]:
# Find the 45 "worst" and 5 "best" tracks.
import os
import jams2
total_secs = 0  # running sum of track durations, updated by is_longer()
def is_longer(est_file, secs=600):
    """Checks whether this track is longer than 'secs' seconds.

    Side effect: adds the track's annotated duration to the notebook
    global `total_secs` (consumed by the commented-out analysis below).

    Args:
        est_file: estimation file name; its last 5 characters (presumably
            a ".json" extension -- TODO confirm) are stripped to build the
            matching JAMS annotation path.
        secs: duration threshold in seconds.

    Returns:
        True if the annotated duration exceeds `secs`, else False.
    """
    # NOTE(review): hard-coded absolute path; parameterize for portability.
    jam_file = "/Users/uri/datasets/Segments/annotations/" + est_file[:-5] + ".jams"
    jam = jams2.load(jam_file)
    global total_secs
    total_secs += jam.metadata.duration
    if jam.metadata.duration > secs:
        return True
    return False
# Plot duration
# mgp_results = np.sort(mgp_results, order="F3")
# durations = []
# for mgp_res in mgp_results:
# jam_file = "/Users/uri/datasets/Segments/annotations/" + mgp_res["track_id"][:-5] + ".jams"
# jam = jams2.load(jam_file)
# durations.append(jam.metadata.duration)
# plt.figure(1, figsize=(5, 1.9), dpi=160, facecolor='w', edgecolor='k')
# plt.plot(np.arange(len(durations)), durations)
# plt.gca().set_xlim([0,2154])
# plt.gca().set_ylim([0,1850])
# plt.xlabel(r"Tracks sorted by MGP$_{F3}$")
# plt.ylabel(r"Duration (seconds)")
# plt.gcf().subplots_adjust(bottom=0.23, left=0.14)
# plt.show()
# Find bad results
# bad = ['SALAMI_718.json', 'SALAMI_714.json', 'SALAMI_1376.json', 'SALAMI_720.json', 'SALAMI_378.json', 'SALAMI_724.json', 'SALAMI_710.json']
# for mgp_res in mgp_results:
# if is_longer(mgp_res['track_id'], secs=600):
# bad.append(mgp_res['track_id'])
# print "Total Seconds in dataset", total_secs, total_secs/float(len(mgp_results)), len(mgp_results)
# metric = 'F3'
# # mgp_results = np.sort(mgp_results, order=metric)
# # for mgp_res in mgp_results[:45]:
# # if is_longer(mgp_res['track_id'], secs=600):
# # print "Longer inside"
# # if mgp_res['track_id'] in bad:
# # print "SALAMI speech"
# res = filter(lambda x: x["track_id"] not in bad, mgp_results)
# filtered_good = np.sort(res, order=metric)[-5:]
# filtered = np.sort(res, order=metric)[:45]
# #print np.sort(filtered, order='track_id')
# subset_secs = 0
# for filt in filtered:
# jam_file = "/Users/uri/datasets/Segments/annotations/" + filt["track_id"][:-5] + ".jams"
# jam = jams2.load(jam_file)
# subset_secs += jam.metadata.duration
# print "Total Seconds in subset", subset_secs, subset_secs/float(len(filtered)), len(filtered)
In [69]:
# Clean up mma table (there are SALAMI tracks without ground truth)
import sqlite3
conn = sqlite3.connect("../results/results.sqlite")
c = conn.cursor()
# Feature used by each algorithm.
# NOTE(review): 'levy' is 'mfcc' here but 'hpcp' in the earlier cell --
# confirm which feature the levy results were actually computed with.
feat_dict = {
    'serra' : 'mix',
    'levy' : 'mfcc',
    'foote' : 'hpcp',
    'siplca': '',
    'olda' : ''
}
tracks = []
trim = True
# Fetch track ids from two algorithms' tables; the table name comes from
# the fixed list below, so the %-interpolation is not an injection risk.
for algo_id in ["serra", "levy"]:
    c.execute('SELECT track_id FROM %s_bounds WHERE feature=? AND trim=?' % algo_id, (feat_dict[algo_id],trim))
    tracks.append(c.fetchall())
serra = set(tracks[0])
levy = set(tracks[1])
# Symmetric difference: ids present in exactly one of the two result sets
# (expected to be empty if both algorithms covered the same tracks).
print serra ^ levy
c.execute('SELECT track_id FROM mma_bounds')
mma = set(c.fetchall())
# Ids in only one of mma/serra.  DELETEs for ids that are in serra but
# not in mma simply match no rows, so only the mma-only tracks go away.
to_delete = mma ^ serra
for track in to_delete:
    # `track` is a 1-tuple (track_id,) straight from fetchall(), which
    # matches the single '?' placeholder.
    c.execute('DELETE FROM mma_bounds WHERE track_id=?', track)
    print track
conn.commit()
conn.close()
In [82]:
# Binary entropy
score = 0.5
scores = np.asarray([score, 1-score])
entropy = 0
for s in scores:
entropy += s*np.log2(s)
entropy *= -1
print entropy
print filtered["track_id"], len(filtered["track_id"])
In [6]:
# Copy the subdataset to ~/datasets/SubSegments
import shutil
import os
import jams
import json
def copy_files(files):
    """Copy each track's audio and annotation into ~/datasets/SubSegments.

    For every record in `files` (structured rows with a 'track_id' field,
    whose last 5 characters -- presumably ".json", TODO confirm -- are
    stripped):
      * copies the mp3 from Segments/audio to SubSegments/audio;
      * loads the matching JAMS annotation, renames every section
        annotator to "GT", and writes it to SubSegments/annotations.
    """
    # Loop variable renamed from `f`: the original reused `f` as the
    # output file handle in the `with` block below, shadowing the track
    # record inside each iteration.
    for track in files:
        # Audio
        src = "/Users/uri/datasets/Segments/audio/" + track["track_id"][:-5] + ".mp3"
        dest = "/Users/uri/datasets/SubSegments/audio/" + os.path.basename(src)
        shutil.copy(src, dest)
        # Annotations
        src = "/Users/uri/datasets/Segments/annotations/" + track["track_id"][:-5] + ".jams"
        jam = jams.load(src)
        #jam.sections = [] # Remove all section annotations (we only care about the metadata) # UPDATE: Not true!
        for annot in jam.sections:
            annot.annotation_metadata.annotator.name = "GT" # Call the annotator "GT"
        dest = "/Users/uri/datasets/SubSegments/annotations/" + os.path.basename(src)
        with open(dest, "w") as out_fh:
            json.dump(jam, out_fh, indent=2)
# NOTE(review): `filtered` / `filtered_good` are defined only in the
# commented-out selection cell above -- these calls fail on a fresh
# Restart & Run All.
copy_files(filtered)
copy_files(filtered_good)
In [83]:
# NOTE(review): `filtered` / `filtered_good` come from the commented-out
# selection cell above (hidden kernel state); this cell fails on a fresh
# Restart & Run All.
print np.mean(filtered["F3"]), np.mean(filtered["F05"]), filtered.shape, filtered_good.shape
# Concatenate the "worst" and "best" tracks into one record array.
merged = np.append(filtered, filtered_good, axis=0)
for filt in merged:
    print filt["track_id"], filt["F3"], filt["DevA2E"]
import pickle
#pickle.dump(merged, open("mgp_experiment_machine.pk", "w"))
In [ ]: