In [4]:
# Add last boundary to human annotations
from msaf import jams2
import glob
import json

# Annotator directory: maps each subject's name to the metadata used to look
# up that subject's section annotation inside the JAMS files.
annotators = {
    name: {"name": name, "email": email}
    for name, email in [
        ("Colin", "colin.z.hua@gmail.com"),
        ("Eleni", "evm241@nyu.edu"),
        ("Evan", "esj254@nyu.edu"),
        ("John", "johnturner@me.com"),
        ("Shuli", "luiseslt@gmail.com"),
    ]
}
def update_last_boundary(jam_file, annotator_name, time, context):
    """Move the end of the last large-scale segment to `time`, in place.

    Finds, in the section annotation authored by `annotator_name`, the
    datapoint immediately preceding the first "small_scale" datapoint and
    sets its end time to `time` (typically the track duration), then saves
    the JAMS file back to disk.

    Note: `context` is accepted for signature symmetry with
    add_last_boundary but is not used here.
    """
    jam = jams2.load(jam_file)
    for annotation in jam["sections"]:
        if annotation.annotation_metadata.annotator.name == annotator_name:
            # Iterate over all but the last datapoint so data[i + 1] is always
            # valid. The original enumerated the full list and raised
            # IndexError when no "small_scale" datapoint followed.
            for i, data in enumerate(annotation.data[:-1]):
                if annotation.data[i + 1].label.context == "small_scale":
                    data.end.value = time
                    break
            break
    # Close the output file explicitly (the original leaked the handle).
    with open(jam_file, "w") as out:
        json.dump(jam, out, indent=2)
def add_last_boundary(jam_file, annotator_name, time, context):
    """Append a final "END" segment reaching `time`, saving the file in place.

    In the section annotation authored by `annotator_name`, a new datapoint
    labeled "END" with the given `context` is created, spanning from the end
    of the last large-scale segment (the one right before the first
    "small_scale" datapoint) up to `time` (typically the track duration).
    """
    jam = jams2.load(jam_file)
    for annotation in jam["sections"]:
        if annotation.annotation_metadata.annotator.name == annotator_name:
            # Iterate over all but the last datapoint so data[i + 1] is always
            # valid; the slice also avoids iterating the list we append to.
            # (The original enumerated the full list and raised IndexError
            # when no "small_scale" datapoint followed.)
            for i, data in enumerate(annotation.data[:-1]):
                if annotation.data[i + 1].label.context == "small_scale":
                    segment = annotation.create_datapoint()
                    segment.start.value = data.end.value
                    segment.end.value = time
                    segment.label.value = "END"
                    segment.label.context = context
                    break
            break
    # Close the output file explicitly (the original leaked the handle).
    with open(jam_file, "w") as out:
        json.dump(jam, out, indent=2)
# Sanity-check every annotator's final boundary against the track duration and
# repair the JAMS files in place with the two helpers above.
# NOTE(review): `np` is never imported in this cell -- relies on notebook state.
jam_files = glob.glob("/Users/uri/datasets/SubSegments/annotations/*.jams")
context = "large_scale"
for jam_file in jam_files:
    jam = jams2.load(jam_file)
    dur = jam.metadata.duration  # reference track duration (seconds)
    for key in annotators.keys():
        inters, labels = jams2.converters.load_jams_range(jam_file, "sections",
                                                          annotator_name=annotators[key]["name"],
                                                          context=context)
        # Last boundary lies beyond the end of the track (0.1 s tolerance):
        # clamp it back to the track duration.
        if dur - inters[-1,-1] < -.1:
            print "Warning: the last boundary by %s is placed after the track duration! (%s)" % \
                (key, jam_file)
            print dur, inters[-1, -1]
            update_last_boundary(jam_file, key, dur, context)
        # Last boundary more than 0.5 s away from the track end: append an
        # explicit END segment that reaches the duration.
        if np.abs(dur - inters[-1, -1]) > 0.5:
            print "Warning: the last boundary by %s is not placed at the end of the track! (%s)" % \
                (key, jam_file)
            print dur, inters[-1, -1]
            add_last_boundary(jam_file, key, dur, context)
In [3]:
# Move estimations to SubSegments folder
import glob
import os
import shutil

# For every annotated track, copy its estimation file from the Segments
# dataset into the SubSegments dataset.
jam_files = glob.glob("/Users/uri/datasets/SubSegments/annotations/*.jams")
for jam_file in jam_files:
    # Track file name without the ".jams" extension, plus ".json".
    track = os.path.basename(jam_file)[:-5] + ".json"
    orig = "/Users/uri/datasets/Segments/estimations/" + track
    dest = "/Users/uri/datasets/SubSegments/estimations/" + track
    shutil.copy(orig, dest)
In [316]:
# Experiment 1: Dur x #bounds x score
# Builds a (metric, #boundaries-bin, duration-bin) heat map of boundary
# detection scores averaged over five algorithms, plus per-track source data.
import sys
import jams2
import os
import glob
sys.path.append("..")
import eval as EV
import msaf_io as MSAF

# max_dur: 1728.257007 (SALAMI_878)
# max #boundaries: 75 (SALAMI_888)
N = 76         # boundary-count bins (max #boundaries + 1)
M = 100        # duration bins
# max_M = 1730.
max_M = 2000.  # duration (seconds) mapped across the M bins
# NOTE(review): these caps exceed the heat_map dims below (N=76, M=100); a
# track yielding n >= 76 or m >= 100 would raise IndexError -- confirm intended.
MAX_N = 138
MAX_M = 149
algos = ["olda", "siplca", "serra", "levy", "foote"]
trim = False
params_dict = {"olda" : "", "siplca" : "", "serra" : "mix",
               "levy" : "mfcc" , "foote" : "mfcc"}
bins = 250
in_path = "/Users/uri/datasets/Segments/"
est_files = glob.glob(os.path.join(in_path, "estimations", "*.json"))
jam_files = glob.glob(os.path.join(in_path, "annotations", "*.jams"))
# NOTE(review): `np` comes from notebook state (never imported in this cell).
heat_map = np.zeros((9, N, M))  # 9 metrics x boundary-count bin x duration bin
counts = np.zeros((N, M))       # tracks that fell into each (n, m) cell
SD = np.zeros((len(est_files), len(algos), 3)) # Source Data for Eric
for i, est_file in enumerate(est_files):
    ds_prefix = os.path.basename(est_file).split("_")[0]
    # Get corresponding annotation file
    jam_file = EV.get_annotation(est_file, jam_files)
    # Get number of bounds
    try:
        ref_inter, ref_labels = jams2.converters.load_jams_range(jam_file,
            "sections", annotator=0, context=MSAF.prefix_dict[ds_prefix])
    except:
        # NOTE(review): bare except -- intent is "skip tracks without an
        # annotation", but this also hides unrelated errors.
        print "No annotation for %s, skipping." % jam_file
        continue
    n = np.min([len(ref_inter) + 1, MAX_N])  # boundary-count bin index
    # Get duration
    jam = jams2.load(jam_file)
    dur = jam.metadata.duration
    # Place duration into correct bin
    m = np.min([int(dur / max_M * M), MAX_M])
    # Compute score
    score = []
    for j, algo_id in enumerate(algos):
        params = {"feature" : params_dict[algo_id]}
        est_inter = MSAF.read_estimations(est_file, algo_id, False, **params)
        res = EV.compute_results(ref_inter, est_inter, trim, bins, est_file)
        score.append(res)
        # Per-track record: #boundaries, duration, F3 (res[2]).
        SD[i, j, :] = np.array([len(ref_inter) + 1, dur, res[2]])
    score = np.mean(np.asarray(score), axis=0)  # average metrics over algorithms
    # Add to heat map
    heat_map[:, n, m] += score
    counts[n, m] += 1
print SD.shape
# NOTE(review): np.save writes .npy-format data despite the .npz name.
np.save(open("exp1.npz", "w"), SD)
In [170]:
# Experiment 1 (cont)
# Collapse the heat map from the previous cell to a single metric and plot it.
# NOTE(review): relies on notebook state for heat_map, M, max_M, np and plt.
metric_dict = {"F3" : 2, "F05" : 5, "D" : 6, "$\sigma_{R2E}$" : 7,
               "$\sigma_{E2R}$" : 8}
metric = "$\sigma_{E2R}$"
# Reduce data
mN = 40  # reduced number of boundary-count bins
mM = 40  # reduced number of duration bins
heat_map[np.isnan(heat_map)] = 0
heat_map_metric = np.zeros((mN, mM))
# Fold all cells beyond the reduced range into the last row/column.
for i in xrange(heat_map[metric_dict[metric], :, :].shape[0]):
    for j in xrange(heat_map[metric_dict[metric], :, :].shape[1]):
        heat_map_metric[np.min([i, mN-1]), np.min([j, mM-1])] += heat_map[metric_dict[metric], i, j]
# Mask empty cells so they render as background rather than a zero score.
idx = np.where(heat_map_metric == 0)
heat_map_metric[idx] = np.nan
# Plotting
figsize = (4, 3)
plt.figure(1, figsize=figsize, dpi=120, facecolor='w', edgecolor='k')
plt.imshow(heat_map_metric, interpolation="nearest", aspect="auto", cmap="hot")
plt.xlabel("Duration (seconds)")
plt.ylabel("Number of boundaries")
plt.title("#boundaries vs duration using %s" % metric)
plt.gcf().subplots_adjust(bottom=0.18)
plt.xticks(np.arange(0, mM, 10))
# Convert bin indices back to seconds for the x tick labels.
plt.gca().set_xticklabels(np.arange(0, mM+1, 10, dtype=int) / float(M) * max_M)
plt.show()
In [2]:
# Experiment 2a: Obtain evaluation for all the different subjects
import sys
sys.path.append("..")
import eval as EV
reload(EV)  # pick up live edits to the local eval module
def print_results(results, std=False):
"""Print all the results.
Parameters
----------
results: np.array(9)
Results in the following format:
0 : Precision 3 seconds
1 : Recall 3 seconds
2 : F-measure 3 seconds
3 : Precision 0.5 seconds
4 : Recall 0.5 seconds
5 : F-measure 0.5 seconds
6 : Information Gain
7 : Median Deviation from Annotated to Estimated boundary
8 : Median Deviation from Estimated to Annotated boundary
"""
results = np.asarray(results)
res = results.mean(axis=0)
print "F3: %.2f, P3: %.2f, R3: %.2f, F05: %.2f, P05: %.2f, " \
"R05: %.2f, D: %.4f, Ann2EstDev: %.2f, Est2AnnDev: %.2f" % \
(100 * res[2], 100 * res[0], 100 * res[1], 100 * res[5],
100 * res[3], 100 * res[4], res[6], res[7], res[8])
# Evaluate each algorithm against every human annotator (plus the original GT)
# on the SubSegments dataset.
# NOTE(review): relies on notebook state for `np`; SD and F3_F05 are consumed
# by the Experiment 3a cell further below.
in_path = "/Users/uri/datasets/SubSegments/"
algos = ["olda", "siplca", "serra", "levy", "foote"]
trim = False
params_dict = {"olda" : "", "siplca" : "", "serra" : "mix",
               "levy" : "mfcc" , "foote" : "mfcc"}
annotators = ["GT", "Colin", "Eleni", "Evan", "John", "Shuli"]
SD = np.zeros((50, 6, 5, 5)) # Tracks, annotators, Num algos, num metrics
F3_F05 = np.zeros((50, 6, 5, 2)) # Tracks, GT+annotators, algorithms, F3 + F05
for i, annotator in enumerate(annotators):
    res = []
    std = []
    res_tot = []
    for j, algo_id in enumerate(algos):
        params = {"feature" : params_dict[algo_id]}
        results = EV.process(in_path, algo_id, trim=trim, annotator=annotator,
                             **params)
        # Keep F3, F05, D and both median deviations per track.
        SD[:, i, j, :] = results[:, [2,5, 6, 7, 8]]
        res_tot.append(results[:, [2,5]])
        res.append(np.mean(results, axis=0))
        std.append(np.std(results, axis=0))
        F3_F05[:, i, j, :] = results[:, [2, 5]]
    res_tot = np.asarray(res_tot)
    print annotator
    print_results(res)
    print "STD", annotator
    print_results(std)
F3_F05 = np.asarray(F3_F05)
print F3_F05.shape
# Save Source Data for Eric-ah
print SD.shape
np.save(open("exp2a.npz", "w"), SD)
In [1]:
# Experiment 2b: Compare annotations
# Mutual agreement between every pair of annotators (including GT), per metric.
# NOTE(review): `np` and `plt` come from notebook state.
import glob
import sys
sys.path.append("..")
sys.path.append("../experiment/")
import eval as EV
import analyze_results as AR
import json
from collections import OrderedDict
reload(EV)
reload(AR)
metric_dict = OrderedDict()
metric_dict["F3"] = 2
metric_dict["F05"] = 5
metric_dict["D"] = 6
metric_dict["$\sigma_{R2E}$"] = 7
metric_dict["$\sigma_{E2R}$"] = 8
metric = "F3"
trim = True
N = 6 # N annotators
X = np.empty((0, N, N))  # per-track agreement matrices for the chosen metric
SD = np.zeros([50, 6, 6, 5]) # (num_tracks, num_annotators, num_annotators, metric)
jam_files = glob.glob("/Users/uri/datasets/SubSegments/annotations/*.jams")
for i, jam_file in enumerate(jam_files):
    # Pairwise agreement results for all annotator pairs on this track.
    mma_res = AR.compute_mma_results(jam_file, AR.annotators, trim, gt=True)
    x = np.zeros((N, N))
    idx = np.triu_indices(N, k=1)  # pair values live in the upper triangle
    for j, met in enumerate(metric_dict):
        x[idx] = mma_res[:, metric_dict[met]]
        SD[i, :, :, j] = x
    x[idx] = mma_res[:, metric_dict[metric]]
    X = np.append(X, [x], axis=0)
# Save data for Ah-Rica
print SD.shape
np.save(open("exp2b.npz", "w"), SD)
X_mean = np.mean(X, axis=0)
# X_mean = X_mean + X_mean.T
# np.fill_diagonal(X_mean, 1)
# Fill the empty (zero) cells with the mean of the upper-triangle values.
# NOTE(review): `idx` here is the triu_indices left over from the loop above --
# confirm that is intended rather than idx_where.
idx_where = np.where(X_mean == 0)
X_mean[idx_where] = np.mean(X_mean[idx])
print X_mean
# Second-largest unique value (only used by the commented-out imshow call).
vmax = np.sort(np.unique(X_mean))[-2]
figsize = (4, 3)
annots = ["GT", "Ann1", "Ann2", "Ann3", "Ann4", "Ann5"]
plt.figure(1, figsize=figsize, dpi=120, facecolor='w', edgecolor='k')
# plt.imshow(X_mean, interpolation="nearest", aspect="auto", cmap="hot", vmin=X_mean.min(), vmax=vmax)
plt.imshow(X_mean, interpolation="nearest", aspect="auto", cmap="hot")
print json.dumps(X_mean.tolist())
plt.gca().set_xticks(np.arange(0,6))
plt.gca().set_yticks(np.arange(0,6))
plt.gca().set_xticklabels(annots)
plt.gca().set_yticklabels(annots)
# plt.title("Annotators Agreement for %s" % metric)
plt.title("Annotators Mutual Agreement")
plt.colorbar()
plt.show()
In [616]:
# Experiment 3: compare all the annotated boundaries to come up with a more robust reference
import glob
import sys
import os
sys.path.append("..")
sys.path.append("../experiment/")
import eval as EV
import analyze_results as AR
import msaf_io as MSAF
import json
reload(EV)  # pick up live edits to the local modules
reload(AR)
def create_weighted_annot(jam, weights, histo_times, jam_file):
    """Write (or overwrite) a synthetic "Weighted" section annotation.

    Every histogram bin with a non-zero weight becomes a boundary candidate;
    consecutive candidates define segments whose start/end confidences hold
    the corresponding boundary weights. The JAMS object is saved back to
    `jam_file` in place.
    """
    # Annot: reuse an existing "Weighted" annotation if present, clearing its
    # data; otherwise create a fresh one with synthetic metadata.
    annot_name = "Weighted"
    annot = None
    for annotation in jam.sections:
        if annotation.annotation_metadata.annotator.name == annot_name:
            annotation.data = []
            annot = annotation
            break
    if annot is None:
        annot = jam.sections.create_annotation()
        annot.annotation_metadata.origin = "Synth"
        annot.annotation_metadata.annotator.name = annot_name
        annot.annotation_metadata.annotator.email = ""
    # Keep only the non-zero-weight bins as boundary candidates.
    bound_times = []
    bound_weights = []
    for i, weight in enumerate(weights):
        if weight != 0:
            bound_times.append(histo_times[i])
            bound_weights.append(weights[i])
    # Turn consecutive boundaries into segments carrying their weights as
    # start/end confidences.
    for i, bound in enumerate(bound_times):
        if i == 0:
            continue
        section = annot.create_datapoint()
        section.start.value = bound_times[i-1]
        section.start.confidence = bound_weights[i-1]
        section.end.value = bound_times[i]
        section.end.confidence = bound_weights[i]
        section.label.context = "synth"
    # Close the output file explicitly (the original leaked the handle).
    with open(jam_file, "w") as out:
        json.dump(jam, out, indent=2)
def create_thres_annot(jam, thresh_bounds, thresh_conf, jam_file):
    """Write (or overwrite) a synthetic "Threshold" section annotation.

    Consecutive peak-picked boundary times in `thresh_bounds` become segments
    whose start/end confidences are the matching entries of `thresh_conf`.
    The JAMS object is saved back to `jam_file` in place.
    """
    # Annot: reuse an existing "Threshold" annotation if present, clearing its
    # data; otherwise create a fresh one with synthetic metadata.
    annot_name = "Threshold"
    annot = None
    for annotation in jam.sections:
        if annotation.annotation_metadata.annotator.name == annot_name:
            annotation.data = []
            annot = annotation
            break
    if annot is None:
        annot = jam.sections.create_annotation()
        annot.annotation_metadata.origin = "Synth"
        annot.annotation_metadata.annotator.name = annot_name
        annot.annotation_metadata.annotator.email = ""
    # Turn consecutive boundaries into segments carrying their confidences.
    for i, bound in enumerate(thresh_bounds):
        if i == 0:
            continue
        section = annot.create_datapoint()
        section.start.value = thresh_bounds[i-1]
        section.start.confidence = thresh_conf[i-1]
        section.end.value = thresh_bounds[i]
        section.end.confidence = thresh_conf[i]
        section.label.context = "synth"
    # Close the output file explicitly (the original leaked the handle).
    with open(jam_file, "w") as out:
        json.dump(jam, out, indent=2)
# Merge the five human annotations (+ GT) into a weighted boundary curve per
# track and synthesize "Weighted" and "Threshold" annotations from it.
annotators = ["GT", "Colin", "Eleni", "Evan", "John", "Shuli"]
histo_bins = 300  # number of time bins for the boundary histogram
N = len(annotators)
X = np.empty((0, N, N))
jam_files = glob.glob("/Users/uri/datasets/SubSegments/annotations/*.jams")
# NOTE(review): starts at track 15 and calls sys.exit() inside the first
# iteration (debug leftover for producing one figure); everything after the
# exit is unreachable as written.
for jam_file in jam_files[15:]:
    # Get Duration
    jam = jams2.load(jam_file)
    dur = jam.metadata.duration
    # Compute the weighted boundaries
    boundaries = []
    ds_prefix = os.path.basename(jam_file).split("_")[0]
    for annot in annotators:
        if annot == "GT":
            ann_inter, ann_labels = jams2.converters.load_jams_range(jam_file,
                "sections", annotator=0, context=MSAF.prefix_dict[ds_prefix])
        else:
            ann_inter, ann_labels = jams2.converters.load_jams_range(jam_file,
                "sections", annotator_name=annot, context="large_scale")
        ann_times = EV.intervals_to_times(ann_inter)
        # Histogram of this annotator's boundary times over the track span.
        histo_bounds, histo_times = np.histogram(ann_times, bins=histo_bins, range=(0, dur))
        boundaries.append(histo_bounds)
    # Average the histograms across annotators and normalize to [0, 1].
    weights = np.mean(boundaries, axis=0)
    weights /= weights.max()
    print jam_file
    f, axarr = plt.subplots(2, sharex=False, figsize=(7,3.5))
    # axarr[0].plot(x, y)
    # axarr[0].set_title('Sharing X axis')
    # axarr[1].scatter(x, y)
    AR.plot_ann_boundaries(jam_file, AR.annotators, ax=axarr[0])
    plt.suptitle("Merging Human Boundaries")
    axarr[1].plot(weights)
    plt.xlim(0, 308)
    axarr[1].set_xticks(np.arange(0, 308, 308/5))
    axarr[1].set_xticklabels(np.arange(0, 251, 50))
    axarr[1].set_ylabel("Weight")
    axarr[1].set_xlabel("Duration (seconds)")
    plt.subplots_adjust(bottom=0.2)
    sys.exit()
    # Save weighted boundaries
    create_weighted_annot(jam, weights, histo_times, jam_file)
    # Compute the thresholded boundaries
    L = 15  # Hann window length used to smooth the weight curve
    hann = np.hanning(L)
    weighted_filt = np.convolve(weights, hann, mode="same")
    weighted_filt /= weighted_filt.max()
    # Peak-pick the smoothed curve: every local maximum becomes a boundary,
    # with special-casing for the first and last samples.
    thresh_bounds = []
    thresh_conf = []
    for i, weight in enumerate(weighted_filt):
        peak_found = False
        if i == 0 and weighted_filt[i+1] < weight:
            peak_found = True
        elif i == len(weighted_filt) - 1 and weighted_filt[i-1] < weight:
            peak_found = True
        elif i != 0 and i != len(weighted_filt) - 1 and \
                weighted_filt[i-1] < weight and weight > weighted_filt[i+1]:
            peak_found = True
        if peak_found:
            thresh_bounds.append(histo_times[i])
            thresh_conf.append(weight)
    # Save
    create_thres_annot(jam, thresh_bounds, thresh_conf, jam_file)
    # plt.plot(weighted_filt)
    # plt.show()
    # sys.exit()
In [363]:
# Experiment 3a (cont)
# Now we already have a better reference, let's compute the numbers on the Weighted result
import jams2
import os
import sys
import mir_eval
sys.path.append("..")
import eval as EV
reload(jams2)  # pick up live edits to the local jams2 package
reload(jams2.converters)
def weighted_hit_rate(ref_inters, est_inters, weights_inters, window=3):
    """Boundary hit rate where each reference boundary carries a weight.

    Parameters
    ----------
    ref_inters, est_inters:
        Reference / estimated segment intervals (converted to boundary times).
    weights_inters:
        Confidence intervals parallel to ref_inters, so the resulting weights
        array is aligned with the reference boundary times.
    window: float
        Matching tolerance in seconds.

    Returns
    -------
    (precision, recall, f): weighted precision, recall and their F-measure.
    """
    ref = EV.intervals_to_times(ref_inters)
    est = EV.intervals_to_times(est_inters)
    weights = EV.intervals_to_times(np.asarray(weights_inters))
    # Find matches: list of (ref_idx, est_idx) pairs within `window` seconds.
    matching = mir_eval.util.match_events(ref, est, window)
    # Each hit contributes the weight of the matched *reference* boundary.
    # BUGFIX: the original indexed `weights` with the estimation index
    # (matching[i][1]) and `denom_prec` with the reference index, which is
    # semantically swapped and could raise IndexError whenever
    # len(est) != len(ref).
    hits = np.zeros((len(matching)))
    for i in xrange(len(matching)):
        hits[i] = weights[matching[i][0]]
    # Compute the precision denominator: matched estimations count the weight
    # of their matched reference; unmatched ones count the mean weight.
    denom_prec = np.ones(len(est)) * np.mean(weights)
    for i in xrange(len(matching)):
        denom_prec[matching[i][1]] = weights[matching[i][0]]
    # Compute scores
    precision = np.sum(hits) / np.sum(denom_prec)
    recall = np.sum(hits) / np.sum(weights)
    f = mir_eval.util.f_measure(precision, recall)
    return precision, recall, f
# Evaluate the algorithms against the merged "Weighted" annotation using the
# confidence-weighted hit rate defined above.
# NOTE(review): relies on notebook state for glob, np, MSAF and for F3_F05
# (computed in the Experiment 2a cell).
jam_files = glob.glob("/Users/uri/datasets/SubSegments/annotations/*.jams")
est_files = glob.glob("/Users/uri/datasets/SubSegments/estimations/*.json")
algos = ["olda", "siplca", "serra", "levy", "foote"]
trim = False
params_dict = {"olda" : "", "siplca" : "", "serra" : "mix",
               "levy" : "mfcc" , "foote" : "mfcc"}
F3_tot = []
F05_tot = []
SD = np.zeros((50, 7, 5, 2)) # tracks, annotators + merged, algorithms, F3 + F05
for i, jam_file in enumerate(jam_files):
    est_file = est_files[i]
    # Assumes both globs list the files in the same order; the assert guards
    # the per-track pairing.
    assert os.path.basename(jam_file)[:-5] == os.path.basename(est_file[:-5])
    ref_inters, ref_labels, ref_conf = jams2.converters.load_jams_range(jam_file,
        "sections", annotator_name="Weighted", context="synth", confidence=True)
    F3 = []
    F05 = []
    for j, algo_id in enumerate(algos):
        params = {"feature" : params_dict[algo_id]}
        est_inters = MSAF.read_estimations(est_file, algo_id, False, **params)
        p3, r3, f3 = weighted_hit_rate(ref_inters, est_inters, ref_conf, window=3)
        F3.append(f3)
        p05, r05, f05 = weighted_hit_rate(ref_inters, est_inters, ref_conf, window=0.5)
        F05.append(f05)
        SD[i, 6, j, :] = np.asarray([f3, f05])  # slot 6 = merged annotation
    F3_tot.append(np.mean(F3))
    F05_tot.append(np.mean(F05))
# Assign the values for the other annotators
SD[:, :-1, :, :] = F3_F05[:, :, :, :]
# Save data for Eric guapo
print SD.shape
np.save(open("exp3a.npz", "w"), SD)
print "F3 (weighted):", np.mean(F3_tot), np.std(F3_tot)
print "F05 (weighted):", np.mean(F05_tot), np.std(F05_tot)
In [360]:
# Experiment 3b
# Thresholded version of merged boundaries
import jams2
import os
import sys
import mir_eval
sys.path.append("..")
import eval as EV
reload(jams2)  # pick up live edits to the local jams2 package
reload(jams2.converters)
# NOTE(review): glob, np, plt and MSAF come from notebook state.
jam_files = glob.glob("/Users/uri/datasets/SubSegments/annotations/*.jams")
est_files = glob.glob("/Users/uri/datasets/SubSegments/estimations/*.json")
algos = ["olda", "siplca", "serra", "levy", "foote"]
trim = False
# NOTE(review): `window` is unused below -- the detection calls hardcode 3 and
# 0.5; confirm which was intended.
window = 3
params_dict = {"olda" : "", "siplca" : "", "serra" : "mix",
               "levy" : "mfcc" , "foote" : "mfcc"}
def filter_ref(ref_inters, ref_conf, th):
    """Drop the reference boundaries whose confidence falls below `th`.

    Converts both the intervals and their parallel confidence intervals to
    boundary-time arrays, keeps the boundaries with confidence >= th and
    returns them re-packed as intervals.
    """
    assert len(ref_inters) == len(ref_conf)
    boundary_times = EV.intervals_to_times(ref_inters)
    boundary_confs = EV.intervals_to_times(ref_conf)
    keep = np.argwhere(boundary_confs >= th)
    return EV.times_to_intervals(boundary_times[keep].flatten())
# Sweep the confidence threshold and evaluate all algorithms against the
# filtered "Weighted" references at each step.
F3_th = []
F05_th = []
SD = np.zeros([50, 5, 20, 2]) # num_tracks, num_algorithms, num_thresh, F-measures
for i, th in enumerate(np.arange(0, 1, .05)):
    F3_tot = []
    F05_tot = []
    print "computing ", th
    j = 0  # track index
    for jam_file, est_file in zip(jam_files, est_files):
        assert os.path.basename(jam_file)[:-5] == os.path.basename(est_file[:-5])
        ref_inters, ref_labels, ref_conf = jams2.converters.load_jams_range(jam_file,
            "sections", annotator_name="Weighted", context="synth", confidence=True)
        ref_inters = filter_ref(ref_inters, np.asarray(ref_conf), th)
        F3 = []
        F05 = []
        if len(ref_inters) == 0:
            # No boundary survived this threshold: score zero across the board.
            F3 = [0]
            F05 = [0]
            SD[j, :, i, :] = 0
        else:
            for k, algo_id in enumerate(algos):
                params = {"feature" : params_dict[algo_id]}
                est_inters = MSAF.read_estimations(est_file, algo_id, False, **params)
                p, r, f = mir_eval.boundary.detection(ref_inters, est_inters, window=3)
                F3.append(f)
                p, r, f = mir_eval.boundary.detection(ref_inters, est_inters, window=0.5)
                F05.append(f)
                SD[j, k, i, :] = np.asarray([F3[-1], F05[-1]])
        F3_tot.append(np.mean(F3))
        F05_tot.append(np.mean(F05))
        j += 1
    F3_th.append(np.mean(F3_tot))
    F05_th.append(np.mean(F05_tot))
print SD.shape
np.save(open("exp3b.npz", "w"), SD)
# Plot
figsize = (4, 3)
plt.figure(1, figsize=figsize, dpi=120, facecolor='w', edgecolor='k')
plt.plot(np.arange(0, 1, .05), F3_th, "b--", label='$F_3$')
plt.plot(np.arange(0, 1, .05), F05_th, "r-", label='$F_{0.5}$')
plt.title("Thresholded aggregation")
plt.xlabel("Threshold")
plt.ylabel("Score")
plt.gca().legend(loc='upper right', shadow=True)
plt.gcf().subplots_adjust(bottom=0.16, left=.15)
plt.show()
In [572]:
# Experiment 4a
# Count, per track, how many annotators reported at least one difficulty tag.
# NOTE(review): np and plt come from notebook state; `tags` is also consumed by
# the Experiment 4b cell.
import json
from collections import OrderedDict
tags = json.load(open("../experiment/results/merged_tags_ejh_resolved.json", "r"))
tags = OrderedDict(sorted(tags.items(), key=lambda t: t[0]))  # sort by track id
num_tags = []
for i, t in enumerate(tags):
    # Number of annotators with a non-empty tag entry for this track.
    num_tags.append(len(np.where(np.asarray(tags[t]) != "")[0]))
    print i, t
# Plot number of annotators who reported at least one track
figsize = (6, 3)
plt.figure(1, figsize=figsize, dpi=120, facecolor='w', edgecolor='k')
# plt.axvspan(8, 10.8, color='g', alpha=0.5)
# plt.axvspan(14, 14.8, color='g', alpha=0.5)
# plt.axvspan(37, 37.8, color='g', alpha=0.5)
plt.axvspan(0, 2.8, color='g', alpha=0.5)
plt.bar(np.arange(len(num_tags)), np.sort(num_tags))
plt.title("Analysis of difficulty from Human POV")
plt.axvspan(11, 12.8, color='g', alpha=0.5, label="Easy Tracks\nfrom Machine POV")
plt.xlabel("Tracks sorted by number of annotators who reported at least one tag")
plt.ylabel("Number of annotators\nwho reported at least one tag", multialignment='center')
plt.gca().legend(loc='lower right', shadow=True, prop={'size':11})
plt.gcf().subplots_adjust(bottom=0.18)
In [470]:
# Experiment 4b
# Count difficulty tags grouped by type and plot their frequencies.
# NOTE(review): `tags` comes from the Experiment 4a cell; np/plt from notebook
# state.
from collections import Counter
tag_groups = ["annotator", "audio_quality", "form", "instrumentation", "style"]
tag_names = np.asarray(["Annotator", "Audio Quality", "Form", "Instrumentation", "Style"])
tag_frequency = np.zeros(len(tag_groups), dtype=int)
count_tags = []
for key, ann_tags in tags.iteritems():
    for tag in ann_tags:
        # A single annotator entry may hold several comma-separated tags.
        for real_tag in tag.split(","):
            if real_tag != "":
                count_tags.append(real_tag)
                # Accumulate the tag into its group: the group prefix is the
                # part before the first "-".
                for i, tag_group in enumerate(tag_groups):
                    if real_tag.split("-")[0] in tag_group and real_tag.split("-")[0] != "":
                        tag_frequency[i] += 1
print tag_frequency
counter = Counter(count_tags)
print counter
# Sort tags
idx = np.argsort(tag_frequency)
tag_frequency = tag_frequency[idx]
tag_names = tag_names[idx]
# Plot
figsize = (6, 3)
plt.figure(1, figsize=figsize, dpi=120, facecolor='w', edgecolor='k')
plt.barh(np.arange(len(tag_frequency)), tag_frequency, align="center")
plt.yticks(np.arange(len(tag_frequency)))
plt.gca().set_yticklabels(tag_names)
plt.gcf().subplots_adjust(bottom=0.17, left=0.25)
plt.xlabel("Number of tags")
plt.title("Difficult Tags Grouped by Type")
Out[470]:
In [571]:
import json
COLORS = np.asarray(["b", "g", "r", "m", "y", "c"])
ANNOTS = np.asarray(["GT", "Ann1", "Ann2", "Ann3", "Ann4", "Ann5"])
SD = np.load("/Users/uri/Dropbox/NYU/Publications/ISMIR2014-NietoHumphreyFarboodBello/exp2b.npz")
num_trakcs = SD.shape[0] #(num_tracks, num_annotators, num_annotators, metric)
num_annots = SD.shape[1] # or SD.shape[2]
SD_F3 = SD[:, :, :, 0] # Get only the F3
print num_metrics
# Copy the elements from the upper triangle to the lower
for i in range(num_annots):
for j in range(num_annots):
SD[:, j, i] = SD[:, i, j]
# Setup the figure
figsize = (6, 3)
plt.figure(1, figsize=figsize, dpi=120, facecolor='w', edgecolor='k')
# Main loop to plot the bars
N = 8
for i in range(num_annots):
# Get indices to select for the colors and the annotators (except the current one)
all_idxs = np.arange(num_annots)
idxs = np.delete(all_idxs, i)
color = COLORS[idxs]
labels = ANNOTS[idxs]
# Get all the means for all the annotators (except the current one)
annot = np.asarray([np.mean(x) for x in SD_F3.swapaxes(2,0)[idxs, i, :]])
# Sort
sort_idxs = np.argsort(annot)
# annot = annot[sort_idxs]
# color = color[sort_idxs]
# labels = labels[sort_idxs]
# Plot
if i != 0:
last_bar = bars[-1]
bars = plt.bar(np.arange(i*N, i*N+len(annot)), annot, width=1, color=color, align="center")
bars += (last_bar,)
labels = tuple(labels) + ("Ann5",)
# plt.legend((bars[0], bars[4], bars[2], bars[3], bars[1], bars[5]),
# (labels[0], labels[4], labels[2], labels[3], labels[1], labels[5]) , loc='center right')
plt.legend(bars, labels, loc='center right')
plt.xticks(np.arange(num_annots/2-1, N*num_annots, N))
plt.gca().set_xticklabels(ANNOTS)
plt.gca().set_ylim(0.45, 0.7)
plt.gca().set_xlim(-1, N*num_annots + 18)
plt.ylabel("Hit Rate F-measure")
plt.title("Agreement between Human Annotations")
Out[571]: