In [41]:
import json
import glob
import logging
import os
import sys
import mir_eval
import numpy as np
import matplotlib.pyplot as plt
from collections import OrderedDict
# MSAF imports
import msaf
from msaf import jams2
from msaf import utils
# And the tree module
sys.path.append("/Users/uri/NYU/Spring14/hier_eval/")
import tree
algos = ["olda", "siplca", "serra", "levy", "foote"]
params_dict = {"olda" : "", "siplca" : "", "serra" : "mix",
"levy" : "mfcc" , "foote" : "mfcc"}
annotators = OrderedDict()
annotators["GT"] = {
"name" : "GT",
"email" : "TODO"
}
annotators["Colin"] = {
"name" : "Colin",
"email" : "colin.z.hua@gmail.com"
}
annotators["Eleni"] = {
"name" : "Eleni",
"email" : "evm241@nyu.edu"
}
annotators["Evan"] = {
"name" : "Evan",
"email" : "esj254@nyu.edu"
}
annotators["John"] = {
"name" : "John",
"email" : "johnturner@me.com"
}
annotators["Shuli"] = {
"name" : "Shuli",
"email" : "luiseslt@gmail.com"
}
dataset_path = "/Users/uri/datasets/SubSegments/annotations/"
In [16]:
# SHAG functions (copy-pasted)
def round_time(t, res=0.1):
v = int(t / float(res)) * res
return v
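# e.g. round_time(3.27) floors to the 0.1 s grid and returns ~3.2 (up to
# floating-point noise); round_time(3.27, res=0.5) returns 3.0.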
def create_jams(boundaries, out_file):
jam = jams2.Jams()
jam.metadata.duration = boundaries[0][-1]
sections = jam.sections.create_annotation()
for c, boundary_level in enumerate(boundaries):
b_intervals = zip(boundary_level[:], boundary_level[1:])
for b_interval in b_intervals:
section = sections.create_datapoint()
section.start.value = b_interval[0]
section.end.value = b_interval[1]
section.label.context = str(c)
with open(out_file, "w") as f:
json.dump(jam, f, indent=2)
def draw_tree(T):
H = lca_matrix(T)
n = len(H)
# Find the grid points
idx = np.squeeze(np.argwhere(np.sum(np.abs(np.diff(H, axis=0)), axis=1) > 0)) + 1
plt.imshow(H, aspect='equal', interpolation='nearest', cmap='hot_r', extent=[0, len(H), len(H), 0])
plt.vlines(idx, 0, n, colors='k', alpha=0.15)
plt.hlines(idx, 0, n, colors='k', alpha=0.15)
    sr = 10  # frames per second at the default 0.1 s resolution
    plt.xticks(np.arange(0, len(H), len(H) / 4), np.arange(0, len(H), len(H) / 4) / float(sr))
    plt.yticks(np.arange(0, len(H), len(H) / 4), np.arange(0, len(H), len(H) / 4) / float(sr))
plt.xlabel('Duration (seconds)')
plt.show()
def lca_matrix(tree, res=0.1):
'''
Input: a segment tree
Output: an n-by-n integer matrix indicating the height of the least common ancestor of each pair of frames (i, j)
'''
# Figure out how many frames we need
n = int((round_time(tree.root.segment.end, res=res) - round_time(tree.root.segment.start, res=res) ) / res)
# Build a mapping of level->height
height_map = {}
# Initialize the LCA matrix
H = np.zeros( (n, n), dtype=np.uint8)
# Breadth-first traversal of the tree
queue = [tree.root]
while queue:
node = queue.pop(0)
# Get the node's level
if node.parent is not None:
height_map[node] = 1 + height_map[node.parent]
else:
height_map[node] = 0
s = int(round_time(node.segment.start, res=res) / res)
t = int(round_time(node.segment.end, res=res) / res)
H[s:t, s:t] = height_map[node]
queue.extend(node.children)
return H
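# Minimal illustration (toy example only, no SegmentTree needed) of the matrix
# that lca_matrix builds: a 2-level hierarchy whose root spans 1.0 s and whose
# two children split it at 0.5 s. With res=0.1 that gives 10 frames, and
# H[i, j] holds the depth of the deepest segment containing both frames i and j.
H_toy = np.zeros((10, 10), dtype=np.uint8)  # level 0: the root covers everything
H_toy[0:5, 0:5] = 1                         # level 1: first child, frames 0-4
H_toy[5:10, 5:10] = 1                       # level 1: second child, frames 5-9
# Off-diagonal blocks stay at 0: frames from different children only meet at the root.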
In [26]:
def find_nearest(array, value):
idx = (np.abs(array-value)).argmin()
return array[idx]
def adjust_boundaries(all_bounds):
"""Adjusts the large scale boundaries to the small scale ones."""
small = np.asarray(all_bounds[0])
large = np.asarray(all_bounds[1])
for i, val in enumerate(large):
large[i] = find_nearest(small, val)
return [small] + [large]
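# Illustrative check with made-up numbers: the large-scale bounds snap to their
# nearest small-scale counterparts (the input arrays are modified in place when
# they are ndarrays), e.g.
#   adjust_boundaries([np.array([0., 4.9, 10.2, 15.]), np.array([0., 5., 15.])])
# should return [array([0., 4.9, 10.2, 15.]), array([0., 4.9, 15.])].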
def get_boundaries(jam_file, annotator_name, context):
"""Gets the boundaries of a specific annotator and context."""
if annotator_name == "GT":
ds_prefix = os.path.basename(jam_file).split("_")[0]
context = msaf.prefix_dict[ds_prefix]
ann_inters, ann_labels = jams2.converters.load_jams_range(jam_file,
"sections", annotator_name=annotator_name, context=context)
bounds = utils.intervals_to_times(ann_inters)
return bounds
def get_all_flat_boundaries(jam_file, ann_context="large_scale", with_gt=False):
"""Gets all the boundaries for a given jam_file, with a specific context."""
all_bounds = []
if with_gt:
start = 0
else:
start = 1
for i in xrange(start, len(annotators.keys())):
bounds = get_boundaries(jam_file, annotators.keys()[i], ann_context)
all_bounds.append(bounds)
return all_bounds
def get_all_hier_boundaries(jam_file):
"""Gets all the boundaries for a given jam_file, large and small scale."""
all_bounds = []
for i in xrange(1, len(annotators.keys())):
large_bounds = get_boundaries(jam_file, annotators.keys()[i], "large_scale")
small_bounds = get_boundaries(jam_file, annotators.keys()[i], "small_scale")
# Build hierarchy
hier_bounds = np.asarray([small_bounds.tolist()] + [large_bounds.tolist()])
hier_bounds = adjust_boundaries(hier_bounds)
create_jams(hier_bounds, "test/tmp.jams")
# T = tree.SegmentTree("test/tmp.jams", annotation_id=0)
# draw_tree(T)
all_bounds.append(hier_bounds)
return all_bounds
def get_duration(jam_file):
"""Returns the duration of a file."""
jam = jams2.load(jam_file)
return float(jam.metadata.duration)
def plot_boundaries(all_bounds, out_file=None):
"""Plots all the boundaries in all_bounds."""
plt.figure(figsize=(6,4))
N = float(len(all_bounds))
for i, bounds in enumerate(all_bounds):
for bound in bounds:
plt.axvline(bound, ymin=i/N, ymax=(i+1)/N)
plt.axhline(i/N, color="gray")
plt.title("Human Annotated Boundaries")
plt.yticks(np.arange(N)/N + .5/N)
plt.gca().set_yticklabels(["Ann_%d" % i for i in xrange(int(N))])
if out_file is not None:
plt.tight_layout()
plt.savefig(out_file)
plt.show()
def build_hier_bounds(weights, times, N=5):
"""Builds the hierarchical boundaries given the weights and the times"""
hier = []
for i in xrange(N):
n = (i+1)/float(N)
idxs = np.where(weights >= n)
hier.append(times[idxs].tolist())
return hier
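# Illustrative example with made-up weights: boundaries that more annotators
# agree on (higher weight) survive into more levels of the hierarchy.
toy_w = np.array([1.0, 0.2, 0.6, 1.0])
toy_t = np.array([0.0, 12.5, 31.0, 60.0])
print build_hier_bounds(toy_w, toy_t, N=5)
# With thresholds 0.2, 0.4, ..., 1.0: the 0.2 bound only appears at the loosest
# level, the 0.6 bound at the three loosest, and the track edges at all five.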
In [35]:
def merge_down(all_bounds, duration, win=3, res=100):
"""Merges multiple flat boundaries into a single flat annotation.
Parameters
----------
all_bounds: list
List of np.arrays representing the boundaries in seconds.
duration: float
Duration in seconds of the entire track.
win: float
Window in seconds to merge the boundaries.
    res: int
        Granularity in seconds used to round the duration up for the histogram range.

    Returns
    -------
    bounds_weights: np.array
        Weight (fraction of the input boundary lists) of each merged boundary.
    bounds_times: np.array
        Time in seconds of each merged boundary.
    """
hmax = int(np.ceil(duration / float(res)) * res)
bins = int(hmax / float(win))
count = np.zeros(bins)
for bounds in all_bounds:
bounds = bounds[1:-1] # remove first and last bound
curr_count, bins = np.histogram(bounds, bins=bins, range=(0, hmax))
count += curr_count
count /= float(len(all_bounds))
# Remove zeros
zeros_idx = np.argwhere(count == 0)
bounds_weights = np.delete(count, zeros_idx)
bounds_times = np.delete(bins, zeros_idx)[:-1] + win/2.0 # Center times
# First and last boundaries
bounds_weights = np.concatenate(([1.0], bounds_weights, [1.0]))
bounds_times = np.concatenate(([0.0], bounds_times, [duration]))
return bounds_weights, bounds_times
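# Illustrative call with toy numbers: two annotators agree on a boundary near
# 30 s and only one marks a boundary near 45 s.
#   w, t = merge_down([np.array([0., 29.8, 60.]),
#                      np.array([0., 30.1, 45.0, 60.])], duration=60.0, win=3)
# The shared boundary gets weight 1.0, the lone one 0.5, and the track edges
# (0 and 60 s) are always pinned with weight 1.0.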
def merge_flat_to_flat(all_bounds, duration, win=3, res=100):
"""Merges multiple flat boundaries into a single flat annotation."""
bounds_weights, bounds_times = merge_down(all_bounds, duration, win, res)
# TODO: Low pass filter?
return bounds_weights, bounds_times
def merge_flat_to_hier(all_bounds, duration, win=3, res=100):
"""Merges multiple flat boundaries into a single hierarchical annotation."""
bounds_weights, bounds_times = merge_down(all_bounds, duration, win, res)
# TODO: Low pass filter?
N = len(all_bounds)
hier = build_hier_bounds(bounds_weights, bounds_times, N)
create_jams(hier, "test/tmp.jams")
T = tree.SegmentTree("test/tmp.jams", annotation_id=0)
# draw_tree(T)
return T
def merge_hier_to_flat(all_hier_bounds, duration, win=3, res=100):
"""Merges multiple hierarchical 2-level boundaries into a single flat annotation."""
all_bounds = []
for hier_bounds in all_hier_bounds:
all_bounds.append(hier_bounds[0]) # Small scale
all_bounds.append(hier_bounds[1]) # Large scale
bounds_weights, bounds_times = merge_down(all_bounds, duration, win, res)
# TODO: Low pass filter?
return bounds_weights, bounds_times
def merge_hier_to_hier(all_hier_bounds, duration, win=3, res=100):
"""Merges multiple hierarchical 2-level boundaries into a single hierarchical annotation."""
all_bounds = []
for hier_bounds in all_hier_bounds:
all_bounds.append(hier_bounds[0]) # Small scale
all_bounds.append(hier_bounds[1]) # Large scale
bounds_weights, bounds_times = merge_down(all_bounds, duration, win, res)
# TODO: Low pass filter?
N = len(all_bounds)
hier = build_hier_bounds(bounds_weights, bounds_times, N)
create_jams(hier, "test/tmp.jams")
T = tree.SegmentTree("test/tmp.jams", annotation_id=0)
# draw_tree(T)
return T
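# Summary of the four merge variants above: the *_to_flat functions return a
# single weighted boundary list, while the *_to_hier functions threshold the
# weights into N levels (build_hier_bounds) and wrap the result in a
# SegmentTree via the temporary "test/tmp.jams" file.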
In [38]:
context = "large_scale"
jam_file = dataset_path + "Epiphyte_0220_promiscuous.jams" # Easy track
all_flat_bounds = get_all_flat_boundaries(jam_file, context, with_gt=False)
plot_boundaries(all_flat_bounds, out_file="low-variation-references.pdf")
jam_file = dataset_path + "Cerulean_Bob_Dylan-Hurricane.jams"  # Harder track
# jam_file = dataset_path + "Cerulean_Boston_Symphony_Orchestra_&_Charles_Munch-Sympho.jams"  # Harder
all_flat_bounds = get_all_flat_boundaries(jam_file, context, with_gt=False)
plot_boundaries(all_flat_bounds, out_file="high-variation-references.pdf")
# print all_flat_bounds
duration = get_duration(jam_file)
bounds_weights, bounds_times = merge_flat_to_flat(all_flat_bounds, duration, win=3)
print bounds_weights
print bounds_times
plt.scatter(bounds_times, bounds_weights)
plt.show()
T = merge_flat_to_hier(all_flat_bounds, duration, win=3)
draw_tree(T)
all_hier_bounds = get_all_hier_boundaries(jam_file)
bounds_weights, bounds_times = merge_hier_to_flat(all_hier_bounds, duration, win=3)
print bounds_weights
print bounds_times
print len(bounds_weights), len(bounds_times)
plt.scatter(bounds_times, bounds_weights)
plt.show()
T = merge_hier_to_hier(all_hier_bounds, duration, win=3)
draw_tree(T)
In [64]:
# Evaluation of merged boundaries
def weighted_hit_rate(ref_inters, est_inters, weights_inters, trim=False, window=3):
ref = utils.intervals_to_times(ref_inters)
est = utils.intervals_to_times(est_inters)
weights = utils.intervals_to_times(np.asarray(weights_inters))
if trim:
ref = ref[1:-1]
est = est[1:-1]
weights = weights[1:-1]
    # Find matches between reference and estimated boundaries
    matching = mir_eval.util.match_events(ref, est, window)
    print "matching", matching
    # Apply weights (match_events returns (ref_idx, est_idx) pairs)
    hits = np.zeros(len(matching))
    for i in xrange(len(matching)):
        hits[i] = weights[matching[i][0]]
    # Compute the precision denominator (if no match is found, take the mean of the weights)
    denom_prec = np.ones(len(est)) * np.mean(weights)
    for i in xrange(len(matching)):
        denom_prec[matching[i][1]] = weights[matching[i][0]]
# Compute scores
precision = np.sum(hits) / np.sum(denom_prec)
recall = np.sum(hits) / np.sum(weights)
f = mir_eval.util.f_measure(precision, recall)
print f, precision, recall
print est
print ref
print weights
return precision, recall, f
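# In words: each reference boundary carries a weight w_i, and a matched estimate
# is credited that weight, so
#   recall    = sum of matched weights / sum of all reference weights
#   precision = sum of matched weights / sum over estimates of (matched weight,
#               or the mean weight when the estimate matched nothing)
# and F is the usual harmonic mean of precision and recall.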
def shag(T_ref, T_est, res=0.1, window=None, transitive=True):
# First, build the LCA matrices
H_ref = lca_matrix(T_ref, res=res)
H_est = lca_matrix(T_est, res=res)
# Make sure we have the right number of frames
assert H_ref.shape == H_est.shape
# How many frames?
n = H_ref.shape[0]
# By default, the window covers the entire track
if window is None:
window = n
# Initialize the score
score = 0.0
# Iterate over query frames
n_f = 0
for q in range(n):
# Find all pairs i,j such that H_ref[q, i] > H_ref[q, j]
R = H_ref[q, max(0, q-window):min(n, q+window)]
# And the same for the estimation
E = H_est[q, max(0, q-window):min(n, q+window)]
if transitive:
# Transitive: count comparisons across any level
S_ref = np.greater.outer(R, R)
else:
# Non-transitive: count comparisons only across immediate levels
S_ref = np.equal.outer(R, R+1)
S_est = np.greater.outer(E, E)
# Don't count (q,q) as a result
idx = min(q, window)
S_ref[idx, :] = False
S_ref[:, idx] = False
# Compute normalization constant
Z = float(S_ref.sum())
# Add up agreement for frames
if Z > 0:
score += np.sum(np.logical_and(S_ref, S_est)) / Z
n_f += 1.0
if n_f:
return score / n_f
# Convention: 0/0 = 0
return score
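# In equation form (transitive case): for a query frame q,
#   score(q) = |{(i, j) : H_ref[q, i] > H_ref[q, j] and H_est[q, i] > H_est[q, j]}|
#              / |{(i, j) : H_ref[q, i] > H_ref[q, j]}|
# and the returned value is the mean of score(q) over the frames whose
# denominator is non-zero (frames with Z == 0 are skipped).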
def compute_weighted(merged_times, est_inters, merged_weights, trim, window=3):
merged_inters = utils.times_to_intervals(merged_times)
merged_weight_inters = utils.times_to_intervals(merged_weights)
p, r, f = weighted_hit_rate(merged_inters, est_inters, merged_weight_inters, trim, window=window)
return p, r, f
def compute_shags(ref_T, est_T, window=None, transitive=True):
H_u = shag(ref_T, est_T, window=window, transitive=transitive)
H_o = shag(est_T, ref_T, window=window, transitive=transitive)
if H_u + H_o == 0:
return H_u, H_o, 0
return H_u, H_o, 2 * H_u * H_o / float(H_u + H_o)
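# Hedged usage sketch (assumes two SegmentTree objects, e.g. from
# merge_flat_to_hier above):
#   H_under, H_over, H_f = compute_shags(ref_T, est_T, window=100, transitive=True)
# H_under measures how many of the reference's pairwise orderings the estimate
# reproduces, H_over the converse, and H_f is their harmonic mean.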
In [65]:
import itertools
annotations_path = "/Users/uri/datasets/SubSegments/annotations/"
estimations_path = "/Users/uri/datasets/SubSegments/estimations/"
annotations = sorted(glob.glob(annotations_path + "*.jams"))
estimations = sorted(glob.glob(estimations_path + "*.json"))
assert len(annotations) == len(estimations)
results = np.zeros((len(algos), 50, 12, 10))  # 5 algos, 50 tracks, 12 metrics (3 f2f + 3 f2h + 3 h2f + 3 h2h), 10 annotator pairs
context = "large_scale"
trim = True
def read_estimations(est_file, alg_id, annot_beats, annot_bounds=False,
bounds=True, **params):
"""Reads the estimations (either boundaries or labels) from an estimated
file.
Parameters
----------
est_file: str
Path to the estimated file (JSON file).
alg_id: str
Algorithm ID from which to retrieve the boundaries. E.g. serra
annot_beats: bool
Whether to retrieve the boundaries using annotated beats or not.
annot_bounds: bool
Whether to retrieve the boundaries using annotated bounds or not.
bounds : bool
Whether to extract boundaries or labels
params: dict
Additional search parameters. E.g. {"features" : "hpcp"}
Returns
-------
    estimations : np.array of shape (N, 2) or (N,)
Set of read estimations:
If boundaries: interval format (start, end).
If labels: flattened format.
"""
est_data = json.load(open(est_file, "r"))
if bounds:
est_type = "boundaries"
else:
est_type = "labels"
if est_type not in est_data.keys() or alg_id not in est_data[est_type]:
if bounds:
            logging.error("Estimation not found for algorithm %s in %s" %
                          (alg_id, est_file))
return []
estimations = []
for alg in est_data[est_type][alg_id]:
if alg["annot_beats"] == annot_beats:
# Match non-empty parameters
found = True
for key in params:
if key != "feature" and (key not in alg.keys() or \
(params[key] != "" and alg[key] != params[key])):
found = False
if not bounds:
if "annot_bounds" in alg.keys():
if alg["annot_bounds"] != annot_bounds:
found = False
if found:
estimations = np.array(alg["data"])
if bounds:
# Sort the boundaries by time
estimations = np.sort(estimations)
break
# Convert to interval if needed
if bounds:
estimations = np.asarray(zip(estimations[:-1], estimations[1:]))
return estimations
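# Hedged example call (the path is a placeholder; the parameters mirror how the
# function is used further down):
#   est_inters = read_estimations("/path/to/track.json", "serra", False,
#                                  feature="mix")
# With bounds=True (the default) this returns an (N, 2) array of (start, end)
# intervals sorted by time.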
def compute_evaluations(merges, est_T, est_inters):
hier_win = 10
transitive = True
win = 3
res = np.zeros(12)
res[:3] = compute_weighted(merges["f2f_times"], est_inters, merges["f2f_weights"], trim, window=win)
# res[3:6] = compute_shags(merges["f2h_T"], est_T, window=hier_win, transitive=transitive)
res[6:9] = compute_weighted(merges["h2f_times"], est_inters, merges["h2f_weights"], trim, window=win)
# res[9:] = compute_shags(merges["h2h_T"], est_T, window=hier_win, transitive=transitive)
return res
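# Layout of the 12-entry result vector: [0:3] flat-to-flat P/R/F, [3:6]
# flat-to-hier SHAG (currently commented out), [6:9] hier-to-flat P/R/F,
# [9:12] hier-to-hier SHAG (currently commented out).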
def compute_merged_bounds(annot_file, annot_idxs):
merges = {}
# Flats
all_flat_bounds = get_all_flat_boundaries(annot_file, context)
duration = get_duration(annot_file)
all_flat_bounds = list( all_flat_bounds[i] for i in annot_idxs )
merges["f2f_weights"], merges["f2f_times"] = merge_flat_to_flat(all_flat_bounds, duration, win=3)
merges["f2h_T"] = merge_flat_to_hier(all_flat_bounds, duration, win=3)
# Hiers
    all_hier_bounds = get_all_hier_boundaries(annot_file)
all_hier_bounds = list( all_hier_bounds[i] for i in annot_idxs )
merges["h2f_weights"], merges["h2f_times"] = merge_hier_to_flat(all_hier_bounds, duration, win=3)
merges["h2h_T"] = merge_hier_to_hier(all_hier_bounds, duration, win=3)
return merges
def print_evaluation(evals):
print "Flat to Flat", evals[:3]
print "Flat to Hier", evals[3:6]
print "Hier to Flat", evals[6:9]
print "Hier to Hier", evals[9:]
for j, algo_id in enumerate(algos):
print "Evaluating algorithm:", algo_id
for k, annot_idx in enumerate(itertools.combinations(range(5), 2)):
# for k, annot_idx in enumerate(itertools.combinations(range(5), 3)):
# for k, annot_idx in enumerate(itertools.combinations(range(5), 4)):
# for annot_idx in itertools.combinations(range(5), 4):
print annot_idx
for i, (annot_file, est_file) in enumerate(zip(annotations[:], estimations[:])):
assert os.path.basename(annot_file)[:-4] == os.path.basename(est_file)[:-4]
print "Computing: %s" % annot_file
### Merges ###
merges = compute_merged_bounds(annot_file, annot_idx)
### Evaluations ###
annot = "GT"
params = {"feature" : params_dict[algo_id]}
est_inters = read_estimations(est_file, algo_id, False, **params)
# ds_prefix = os.path.basename(annot_file).split("_")[0]
# if annot == "GT":
# ann_inter, ann_labels = jams2.converters.load_jams_range(annot_file,
# "sections", annotator=0, context=MSAF.prefix_dict[ds_prefix])
# else:
# ann_inter, ann_labels = jams2.converters.load_jams_range(annot_file,
# "sections", annotator_name=annot, context="large_scale")
# res = EV.compute_results(ann_inter, est_inters, trim, 250, est_file)
# # res: [P3, R3, F3, P05, R05, F05, D, ann_to_est, est_to_ann]
# print "Standard:\t", res[0], res[1], res[2]
# Create tree from flat estimated boundaries
duration = get_duration(annot_file)
est_inters[-1][-1] = np.min([est_inters[-1][-1], duration])
create_jams([utils.intervals_to_times(est_inters)], "test/tmp.jams")
est_T = tree.SegmentTree("test/tmp.jams", annotation_id=0)
# Compute all the evaluations
            res = compute_evaluations(merges, est_T, est_inters)
#print_evaluation(res)
results[j, i, :, k] = res
            # sys.exit()  # early exit left in for debugging a single track
np.save("results_merged-pairs.npy", results)
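# Hedged follow-up sketch (assumes the full loop above was allowed to run and
# the .npy file exists):
#   results = np.load("results_merged-pairs.npy")
#   # shape: (algorithms, tracks, metrics, annotator pairs)
#   print np.mean(results[:, :, 2, :], axis=(1, 2))  # mean flat-to-flat F per algorithm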
In [34]:
# Evaluate algorithm disagreement
import msaf
import glob
import mir_eval
import os
from msaf import jams2
from msaf import input_output as io
from msaf import utils
from msaf import plotting
reload(io)
reload(plotting)
algo_ids = ["cc", "cnmf3", "foote", "sf", "siplca"]
def read_all_est_bounds(est_file):
    all_est_bounds = []
    for algo_id in algo_ids:
        est_bounds, est_labels = io.read_estimations(est_file, algo_id, feature="hpcp")
all_est_bounds.append(est_bounds)
return all_est_bounds
class Boundaries:
def __init__(self, name):
self.name = name
self.all_times = []
def set_ref_times(self, ref_times):
self.ref_times = ref_times
self.ref_inters = utils.times_to_intervals(ref_times)
def set_ref_inters(self, ref_inters):
self.ref_inters = ref_inters
self.ref_times = utils.intervals_to_times(ref_inters)
def set_all_est_times(self, all_est_times):
self.all_est_times = all_est_times
self.all_est_inters = []
for est_times in all_est_times:
self.all_est_inters.append(utils.times_to_intervals(est_times))
def set_all_est_inters(self, all_est_inters):
self.all_est_inters = all_est_inters
self.all_est_times = []
for est_inters in all_est_inters:
self.all_est_times.append(utils.intervals_to_times(est_inters))
def set_score(self, score):
self.score = score
ests = sorted(glob.glob("/Users/uri/datasets/SubSegments/estimations/*.jams"))
audios = sorted(glob.glob("/Users/uri/datasets/SubSegments/audio/*.mp3"))
all_scores = []
all_boundaries = []
for i, (est, audio) in enumerate(zip(ests, audios)):
# Create Boundaries object
B = Boundaries(os.path.basename(audio).replace(".mp3", ""))
B.set_all_est_inters(read_all_est_bounds(est))
ref_times, ref_labels = io.read_references(audio)
B.set_ref_times(ref_times)
# Calculate hit-rate measure at 3 seconds with no trimming
hit_rate3 = []
for est_inters in B.all_est_inters:
P, R, F = mir_eval.segment.detection(B.ref_inters, est_inters, window=3, trim=False)
hit_rate3.append(F)
hit_rate3 = np.mean(hit_rate3)
B.all_times = [B.ref_times] + B.all_est_times
all_boundaries.append(B)
# Store and print results
all_scores.append(hit_rate3)
print hit_rate3, os.path.basename(audio)
# Plot worst and best performing track
arg_min = np.argmin(all_scores)
plotting.plot_boundaries(all_boundaries[arg_min].all_times,
all_boundaries[arg_min].name,
algo_ids=algo_ids, output_file="poor_agreement.pdf")
arg_max = np.argmax(all_scores)
arg_max = 10  # manually pick a specific track for the "good agreement" figure
plotting.plot_boundaries(all_boundaries[arg_max].all_times,
all_boundaries[arg_max].name,
algo_ids=algo_ids, output_file="good_agreement.pdf")