In [1]:
import mir_eval
from __future__ import division
import os
import librosa
import medleydb as mdb
import numpy as np
import scipy
import matplotlib.pyplot as plt
import IPython.display
import seaborn as sns
%matplotlib inline
import pandas as pd
import jams
import itertools
In [2]:
def getOnsetTimes(j):
    """Return the first column of the interval array of the first 'onset' annotation.

    Parameters
    ----------
    j : object exposing ``search(namespace=...)`` (a loaded JAMS file in this notebook)

    Returns
    -------
    The slice ``[:, 0]`` of the first item returned by the annotation's
    ``data.to_interval_values()`` -- presumably the onset start times.

    Raises
    ------
    IndexError
        If the search yields no 'onset' annotation.
    """
    onsetAnnotation = j.search(namespace='onset')[0]
    intervals = onsetAnnotation.data.to_interval_values()[0]
    return intervals[:, 0]
def findIdxFalsePos(est_idxTruePos, onset_est):
    """Return, in ascending order, the positions of onset_est that were not matched.

    Parameters
    ----------
    est_idxTruePos : iterable of int
        Indices into onset_est that were matched.
    onset_est : sized sequence
        Only its length is used.

    Returns
    -------
    list of int
        Every index in ``range(len(onset_est))`` absent from est_idxTruePos.

    Notes
    -----
    Performs the same computation as ``findMissing``.
    """
    matched = set(est_idxTruePos)
    return [idx for idx in range(len(onset_est)) if idx not in matched]
def findMissing(idxTruePos, idxOnset):
    """Return the sorted indices of idxOnset that do not appear in idxTruePos.

    Parameters
    ----------
    idxTruePos : iterable of int
        Indices that were matched.
    idxOnset : sized sequence
        The full set of events; only ``len(idxOnset)`` is used.

    Returns
    -------
    list of int
        Ascending list of the unmatched positions in ``range(len(idxOnset))``.
    """
    allPositions = set(range(len(idxOnset)))
    return sorted(allPositions.difference(idxTruePos))
In [3]:
# Directories holding the reference and the estimated onset files; a file of
# the same name is loaded from each directory with jams.load further down.
refBaseDir = './OnsetAnnotation_truth/'
estBaseDir = './OnsetEstimation_spectralFlux/'
# os.listdir gives no ordering guarantee; sorting keeps annotations[0],
# annotations[0:2] and the per-track row order reproducible between runs.
annotations = sorted(os.listdir(refBaseDir))
In [9]:
# Load the reference/estimate pair for the first annotation file only.
a = annotations[0]

# Reference side.
refPath = os.path.join(refBaseDir, a)
j_ref = jams.load(refPath)
onset_ref = getOnsetTimes(j_ref)

# Estimate side: same file name, looked up under the estimation directory.
estPath = os.path.join(estBaseDir, a)
j_est = jams.load(estPath)
onset_est = getOnsetTimes(j_est)
In [11]:
# Onset-level breakdown. Every reference onset is flagged as matched by the
# estimator (TP) or missed (FN); the flags are then totalled per value of each
# attribute in keyList and accumulated over all annotation files.
# summary[i] holds the running TP/FN totals for keyList[i].
keyList = ['relStemPowerBin', 'onsetSource', 'polyphony']
onsetWindow = 0.05  # tolerance handed to mir_eval.util.match_events
powerBins = np.linspace(-70, 2, 25)  # 25 edges -> 24 bins of width 3
summary = [pd.DataFrame() for key in keyList]

for a in annotations:
    refPath = os.path.join(refBaseDir, a)
    estPath = os.path.join(estBaseDir, a)
    j_ref = jams.load(refPath)
    j_est = jams.load(estPath)
    onset_ref = getOnsetTimes(j_ref)
    onset_est = getOnsetTimes(j_est)

    # Each match is a (reference index, estimate index) pair.
    match = mir_eval.util.match_events(onset_ref, onset_est, onsetWindow)
    ref_idxTruePos = [m[0] for m in match]
    ref_idxFalseNeg = findMissing(ref_idxTruePos, onset_ref)

    # One row per reference onset, one column per field of the onset's value.
    df = pd.DataFrame(j_ref.search(namespace='onset')[0].data.value.tolist())
    df['onsetSource'] = df['onsetSource'].map(lambda x: "/".join(x))
    df['relStemPower'] = df['powerStem'] - df['powerMix']
    df['relStemPowerBin'] = pd.cut(df['relStemPower'], bins=powerBins)

    # Assign through df.loc[rows, column]: the chained form
    # df['TP'].loc[rows] = 1 may write to a temporary copy and leave df unchanged.
    df['TP'] = 0
    df['FN'] = 0
    df.loc[ref_idxTruePos, 'TP'] = 1
    df.loc[ref_idxFalseNeg, 'FN'] = 1

    for i, key in enumerate(keyList):
        groupedDF = df[['TP', 'FN', key]].groupby(key).sum()
        # fill_value=0 lets groups that are new to either side enter the total.
        summary[i] = summary[i].add(groupedDF, fill_value=0)
In [25]:
# For each per-attribute table keep only the groups with more than minCount
# reference onsets, add their recall TP / (TP + FN), and order by recall
# from best to worst.
filteredSummary = []
minCount = 1000
for groupCounts in summary:
    numOnsets = groupCounts['TP'] + groupCounts['FN']
    # Explicit copy: a column is added below, and assigning into a filtered
    # slice of the summary table triggers pandas' SettingWithCopy warning.
    Dfilter = groupCounts[numOnsets > minCount].copy()
    Dfilter['Recall'] = Dfilter['TP'] / (Dfilter['TP'] + Dfilter['FN'])
    # DataFrame.sort no longer exists in pandas; sort_values is its replacement.
    Dfilter = Dfilter.sort_values('Recall', ascending=False)
    filteredSummary.append(Dfilter)
In [26]:
# Print every filtered onset-level table, each followed by a separator line.
# Single-argument print(...) produces the same output under Python 2's print
# statement and Python 3's print function.
print('Onset Level:')
for f in filteredSummary:
    print(f)
    print('======== ======== ======== ======== ======== ======== ======== ======== ========')
In [ ]:
In [501]:
# Track-level evaluation: one row per annotation file holding its metadata,
# the mir_eval onset metrics and the raw TP / FP / FN counts.
trackSummary = []
onsetWindow = 0.05  # single tolerance shared by the metrics and the counts
for a in annotations:
    refPath = os.path.join(refBaseDir, a)
    estPath = os.path.join(estBaseDir, a)
    j_ref = jams.load(refPath)
    j_est = jams.load(estPath)
    onset_ref = getOnsetTimes(j_ref)
    onset_est = getOnsetTimes(j_est)

    # Pass the window explicitly so F / precision / recall are computed with
    # the same tolerance as the match_events call below, even if onsetWindow
    # is changed away from mir_eval's default.
    metrics = mir_eval.onset.evaluate(onset_ref, onset_est, window=onsetWindow)

    # Each match is a (reference index, estimate index) pair.
    match = mir_eval.util.match_events(onset_ref, onset_est, onsetWindow)
    est_idxTruePos = [m[1] for m in match]
    est_idxFalsePos = findMissing(est_idxTruePos, onset_est)
    ref_idxTruePos = [m[0] for m in match]
    ref_idxFalseNeg = findMissing(ref_idxTruePos, onset_ref)

    title = j_ref.file_metadata.title
    genre = j_ref.sandbox['genre']
    is_instrumental = j_ref.sandbox['is_instrumental']

    TP = len(ref_idxTruePos)    # reference onsets that were matched
    FP = len(est_idxFalsePos)   # estimated onsets left unmatched
    FN = len(ref_idxFalseNeg)   # reference onsets left unmatched

    F = metrics['F-measure']
    precision = metrics['Precision']
    recall = metrics['Recall']

    trackSummary.append([title, genre, is_instrumental, F, precision, recall, TP, FP, FN])
In [502]:
# Tabulate the per-track rows. Naming the columns in the constructor (instead
# of assigning .columns afterwards) also yields a correctly labelled empty
# frame when trackSummary has no rows, rather than a length-mismatch error.
trackLeveldf = pd.DataFrame(
    trackSummary,
    columns=['title', 'genre', 'is_instrumental', 'F', 'precision', 'recall', 'TP', 'FP', 'FN'],
)
In [521]:
# Mean F / precision / recall per genre and per instrumental flag, ordered by
# mean F from best to worst. DataFrame.sort no longer exists in pandas;
# sort_values is its replacement.
scoreColumns = ['F', 'precision', 'recall']
genredf = (
    trackLeveldf[['genre'] + scoreColumns]
    .groupby('genre')
    .mean()
    .sort_values('F', ascending=False)
)
instrumentaldf = (
    trackLeveldf[['is_instrumental'] + scoreColumns]
    .groupby('is_instrumental')
    .mean()
    .sort_values('F', ascending=False)
)
In [524]:
# Print the genre table and the instrumental table, separated by a rule.
# Single-argument print(...) produces the same output under Python 2's print
# statement and Python 3's print function.
print(genredf)
print('======== ======== ======== ======== ======== ========')
print(instrumentaldf)
In [ ]:
In [ ]:
In [45]:
# Sensitivity of the F-measure to the matching window: one curve per
# annotation file, evaluated at every window size in windowList.
windowList = np.linspace(0.001, 2, 20)
for a in annotations:
    refPath = os.path.join(refBaseDir, a)
    estPath = os.path.join(estBaseDir, a)
    j_ref = jams.load(refPath)
    j_est = jams.load(estPath)
    onset_ref = getOnsetTimes(j_ref)
    onset_est = getOnsetTimes(j_est)
    fMeasures = [
        mir_eval.onset.evaluate(onset_ref, onset_est, window=w)['F-measure']
        for w in windowList
    ]
    plt.plot(windowList, fMeasures)
plt.xscale('log')
# Label the axes so the figure can be read without the code next to it.
plt.xlabel('onset matching window')
plt.ylabel('F-measure')
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
## Earlier variant of the onset-level summary, restricted to the first two
## annotation files. The files used to be re-read once per key; each file is
## now loaded and labelled a single time, then the per-key totals are built
## from the frames kept in memory.
## NOTE: running this cell rebinds keyList and summary.
keyList = ['relStemPowerBin', 'onsetSource', 'polyphony']
powerBins = np.linspace(-70, 2, 25)  # 25 edges -> 24 bins of width 3

# Step 1: one labelled frame (a row per reference onset) for each file.
labeledFrames = []
for a in annotations[0:2]:
    refPath = os.path.join(refBaseDir, a)
    estPath = os.path.join(estBaseDir, a)
    j_ref = jams.load(refPath)
    j_est = jams.load(estPath)
    onset_ref = getOnsetTimes(j_ref)
    onset_est = getOnsetTimes(j_est)

    # Each match is a (reference index, estimate index) pair.
    match = mir_eval.util.match_events(onset_ref, onset_est, 0.05)
    ref_idxTruePos = [m[0] for m in match]
    ref_idxFalseNeg = findMissing(ref_idxTruePos, onset_ref)

    df = pd.DataFrame(j_ref.search(namespace='onset')[0].data.value.tolist())
    df['onsetSource'] = df['onsetSource'].map(lambda x: "/".join(x))
    df['relStemPower'] = df['powerStem'] - df['powerMix']
    df['relStemPowerBin'] = pd.cut(df['relStemPower'], bins=powerBins)

    # Assign through df.loc[rows, column]: the chained form
    # df['TP'].loc[rows] = 1 may write to a temporary copy and leave df unchanged.
    df['TP'] = 0
    df['FN'] = 0
    df.loc[ref_idxTruePos, 'TP'] = 1
    df.loc[ref_idxFalseNeg, 'FN'] = 1
    labeledFrames.append(df)

# Step 2: per key, total the TP/FN flags over the labelled frames.
summary = []
for key in keyList:
    D = pd.DataFrame()
    for df in labeledFrames:
        groupedDF = df[['TP', 'FN', key]].groupby(key).sum()
        # fill_value=0 lets groups that are new to either side enter the total.
        D = D.add(groupedDF, fill_value=0)
    summary.append(D)