In [38]:
# Analysis and plotting stack used by the cells below.
import pandas as pd
import seaborn
import matplotlib.pyplot as plt
# Apply seaborn's default theme to matplotlib figures.
seaborn.set()
import numpy as np
# Print numpy arrays with 3 digits of precision.
np.set_printoptions(precision=3)
# IPython magic: render matplotlib figures inline in the notebook output.
%matplotlib inline
In [39]:
import os
import glob
import re
def load_lengths(idx, data_root='/home/bmcfee/data/SALAMI/data_cleaned'):
    """Build a one-column table of durations for the tracks named in ``idx``.

    Each entry of ``idx`` must start with ``SALAMI_<digits>.jams``. For every
    entry the file ``<data_root>/<digits>/parsed/textfile1_functions.lab`` is
    read as a headerless, tab-separated table and the maximum of its second
    column is stored as that track's duration (presumably the end time of the
    last annotated segment -- confirm against the .lab format).

    Parameters
    ----------
    idx : sequence of str
        File names to look up; also used as the index of the result. It is
        iterated once and then reused as the index, so it must not be a
        one-shot iterator.
    data_root : str, optional
        Directory containing one sub-directory per track number.

    Returns
    -------
    pandas.DataFrame
        Indexed by ``idx`` with a single column ``'duration'``.

    Raises
    ------
    ValueError
        If an entry of ``idx`` does not match the expected file-name pattern.
    """
    # Raw string with an escaped dot, so only a literal '.jams' suffix matches.
    pattern = re.compile(r'SALAMI_(?P<tracknum>\d+)\.jams')

    durations = []
    for jf in idx:
        match = pattern.match(jf)
        if not match:
            raise ValueError('Unmatched file: {}'.format(jf))

        annotation = os.path.join(data_root, match.group('tracknum'),
                                  'parsed', 'textfile1_functions.lab')
        data = pd.read_csv(annotation, header=None, sep='\t')
        durations.append(data[1].max())

    # Build the frame in one step: the chained form df['duration'][jf] = ...
    # assigns into a temporary under pandas Copy-on-Write and is never stored.
    return pd.DataFrame({'duration': durations}, index=idx)
In [40]:
import os
import glob
import re
def load_min_lengths(idx, data_root='/home/bmcfee/data/SALAMI/data_cleaned'):
    """Collect per-row (column 1 - column 0) differences from the .lab files.

    Each entry of ``idx`` must start with ``SALAMI_<digits>.jams``. For every
    entry and for annotators 1 and 2, the files
    ``<data_root>/<digits>/parsed/textfile<annotator>_small.lab`` and
    ``.../textfile<annotator>_functions.lab`` are read as headerless,
    tab-separated tables. The difference between their second and first
    columns (presumably segment end minus start, i.e. segment duration --
    confirm against the .lab format) is accumulated across all files.

    Parameters
    ----------
    idx : iterable of str
        File names to look up.
    data_root : str, optional
        Directory containing one sub-directory per track number.

    Returns
    -------
    small_df, large_df : pandas.Series
        Concatenated differences from the ``_small`` and ``_functions`` files
        respectively. Each file's own row index is kept, so index labels
        repeat. Both are empty float Series when ``idx`` is empty.

    Raises
    ------
    ValueError
        If an entry of ``idx`` does not match the expected file-name pattern.
        Errors from ``pandas.read_csv`` (e.g. a missing annotator file)
        propagate unchanged.
    """
    # Raw string with an escaped dot, so only a literal '.jams' suffix matches.
    pattern = re.compile(r'SALAMI_(?P<tracknum>\d+)\.jams')

    small_parts = []
    large_parts = []
    for jf in idx:
        match = pattern.match(jf)
        if not match:
            raise ValueError('Unmatched file: {}'.format(jf))

        parsed_dir = os.path.join(data_root, match.group('tracknum'), 'parsed')
        for annotator in [1, 2]:
            # Same read order as before: the "small" file, then "functions".
            for level, parts in (('small', small_parts),
                                 ('functions', large_parts)):
                lab_path = os.path.join(
                    parsed_dir, 'textfile{:d}_{:s}.lab'.format(annotator, level))
                data = pd.read_csv(lab_path, header=None, sep='\t')
                parts.append(data[1] - data[0])

    # Concatenate once at the end; concat inside the loop re-copies everything
    # gathered so far on every iteration.
    small_df = pd.concat(small_parts) if small_parts else pd.Series(dtype=float)
    large_df = pd.concat(large_parts) if large_parts else pd.Series(dtype=float)
    return small_df, large_df
In [41]:
# Load the results table from a CSV in the current working directory.
# The commented-out line below is an alternative, absolute-path input file.
# df = pd.read_csv('/home/bmcfee/git/hier_eval/notes/searching_w.csv')
df = pd.read_csv('searching_w_nontransitive.csv')
In [42]:
# Discard the 'Unnamed: 0' column (presumably a row index written out when
# the CSV was saved -- confirm).
df = df.drop(columns=['Unnamed: 0'])
In [43]:
# Reorder the columns alphabetically by name. np.argsort returns integer
# positions, so they have to be applied positionally through .iloc; plain
# df[...] treats them as column labels and fails on string column names.
df = df.iloc[:, np.argsort(df.columns)]
In [44]:
# Use the 'file' column as the row index.
df = df.set_index(keys='file')
In [45]:
# Order the rows by their index labels.
df = df.sort_index(axis=0)
In [ ]:
# Duration table for every file in the results index (reads the .lab files).
lengths = load_lengths(idx=df.index)
In [10]:
# Pooled per-row differences from the "_small" and "_functions" annotation
# files of every file in the results index.
small, func = load_min_lengths(idx=df.index)
In [11]:
# Summary statistics of the "_small" values, reporting the median and the
# 95th percentile.
small.describe(percentiles=[0.5, 0.95])
Out[11]:
In [12]:
# Summary statistics of the "_functions" values (default percentiles).
func.describe()
Out[12]:
In [13]:
# Attach the 'duration' column to the results, aligned on the file index.
# Re-running this cell without rebuilding df fails on the duplicate column.
df = df.join(lengths, how='left')
In [14]:
# One KDE joint plot of the To_<k> column against the Tu_<k> column per
# window setting k.
for k in ['0.5', '03', '05', '10', '15', '20', '25', '30', '60']:
    # jointplot opens its own figure, so a preceding plt.figure() would only
    # leave an empty figure behind. x and y are passed by keyword because
    # recent seaborn releases no longer accept them positionally.
    seaborn.jointplot(x=df['To_{}'.format(k)], y=df['Tu_{}'.format(k)], kind='kde')
    plt.title('w={}'.format(k))
    plt.tight_layout()
In [15]:
# One regression joint plot of track duration against the Tf_<k> column per
# window setting k.
for k in ['0.5', '03', '05', '10', '15', '20', '25', '30', '60']:
    # jointplot opens its own figure, so a preceding plt.figure() would only
    # leave an empty figure behind. x and y are passed by keyword because
    # recent seaborn releases no longer accept them positionally.
    # 'duration' can be object-dtype depending on how it was built, so it is
    # cast to float to give the regression fit numeric input.
    seaborn.jointplot(x=df['duration'].astype(float), y=df['Tf_{}'.format(k)], kind='reg')
    plt.title('w={}'.format(k))
    plt.tight_layout()
In [46]:
# First 14 columns of the sorted table (presumably the Tf_* block, given the
# 'Tf_' prefix stripped from these names later -- confirm the column count).
t_f = df.iloc[:, :14].copy()
In [47]:
# Columns 14-27 and 28-41 of the sorted table (presumably the To_* and Tu_*
# blocks -- confirm the column count).
t_o = df.iloc[:, 14:28].copy()
t_u = df.iloc[:, 28:42].copy()
In [48]:
# Keep the original t_f column labels for the rename in a later cell.
cols = t_f.columns
In [49]:
# Strip the 'Tf_' prefix so that only the remainder of each name is left as
# the column label.
t_f.columns = [label.replace('Tf_', '') for label in cols]
In [50]:
# Reset seaborn's theme with the 'ticks' style. Note that the next cell calls
# seaborn.set_style('darkgrid') before drawing, which replaces this style.
seaborn.set(style='ticks')
In [51]:
# Box plot of every t_f column, saved as the tfw.pdf figure.
seaborn.set_style('darkgrid')
fig, ax = plt.subplots(figsize=(6, 3))
# Pass the frame as data= so each column becomes one box regardless of which
# parameter comes first in the installed seaborn's boxplot signature.
seaborn.boxplot(data=t_f, color=seaborn.color_palette('Blues')[3], ax=ax)
ax.set_xlabel('$w$ (seconds)')
# Raw string: '\m' is not a valid escape sequence in a normal string literal.
ax.set_ylabel(r'$\mathcal{T}_F$')
fig.savefig('../figs/tfw.pdf', bbox_inches='tight', pad_inches=0.05)
In [233]:
# Box plot of every t_o column.
fig, ax = plt.subplots(figsize=(6, 4))
# Pass the frame as data= so each column becomes one box regardless of which
# parameter comes first in the installed seaborn's boxplot signature.
seaborn.boxplot(data=t_o, color=seaborn.color_palette('Blues')[3], ax=ax)
ax.set_xlabel('$w$')
# Raw string: '\m' is not a valid escape sequence in a normal string literal.
ax.set_ylabel(r'$\mathcal{T}_O$');
# Not written to disk. The previously commented-out savefig call targeted
# '../figs/tfw.pdf', the file the T_F figure is saved to.
Out[233]:
In [234]:
# Box plot of every t_u column.
fig, ax = plt.subplots(figsize=(6, 4))
# Pass the frame as data= so each column becomes one box regardless of which
# parameter comes first in the installed seaborn's boxplot signature.
seaborn.boxplot(data=t_u, color=seaborn.color_palette('Blues')[3], ax=ax)
ax.set_xlabel('$w$')
# Raw string: '\m' is not a valid escape sequence in a normal string literal.
ax.set_ylabel(r'$\mathcal{T}_U$');
# Not written to disk. The previously commented-out savefig call targeted
# '../figs/tfw.pdf', the file the T_F figure is saved to.
Out[234]: