In [1]:
from glob import glob
import os
import pickle
import json
from configparser import ConfigParser
import operator
import itertools
import tensorflow as tf
from sklearn import metrics
import joblib
import numpy as np
from scipy.sparse import coo_matrix
from scipy import optimize
import matplotlib.pyplot as plt
import matplotlib.colors as colors
%matplotlib inline
In [8]:
# Root directory holding one results subdirectory per bird
# (output of the Koumura-repo training runs).
root = '/media/ildefonso/HD-LCU3/tf_syl_seg/koumura_repo_results/'

# (results subdirectory, human-readable bird label).
# Birds 3 and 4 are excluded from this analysis.
these_dirs_branch = [
    ('results_181014_194418', 'bird 0'),
    ('results_181015_075005', 'bird 1'),
    ('results_181014_194508', 'bird 2'),
    # ('results_180306_145149', 'bird 3'),
    # ('results_180308_134732', 'bird 4'),
    ('results_181017_105732', 'bird 5'),
    ('results_181016_010102', 'bird 6'),
    ('results_181016_074937', 'bird 7'),
    ('results_181015_103121', 'bird 9')
]

# Prepend the root so each entry becomes (absolute results dir, bird label).
these_dirs = [(root + subdir, bird_label)
              for subdir, bird_label in these_dirs_branch]
In [9]:
# Load per-bird results: for each results directory, read its config file and
# its pickled error metrics, collecting one dict per bird into all_results_list.
# NOTE(review): this cell changes the working directory as a side effect; later
# cells' relative paths (e.g. plt.savefig) depend on where this loop ends up.
config = ConfigParser()
all_results_list = []
for this_dir, bird_ID in these_dirs:
    results_dict = {}
    results_dict['dir'] = this_dir
    # chdir so the glob patterns below resolve relative to this results dir
    os.chdir(this_dir)
    # assumes exactly one config* file per results dir — glob(...)[0] raises
    # IndexError if none is present
    config.read(glob('config*')[0])
    results_dict['data_dir'] = config['DATA']['data_dir']
    # NOTE(review): kept as strings (ConfigParser returns str); used for
    # bookkeeping only in this notebook
    results_dict['time_steps'] = config['NETWORK']['time_steps']
    results_dict['num_hidden'] = config['NETWORK']['num_hidden']
    # training-set durations in seconds, comma-separated in the config,
    # e.g. "60,120,480" -> [60, 120, 480]
    results_dict['train_set_durs'] = [int(element)
                                      for element in
                                      config['TRAIN']['train_set_durs'].split(',')]
    with open(glob('summary*/train_err')[0], 'rb') as f:
        results_dict['train_err'] = pickle.load(f)
    with open(glob('summary*/test_err')[0], 'rb') as f:
        results_dict['test_err'] = pickle.load(f)
    # joblib archive with per-replicate predictions and syllable error rates
    pe = joblib.load(glob('summary*/y_preds_and_err_for_train_and_test')[0])
    results_dict['train_syl_err_rate'] = pe['train_syl_err_rate']
    results_dict['test_syl_err_rate'] = pe['test_syl_err_rate']
    results_dict['bird_ID'] = bird_ID
    all_results_list.append(results_dict)
    # back up to the parent (root) — works because this_dir is an immediate
    # child of root
    os.chdir('..')

# Reduce each error array to one mean value per training-set duration.
# Assumes axis 0 indexes training-set durations and axis 1 indexes training
# replicates — TODO confirm against how the err arrays are saved.
for el in all_results_list:
    el['mn_test_err'] = np.mean(el['test_err'], axis=1)
    el['mn_train_err'] = np.mean(el['train_err'], axis=1)
    el['mn_train_syl_err'] = np.mean(el['train_syl_err_rate'], axis=1)
    el['mn_test_syl_err'] = np.mean(el['test_syl_err_rate'], axis=1)
    # convert to array so it can be used directly as plot x-values
    el['train_set_durs'] = np.asarray(el['train_set_durs'])
Plot framewise error: mean frame error rate on the test set as a function of training-set duration.
In [10]:
# Stack the per-bird mean test-error curves into a single array,
# one row per bird, one column per training-set duration.
all_mn_test_err = np.asarray(
    [bird_results['mn_test_err'] for bird_results in all_results_list]
)
In [37]:
plt.yticks?
In [79]:
# Frame error rate vs. training-set duration: one dashed line per bird,
# plus the across-bird median in black. Saved as a PNG in the cwd.
plt.style.use('ggplot')
fig, ax = plt.subplots()
for bird_results in all_results_list:
    ax.plot(bird_results['train_set_durs'],
            bird_results['mn_test_err'],
            label=bird_results['bird_ID'],
            linestyle='--',
            marker='o')
# median across birds at each duration (x-values from the last bird;
# all birds share the same set of training-set durations)
ax.plot(bird_results['train_set_durs'],
        np.median(all_mn_test_err, axis=0),
        linestyle='--', marker='o', linewidth=3, color='k',
        label='median across birds')
fig.set_size_inches(16, 8)
plt.legend(fontsize=20)
plt.xticks(bird_results['train_set_durs'])
plt.tick_params(axis='both', which='major', labelsize=20, rotation=45)
plt.title('Frame error rate as a function of training set size', fontsize=40)
plt.ylabel('Frame error rate\nas measured on test set', fontsize=32)
plt.xlabel('Training set size: duration in s', fontsize=32);
plt.tight_layout()
plt.savefig('frame-err-rate-v-train-set-size.png')
In [81]:
# Syllable error rate vs. training-set duration: one dotted line per bird,
# with reference points from Koumura & Okanoya 2016 for comparison.
# Saved as a PNG in the cwd.
plt.style.use('ggplot')
fig, ax = plt.subplots()
fig.set_size_inches(16, 8)
for el in all_results_list:
    lbl = (el['bird_ID'])
    ax.plot(el['train_set_durs'],
            el['mn_test_syl_err'],
            label=lbl,
            linestyle=':',
            marker='o')
# Reference values reported by Koumura & Okanoya 2016 (note error rates).
plt.scatter(120, 0.84, s=75)
plt.text(75, 0.7, 'Koumura & Okanoya 2016,\n0.84 note error rate\nwith 120s training data', fontsize=20)
# Bug fix: this marker was previously plotted at y=0.5 although the annotation
# (and the paper) quote 0.46 — plot it at the quoted value so marker and text agree.
plt.scatter(480, 0.46, s=75)
plt.text(355, 0.35, 'Koumura & Okanoya 2016,\n0.46 note error rate\nwith 480s training data', fontsize=20)
plt.legend(fontsize=20, loc='upper right');
plt.title('Syllable error rate as a function of training set size', fontsize=40)
# x-ticks from the last bird's durations (all birds share the same durations)
plt.xticks(el['train_set_durs'])
plt.tick_params(axis='both', which='major', labelsize=20, rotation=45)
plt.ylabel('Syllable error rate\nas measured on test set', fontsize=32)
plt.xlabel('Training set size: duration in s', fontsize=32);
plt.tight_layout()
plt.savefig('syl-error-rate-v-train-set-size.png')
In [ ]: