In [176]:
%load_ext autoreload
%autoreload 2
import sys, os
sys.path.append('..')
import cPickle
import math

import numpy as np
import pandas as pd

from misc.config import c
from data_api import *
from data_api import *
# Root results directory from the project config; the cells below look for one
# '<model>_<dataset>' sub-directory of pickled experiments under it, plus a 'csv' folder.
results_dir = c['RESULTS_DIR']
In [34]:
# Raw results keyed by '<model>_<dataset>'; filled by the loading cell below.
all_results = {}

# Every dataset / model combination whose result directory should be scanned.
# A comma was missing after 'svmguide2', which silently concatenated it with
# 'diabetes' into the bogus dataset name 'svmguide2diabetes'.
datasets = [
    'glass', 'australian', 'bank', 'breast_cancer', 'crashes', 'liver',
    'segment', 'satimage', 'heart', 'svmguide2',
    'diabetes', 'fourclass', 'german', 'indian', 'ionosphere', 'sonar',
    'splice', 'iris', 'wine', 'pendigits',
    'svmguide4',
]
# NOTE(review): 'exh__r2svm' has a double underscore while a later cell refers to
# 'exh_r2svm' -- confirm which spelling matches the directories on disk.
models = [
    'test_svm', 'test_r2elm', 'test_r2svm', 'test_r2lr', 'test_linear_svm',
    'test_elm', 'random_r2svm',
    'fixed_r2svm', 'triple_elm', 'triple_fixed_r2svm', 'triple_r2svm',
    'triple_svm', 'exh__r2svm',
]

# One candidate directory per (model, dataset) pair: <results_dir>/<model>_<dataset>.
paths = [os.path.join(results_dir, model + '_' + dataset)
         for model in models for dataset in datasets]
In [327]:
# Start from an empty result store and restrict the scan to a single model.
all_results = {}
models = ['test_r2elm']
datasets = [
    'glass', 'australian', 'bank', 'breast_cancer', 'crashes', 'liver',
    'segment', 'satimage', 'heart', 'svmguide2',
    'diabetes', 'fourclass', 'german', 'indian', 'ionosphere', 'sonar',
    'splice', 'iris', 'wine', 'pendigits',
    'svmguide4', 'vowel', 'vehicle',
]

# Candidate result directories, model-major: <results_dir>/<model>_<dataset>.
paths = []
for model in models:
    for dataset in datasets:
        paths.append(os.path.join(results_dir, '%s_%s' % (model, dataset)))
In [328]:
# Load every pickled experiment found under the candidate directories into
# all_results[<directory name>][<experiment name>] as one flat dict that merges
# the experiment's monitors, results and config params.

# Number of trailing characters dropped from a file name to get the experiment
# name; equals the hard-coded 11 used before (length of '.experiment').
suffix_len = len('.experiment')

for path in paths:
    if not os.path.isdir(path):
        continue
    print(path)
    results = {}
    for exp in os.listdir(path):
        name = exp[:-suffix_len]
        try:
            # Pickles must be read in binary mode, and the handle must be closed.
            # NOTE(review): unpickling executes arbitrary code -- only load trusted files.
            with open(os.path.join(path, exp), 'rb') as exp_file:
                exp_res = cPickle.load(exp_file)
        except Exception as err:
            # Best effort: report the unreadable file (and why) and move on, without
            # swallowing KeyboardInterrupt/SystemExit the way a bare `except:` does.
            print('%s (skipped: %r)' % (exp, err))
            continue
        # Later updates win on key clashes: monitors < results < config params.
        merged_res = exp_res['monitors']
        merged_res.update(exp_res['results'])
        merged_res.update(exp_res['config']['params'])
        results[name] = merged_res
    # Key the table by the directory name, i.e. '<model>_<dataset>'.
    name = os.path.basename(path)
    all_results[name] = results
In [329]:
# One DataFrame per '<model>_<dataset>' table: rows are experiments, columns are
# the merged monitor / result / parameter fields.
all_results_pd = dict(
    (table_name, pd.DataFrame.from_dict(experiments).T)
    for table_name, experiments in all_results.iteritems()
)
In [330]:
# Dump every result table to <results_dir>/csv/<model>_<dataset> (no extension,
# because later cells use the bare file name as the lookup key).
csv_out_dir = os.path.join(results_dir, 'csv')
# to_csv does not create missing directories, so make sure the target exists.
if not os.path.isdir(csv_out_dir):
    os.makedirs(csv_out_dir)
for k, v in all_results_pd.iteritems():
    v.to_csv(os.path.join(csv_out_dir, k))
In [331]:
# Read back every exported table into csv_results[<file name>].
csv_results = {}
csv_dir = os.path.join(results_dir, 'csv')
for csv_file in os.listdir(csv_dir):
    print(csv_file)
    if csv_file == 'old_r2svm':
        continue
    csv_path = os.path.join(csv_dir, csv_file)
    # Skip sub-directories and other non-file entries instead of failing on them.
    if not os.path.isfile(csv_path):
        continue
    # DataFrame.from_csv is deprecated (removed in pandas 1.0); read_csv with
    # index_col=0 restores the experiment names written by to_csv as the index.
    csv_results[csv_file] = pd.read_csv(csv_path, index_col=0)
In [332]:
# Models and datasets reported in this cell; later cells reuse these two lists.
# exh_r2svm is intentionally left out of the model list.
models = [
    'test_elm', 'test_svm', 'test_linear_svm', 'test_r2svm', 'test_r2elm',
    'random_r2svm', 'fixed_r2svm',
    'triple_svm', 'triple_r2svm', 'triple_fixed_r2svm',
]
datasets = [
    'glass', 'australian', 'bank', 'breast_cancer', 'crashes', 'liver',
    'segment', 'satimage', 'heart', 'vowel',
    'diabetes', 'fourclass', 'german', 'indian', 'ionosphere', 'sonar',
    'splice', 'iris', 'wine', 'pendigits',
    'vehicle', 'svmguide2', 'svmguide4',
]
pd.options.display.float_format = '{:2.4f}'.format

# best_results[model][dataset] = highest mean accuracy over all experiments, in percent.
best_results = {}
for model in models:
    per_dataset = {}
    for data in datasets:
        key = model + '_' + data
        if key not in csv_results:
            continue  # no table exported for this combination
        df = csv_results[key]
        per_dataset[data] = df['mean_acc'].max() * 100
    best_results[model] = per_dataset

print("Best Mean Accuracy")
pd.DataFrame.from_dict(best_results)
Out[332]:
In [335]:
# best_std[model][dataset] = 'std' (in percent) of the experiment that reached the
# highest mean accuracy. Tables without a 'std' column are skipped.
# A disabled alternative averaged the per-fold standard deviations from 'acc_fold'.
best_std = {}
for model in models:
    per_dataset = {}
    for data in datasets:
        key = model + '_' + data
        if key not in csv_results:
            continue
        df = csv_results[key]
        if 'std' not in df.columns:
            continue
        best_row = df['mean_acc'].idxmax()
        per_dataset[data] = df.loc[best_row, 'std'] * 100
    best_std[model] = per_dataset

print("Best std")
pd.DataFrame.from_dict(best_std)
Out[335]:
In [333]:
# best_train_time[model][dataset] = mean of the 'train_time' values recorded for
# the experiment with the highest mean accuracy.
best_train_time = {model: {} for model in models}
for model in models:
    for data in datasets:
        key = model + '_' + data
        if key not in csv_results:
            continue
        df = csv_results[key]
        # After the CSV round trip the cell holds text such as '[0.1, 0.2]'.
        # str() also copes with a cell that was parsed as a plain number.
        raw_times = str(df.loc[df['mean_acc'].idxmax(), 'train_time'])
        # str.translate(None, '[]') only works on Python 2 byte strings; replace()
        # strips the brackets for str and unicode alike. The value is no longer
        # bound to the name `time`, which shadowed the stdlib module name.
        stripped = raw_times.replace('[', '').replace(']', '')
        values = [float(t) for t in stripped.split(',') if t.strip()]
        best_train_time[model][data] = np.mean(values)
print("Best train time")
pd.DataFrame.from_dict(best_train_time)
Out[333]:
In [334]:
# best_test_time[model][dataset] = mean of the 'test_time' values recorded for
# the experiment with the highest mean accuracy.
best_test_time = {model: {} for model in models}
for model in models:
    for data in datasets:
        key = model + '_' + data
        if key not in csv_results:
            continue
        df = csv_results[key]
        # After the CSV round trip the cell holds text such as '[0.1, 0.2]'.
        # str() also copes with a cell that was parsed as a plain number.
        raw_times = str(df.loc[df['mean_acc'].idxmax(), 'test_time'])
        # str.translate(None, '[]') only works on Python 2 byte strings; replace()
        # strips the brackets for str and unicode alike. The value is no longer
        # bound to the name `time`, which shadowed the stdlib module name.
        stripped = raw_times.replace('[', '').replace(']', '')
        values = [float(t) for t in stripped.split(',') if t.strip()]
        best_test_time[model][data] = np.mean(values)
print("Best testtime")
pd.DataFrame.from_dict(best_test_time)
Out[334]:
In [324]:
# Spot check: compare the best row of the exported table with the raw pickle of
# one specific test_r2elm / bank experiment.
df = csv_results['test_r2elm_bank']
print(df.loc[df['mean_acc'].idxmax()])

experiment_path = os.path.join(
    results_dir, 'test_r2elm_bank',
    'test_r2elm_bank_uF_h100_rF_b0.10_sT_fNo_.experiment')
# Open the pickle in binary mode and close the handle deterministically.
# NOTE(review): unpickling executes arbitrary code -- only load trusted files.
with open(experiment_path, 'rb') as experiment_file:
    b = cPickle.load(experiment_file)
print(b['results'])
In [267]:
# Emit the dataset overview as a LaTeX table.
# NOTE(review): sets_pd is built in the cell that follows this one, so this cell
# fails on a fresh top-to-bottom run.
latex_columns = ['n_rows', 'n_dim', 'n_class', 'man_size']
print(sets_pd[latex_columns].to_latex())
In [263]:
# Gather every dataset returned by the three fetch helpers and tabulate them,
# one row per dataset name.
sets = fetch_new_datasets()
for fetch_more in (fetch_small_datasets, fetch_medium_datasets):
    sets += fetch_more()

sets_by_name = dict((dataset.name, dataset) for dataset in sets)
sets_pd = pd.DataFrame.from_dict(sets_by_name).transpose()
In [230]:
# Show which columns are available in one exported table (test_r2svm on german).
csv_results['test_r2svm_german'].columns
Out[230]:
In [232]:
import sys, os
sys.path.append('..')
from misc.config import c
from data_api import *
import cPickle
import pandas as pd
from data_api import *
results_dir = c['RESULTS_DIR']

# Reload every exported table into csv_results[<file name>].
csv_results = {}
csv_dir = os.path.join(results_dir, 'csv')
for csv_file in os.listdir(csv_dir):
    csv_path = os.path.join(csv_dir, csv_file)
    # Skip sub-directories and other non-file entries instead of failing on them.
    if not os.path.isfile(csv_path):
        continue
    # DataFrame.from_csv is deprecated (removed in pandas 1.0); read_csv with
    # index_col=0 restores the experiment names written by to_csv as the index.
    csv_results[csv_file] = pd.read_csv(csv_path, index_col=0)

models = ['test_r2svm', 'test_r2elm', 'triple_r2svm']
# A comma was missing after 'svmguide2', which silently merged it with 'diabetes'
# into 'svmguide2diabetes', so neither dataset was ever looked up.
datasets = [
    'glass', 'australian', 'bank', 'breast_cancer', 'crashes', 'liver',
    'segment', 'satimage', 'heart', 'svmguide2',
    'diabetes', 'fourclass', 'german', 'indian', 'ionosphere', 'sonar',
    'splice', 'iris', 'wine', 'pendigits',
    'svmguide4',
]

# Columns copied from the best experiment of each table.
param_names = ['beta', 'fit_c', 'recurrent', 'scale', 'use_prev', 'seed']

# d[model][dataset] = parameter values of the experiment with the highest mean
# accuracy; it stays an empty dict when no table exists for the combination.
d = {model_name: {data_name: {} for data_name in datasets} for model_name in models}
for model in models:
    for data in datasets:
        key = model + '_' + data
        if key not in csv_results:
            continue
        df = csv_results[key]
        best_row = df['mean_acc'].idxmax()  # computed once instead of per column
        for param in param_names:
            d[model][data][param] = df.loc[best_row, param]
        # Only test_r2elm additionally reports its 'h' column.
        if model == 'test_r2elm':
            d[model][data]['h'] = df.loc[best_row, 'h']
In [233]:
# Display the nested dict of best parameters built in the previous cell.
d
Out[233]:
In [196]:
# NOTE(review): neither `cifar` nor `manifold_dim` is defined in the visible cells;
# presumably both come from `from data_api import *` or a deleted cell -- confirm.
manifold_dim(cifar.data, thresh=0.90)
Out[196]:
In [94]:
def _best_row_summary(subset):
    """Return best accuracy plus 'std' and 'beta' of the best row of `subset`.

    `subset` is a DataFrame with 'mean_acc', 'std' and 'beta' columns. The best row
    is the one with the highest 'mean_acc'. An empty subset yields NaN for all
    three values instead of the ValueError that idxmax() raises on empty input.
    """
    if subset.empty:
        nan = float('nan')
        return {'acc': nan, 'std': nan, 'beta': nan}
    best_row = subset['mean_acc'].idxmax()
    return {
        'acc': subset['mean_acc'].max(),
        'std': subset.loc[best_row, 'std'],
        'beta': subset.loc[best_row, 'beta'],
    }


# results[dataset] = DataFrame with columns 'acc' / 'std' / 'beta' and one row per
# '<model>_<filter>' label, where the filter is "all", a single flag value, or a
# combination of the three boolean flags.
# NOTE(review): `results_pd` is not defined in any visible cell -- confirm where it
# comes from before running on a fresh kernel.
results = {data: {} for data in datasets}
print(models)
for data in datasets:
    res = {col: {} for col in ['acc', 'std', 'beta']}
    for model in models:
        if model + '_' + data not in results_pd.keys():
            continue
        df = results_pd[model + '_' + data]

        # (row label, filtered frame) pairs in the original reporting order.
        subsets = [(model + '_all', df)]
        for flag in ['scale', 'recurrent', 'use_prev']:
            for value in [True, False]:
                subsets.append((model + '_' + flag + '=' + str(value),
                                df[df[flag] == value]))
        for scale in [True, False]:
            for use_prev in [True, False]:
                for recurrent in [True, False]:
                    # The '_reccurent=' spelling is kept byte-for-byte because it
                    # is part of the row labels of the resulting tables.
                    label = (model +
                             '_scale=' + str(scale) +
                             '_reccurent=' + str(recurrent) +
                             '_use_prev=' + str(use_prev))
                    subsets.append((label, df[(df.scale == scale) &
                                              (df.recurrent == recurrent) &
                                              (df.use_prev == use_prev)]))

        for label, subset in subsets:
            summary = _best_row_summary(subset)
            for col in ['acc', 'std', 'beta']:
                res[col][label] = summary[col]
    results[data] = pd.DataFrame.from_dict(res)
In [107]:
# R2-family model variants covered by the flag-count cells below.
r2_models = [
    'test_r2elm',
    'test_r2svm',
    'test_r2lr',
    'random_r2svm',
    'lmao_r2svm',
    'center_random_r2svm',
    'switched_r2elm',
    'triple_lmao_r2svm',
    'triple_r2svm',
    'triple_r2elm',
]

# Datasets associated with the R2-family runs.
r2_datasets = (
    'glass australian bank breast_cancer crashes liver segment satimage heart '
    'diabetes fourclass german indian ionosphere sonar splice iris wine'
).split()
In [9]:
# best_results_with_scale[model][dataset] = highest mean accuracy among the
# experiments run with scale == True.
# NOTE(review): `results_pd` is not defined in any visible cell -- confirm its origin.
best_results_with_scale = {model: {} for model in models}
for model in models:
    for data in datasets:
        key = model + '_' + data
        # Skip combinations without a table (as the csv_results cells do) instead
        # of aborting the whole summary with a KeyError.
        if key not in results_pd.keys():
            continue
        df = results_pd[key]
        best_results_with_scale[model][data] = df[df.scale == True]['mean_acc'].max()
print("Best Mean Accuracy for scale=True")
pd.DataFrame.from_dict(best_results_with_scale)
Out[9]:
In [10]:
# best_results_without_scale[model][dataset] = highest mean accuracy among the
# experiments run with scale == False.
# NOTE(review): `results_pd` is not defined in any visible cell -- confirm its origin.
best_results_without_scale = {model: {} for model in models}
for model in models:
    for data in datasets:
        key = model + '_' + data
        # Skip combinations without a table (as the csv_results cells do) instead
        # of aborting the whole summary with a KeyError.
        if key not in results_pd.keys():
            continue
        df = results_pd[key]
        best_results_without_scale[model][data] = df[df.scale == False]['mean_acc'].max()
print("Best Mean Accuracy for scale=False")
pd.DataFrame.from_dict(best_results_without_scale)
Out[10]:
In [98]:
# The original expression indexed csv_results with the bare, undefined name
# `test_r2svm` and raised NameError. csv_results is keyed by '<model>_<dataset>'
# file names, so list the tables that exist for that model instead.
# NOTE(review): the intended lookup is a guess -- replace with the specific
# table you meant to inspect.
sorted(name for name in csv_results if name.startswith('test_r2svm_'))
Out[98]:
In [135]:
import math

# For every R2 model / dataset table, record the 'scale' value of the experiment
# with the highest mean accuracy, then count how often each value wins.
best_results_scale = {}
for model in r2_models:
    chosen = {}
    for data in datasets:
        key = model + '_' + data
        if key in csv_results:
            df = csv_results[key]
            chosen[data] = df.loc[df['mean_acc'].idxmax(), 'scale']
    best_results_scale[model] = chosen

# Missing model / dataset combinations show up as NaN in the value matrix.
s = pd.DataFrame.from_dict(best_results_scale).values
print("Scale True: %s" % (s == True).sum())
print("Scale False: %s" % (s == False).sum())
print("Scale NaN: %s" % sum([math.isnan(x) for x in s.ravel()]))
In [137]:
# For every R2 model / dataset table, record the 'recurrent' value of the
# experiment with the highest mean accuracy, then count how often each value wins.
best_results_rec = {}
for model in r2_models:
    chosen = {}
    for data in datasets:
        key = model + '_' + data
        if key in csv_results:
            df = csv_results[key]
            chosen[data] = df.loc[df['mean_acc'].idxmax(), 'recurrent']
    best_results_rec[model] = chosen

# Missing model / dataset combinations show up as NaN in the value matrix.
r = pd.DataFrame.from_dict(best_results_rec).values
print("Recurrent True: %s" % (r == True).sum())
print("Recurrent False: %s" % (r == False).sum())
print("Recurrent NaN: %s" % sum([math.isnan(x) for x in r.ravel()]))
In [139]:
# For every R2 model / dataset table, record the 'use_prev' value of the
# experiment with the highest mean accuracy, then count how often each value wins.
best_results_prev = {}
for model in r2_models:
    chosen = {}
    for data in datasets:
        key = model + '_' + data
        if key in csv_results:
            df = csv_results[key]
            chosen[data] = df.loc[df['mean_acc'].idxmax(), 'use_prev']
    best_results_prev[model] = chosen

# Missing model / dataset combinations show up as NaN in the value matrix.
u = pd.DataFrame.from_dict(best_results_prev).values
print("Use_prev True: %s" % (u == True).sum())
print("Use_prev False: %s" % (u == False).sum())
print("Use_prev NaN: %s" % sum([math.isnan(x) for x in u.ravel()]))
In [20]:
# best_results_beta[model][dataset] = 'beta' of the experiment with the highest
# mean accuracy.
# NOTE(review): `results_pd` is not defined in any visible cell -- confirm its origin.
best_results_beta = {model: {} for model in models}
for model in models:
    for data in datasets:
        key = model + '_' + data
        # Skip combinations without a table instead of aborting with a KeyError.
        if key not in results_pd.keys():
            continue
        df = results_pd[key]
        best_results_beta[model][data] = df.loc[df['mean_acc'].idxmax(), 'beta']
# The heading used to say "Use_prev" (copy-paste slip) although the table holds beta.
print("Beta parameter in best results")
pd.DataFrame.from_dict(best_results_beta)
Out[20]:
In [1]:
# Scratch check of the type of None; unrelated to the analysis above.
type(None)
Out[1]: