Validating if our results are sensible


In [2]:
%load_ext autoreload
%autoreload 2
import sys, os
sys.path.append('../..')

%matplotlib inline
import matplotlib.pylab as plt

from misc.config import c
from data_api import *
import cPickle
import pandas as pd
from data_api import *
results_dir = c['RESULTS_DIR']

In [3]:
all_results = {}

datasets = ['fourclass']
models = ['test_random']

paths = [ os.path.join(results_dir, model + '_' + dataset) for model in models for dataset in datasets ]

In [4]:
csv_results = {}
csv_dir = os.path.join(results_dir, 'csv')

for csv_file in os.listdir(csv_dir):
    print csv_file
    csv_results[csv_file] = pd.DataFrame.from_csv(os.path.join(csv_dir, csv_file))


test_svm_liver
fixed_r2svm_heart
fixed_r2svm_sonar
test_elm_bank
test_svm_sonar
test_linear_svm_segment
test_elm_diabetes
random_r2svm_fourclass
test_elm_wine
test_svm_segment
test_svm_german
fixed_r2svm_wine
test_linear_svm_iris
test_elm_splice
test_elm_iris
test_svm_satimage
fixed_r2svm_pendigits
test_linear_svm_breast_cancer
fixed_r2svm_german
test_svm_indian
test_linear_svm_liver
test_linear_svm_splice
random_r2svm_australian
fixed_r2svm_australian
test_linear_svm_bank
test_elm_segment
fixed_r2svm_liver
test_linear_svm_satimage
random_r2svm_liver
random_r2svm_iris
fixed_r2svm_crashes
test_linear_svm_australian
unit_test_r2svm_indian
fixed_r2svm_segment
test_linear_svm_indian
fixed_r2svm_indian
test_linear_svm_wine
random_r2svm_german
test_elm_fourclass
random_r2svm_heart
test_linear_svm_crashes
test_svm_bank
fixed_r2svm_glass
unit_test_r2svm_diabetes
test_elm_breast_cancer
random_r2svm_segment
random_r2svm_diabetes
random_r2svm_wine
test_svm_wine
random_r2svm_splice
test_svm_heart
test_linear_svm_heart
test_svm_australian
test_elm_glass
test_elm_heart
test_linear_svm_fourclass
random_r2svm_pendigits
test_linear_svm_ionosphere
fixed_r2svm_iris
test_svm_splice
test_svm_ionosphere
test_elm_australian
test_elm_ionosphere
fixed_r2svm_bank
test_linear_svm_diabetes
test_elm_liver
test_linear_svm_german
test_svm_iris
test_elm_crashes
test_svm_fourclass
fixed_r2svm_diabetes
test_svm_breast_cancer
test_svm_crashes
fixed_r2svm_satimage
test_elm_indian
test_elm_sonar
fixed_r2svm_fourclass
random_r2svm_glass
random_r2svm_indian
random_r2svm_crashes
fixed_r2svm_breast_cancer
random_r2svm_bank
test_svm_glass
test_svm_diabetes
test_linear_svm_sonar
random_r2svm_breast_cancer
fixed_r2svm_ionosphere
random_r2svm_sonar
test_elm_german
fixed_r2svm_splice
test_linear_svm_glass
random_r2svm_satimage
random_r2svm_ionosphere

In [18]:
f = csv_results['random_r2svm_segment']

In [74]:
len(V == 0.6438567)


Out[74]:
189

In [75]:
C[0:21]


Out[75]:
array([ 0.13533528,  0.13533528,  0.13533528,  0.13533528,  0.13533528,
        0.13533528,  0.13533528,  0.13533528,  0.13533528,  0.13533528,
        0.13533528,  0.13533528,  0.13533528,  0.13533528,  0.13533528,
        0.13533528,  0.13533528,  0.13533528,  0.13533528,  0.13533528,
        0.13533528])

In [68]:
f.iloc[1*21+3]


Out[68]:
C                                                     0.3678794
acc_fold      [ 0.6416185   0.6416185   0.64534884  0.645348...
clf                                                          []
data_name                                             fourclass
gamma                                               0.002478752
kernel                                                      rbf
mean_acc                                              0.6438567
n_class                                                       2
n_dim                                                         2
std                                                 0.001827486
test_time     [0.0036678314208984375, 0.0037109851837158203,...
train_time    [0.02165389060974121, 0.021739959716796875, 0....
Name: test_svc_fourclass_C0.367879441171_g0.00247875217667_, dtype: object

In [20]:
a = f[f['mean_acc'] == max(f['mean_acc'])]

In [24]:
a


Out[24]:
acc_fold best_depth beta clf data_name depth fit_c mean_acc n_class n_dim recurrent scale seed std test_time train_time use_prev
random_r2svm_segment_uT_rF_b0.10_d4_sT_fra_ [[ 0.93939394 0.92857143 0.93290043]\n [ 0.9... [4, 4, 4, 4, 4] 0.1 [] segment 4 random_cls 0.917172 7 19 False True 666 0.010908 [[0.001834869384765625, 0.00177001953125, 0.00... [[0.03561592102050781, 0.0375211238861084, 0.0... True

In [76]:
C = f['C'].values

In [77]:
V = f['mean_acc'].values

In [78]:
G = f['gamma'].values

In [79]:
g = set(G)

In [80]:
c = set(C)

In [81]:
len(g)


Out[81]:
21

In [82]:
len(c)


Out[82]:
14

In [38]:
C[0], C[20]


Out[38]:
(0.1353352832366127, 0.1353352832366127)

In [49]:
V[4*21 + 1]


Out[49]:
0.64385670116951199

In [45]:
set(C)


Out[45]:
{0.1353352832366127,
 0.36787944117144228,
 1.0,
 2.7182818284590446,
 7.3890560989306495,
 20.085536923187668,
 54.598150033144243,
 148.4131591025766,
 403.42879349273511}

In [48]:
set(G)


Out[48]:
{4.5399929762484854e-05,
 0.00012340980408667956,
 0.0003354626279025118,
 0.00091188196555451624,
 0.0024787521766663585,
 0.006737946999085467,
 0.018315638888734175,
 0.049787068367863938,
 0.1353352832366127,
 0.36787944117144228,
 1.0,
 2.7182818284590446,
 7.3890560989306495,
 20.085536923187668,
 54.598150033144243,
 148.4131591025766,
 403.42879349273511,
 1096.6331584284585,
 2980.9579870417283,
 8103.0839275753842,
 22026.465794806718}

In [83]:
plt.figure(figsize=(8, 6))
plt.imshow(V.reshape(14,21), interpolation='nearest', cmap=plt.cm.spectral)
plt.colorbar()
plt.show()



In [ ]:
f.loc[']

In [4]:
best_std = {model: {} for model in models}
for model in models:
    for data in datasets:
        if model + '_' + data in results_pd.keys():
            df = results_pd[model + '_' + data]
            scores = df.loc[df['mean_acc'].idxmax(),'acc_fold']
            best_std[model][data] =  np.mean([np.std(fold_scores) for fold_scores in scores]) * 100


---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-4-f4b36cc6bb2e> in <module>()
      2 for model in models:
      3     for data in datasets:
----> 4         if model + '_' + data in results_pd.keys():
      5             df = results_pd[model + '_' + data]
      6             scores = df.loc[df['mean_acc'].idxmax(),'acc_fold']

NameError: name 'results_pd' is not defined

In [5]:
print "Best std"
pd.DataFrame.from_dict(best_std)


Best std
Out[5]:
test_svc