In [3]:
# Notebook setup: live-reload local modules, inline plotting, shared config.
%load_ext autoreload
%autoreload 2
import sys, os
sys.path.append('../..')  # make the project root importable (misc, data_api)
%matplotlib inline
import matplotlib.pylab as plt
from misc.config import c
# NOTE(review): star import pollutes the namespace; presumably provides
# fetch_uci_datasets (and possibly np/scipy used later) — confirm.
from data_api import *
import cPickle
import pandas as pd
from data_api import *  # NOTE(review): duplicate star import — redundant
results_dir = c['RESULTS_DIR']  # base directory for experiment result files
In [360]:
# Load every pickled experiment result for the exh_r2svm model across the
# benchmark datasets into all_results[<model>_<dataset>][<experiment name>].
all_results = {}
models = ['exh_r2svm']
datasets = ['glass', 'australian', 'bank','breast_cancer', 'crashes', 'liver', 'segment', 'satimage', 'heart', 'vowel',
'diabetes', 'fourclass', 'german', 'indian', 'ionosphere', 'sonar', 'splice', 'iris', 'wine', 'pendigits',
'vehicle']
# datasets = ['vowel', 'vehicle', 'satimage', 'segment', 'pendigits']
paths = [ os.path.join(results_dir, model + '_' + dataset) for model in models for dataset in datasets ]
for path in paths:
if os.path.isdir(path):
print path
results = {}
for exp in os.listdir(path):
# Strip a fixed-length trailing suffix (11 chars) to get the experiment name.
# NOTE(review): magic number — presumably a file extension like
# '.experiment'; confirm against the result-writer code.
name = exp[:-11]
try:
# NOTE(review): file handle is never closed; cPickle on local result
# files only — do not point this at untrusted data.
exp_res = cPickle.load(open(os.path.join(path, exp),'r'))
except:
# NOTE(review): bare except silently skips any unreadable pickle.
print exp
continue
# Flatten monitors, results and config params into one dict per experiment.
merged_res = exp_res['monitors']
merged_res.update(exp_res['results'])
merged_res.update(exp_res['config']['params'])
results[name] = merged_res
name = path.split('/')[-1]
all_results[name] = results
In [4]:
# Load all pre-aggregated CSV result tables into csv_results, keyed by filename.
csv_results = {}
csv_dir = os.path.join(results_dir, 'csv')
for csv_file in os.listdir(csv_dir):
# NOTE(review): DataFrame.from_csv is deprecated in modern pandas (read_csv).
csv_results[csv_file] = pd.DataFrame.from_csv(os.path.join(csv_dir, csv_file))
In [196]:
# NOTE(review): `df` is not defined at this point on a fresh kernel — this
# cell relies on hidden state from out-of-order execution.
df[df['recurrent'] == False]['mean_acc'].max()
Out[196]:
In [195]:
Out[195]:
In [24]:
from collections import defaultdict
def get_accuracy_table(models=['test_elm', 'test_svm', 'test_linear_svm',
                               'test_r2svm', 'test_r2elm', 'random_r2svm', 'fixed_r2svm',
                               'triple_svm', 'triple_r2svm', 'triple_fixed_r2svm'],
                       datasets=['glass', 'australian', 'bank', 'breast_cancer', 'crashes', 'liver', 'segment',
                                 'satimage', 'heart', 'vowel', 'diabetes', 'fourclass', 'mushrooms', 'german',
                                 'indian', 'ionosphere', 'sonar', 'splice', 'iris', 'wine', 'pendigits',
                                 'vehicle', 'svmguide2', 'svmguide4']
                       ):
    """Build a LaTeX-ready accuracy table from the global ``csv_results`` dict.

    For every (model, dataset) pair present in ``csv_results``, the row with
    the highest 'mean_acc' is selected.  The per-dataset winner is wrapped in
    a bold marker: the string "\textb" deliberately contains a TAB ("\t" +
    "extb"); the rendering cell later replaces "extb" with "\\textbf" after
    to_latex escaping, which reconstitutes \textbf{...}.

    NOTE(review): mutable default arguments are shared across calls — safe
    here since they are never mutated, but fragile.

    Returns:
        (pd.DataFrame, dict): the formatted table with columns ordered as
        ``models``, and a nested dict of raw best accuracies
        best_results_val[dataset][model].
    """
    pd.options.display.float_format = '{:2.2f}'.format  # side effect: global display option
    best_results = {model: {} for model in models}
    best_results_val = {data: {} for data in datasets}
    # First pass: the best accuracy achieved on each dataset by any model.
    best_per_dataset = defaultdict(float)
    for model in models:
        for data in datasets:
            if model + '_' + data in csv_results:
                df = csv_results[model + '_' + data]
                best_per_dataset[data] = max(best_per_dataset[data], df['mean_acc'].max())
    # Second pass: format each cell, bolding the per-dataset winner.
    for model in models:
        for data in datasets:
            if model + '_' + data in csv_results:
                df = csv_results[model + '_' + data]
                acc = df['mean_acc'].max()
                if 'std' in df.columns:
                    std = '%.2f' % df.loc[df['mean_acc'].idxmax(), 'std']
                else:
                    std = '0.01'  # fallback when the table has no std column
                if acc == best_per_dataset[data]:
                    txt = "\textb{" + ("%.2f" % acc) + "} & {\\tiny $ \pm $\textb{" + std + "} }"
                else:
                    # BUG FIX: this branch previously hardcoded "0.01" even
                    # though `std` was computed above for both branches.
                    txt = ("%.2f" % acc) + " & {\\tiny $ \pm $" + std + " }"
                best_results[model][data] = txt
                best_results_val[data][model] = acc
    return pd.DataFrame.from_dict(best_results)[models], best_results_val
In [148]:
In [13]:
from itertools import izip
In [27]:
models=[ 'test_r2svm','fixed_r2svm','test_r2elm', \
'test_elm', 'test_svm', 'test_linear_svm',\
'triple_r2svm', 'triple_svm', 'triple_fixed_r2svm']
datasets = ['glass', 'australian', 'bank','breast_cancer', 'crashes', 'liver', 'segment', 'satimage', 'heart', 'vowel',
'diabetes', 'fourclass', 'german', 'indian', 'ionosphere', 'sonar', 'splice', 'iris', 'wine', 'pendigits',
'vehicle', 'svmguide2', 'svmguide4', 'mushrooms']
tb, _ = get_accuracy_table(models=models, datasets = datasets)
# 1. Change column names
tb = tb.rename(columns=dict(izip(models,
["\rrsvm", "fixed \\rrsvm", "\\drelm", \
"ELM + SIG", "SVM + RBF", "SVM" \
"triple \rrsvm", "triple SVM+RBF", "triple f \rrsvm"])))
h = tb.to_latex(index=True, header=True, formatters=[lambda x:x]*10)
h = h.replace("\\&", "&").replace("\\textbackslash", "\\").replace("\\$", "$").replace("extb", "\\textbf").replace("\\{", "{").replace("\\}", "}").replace("\\\\%", "\\%")
print h
In [6]:
# Model/dataset grids shared by the std and timing summaries below.
models = ['test_elm', 'test_svm', 'test_linear_svm','test_r2svm', 'test_r2elm', 'random_r2svm', 'fixed_r2svm',
'triple_svm', 'triple_r2svm', 'triple_fixed_r2svm'] #,exh_r2svm]
datasets = ['glass', 'australian', 'bank','breast_cancer', 'crashes', 'liver', 'segment', 'satimage', 'heart', 'vowel',
'diabetes', 'fourclass', 'german', 'indian', 'ionosphere', 'sonar', 'splice', 'iris', 'wine', 'pendigits',
'vehicle', 'svmguide2', 'svmguide4']
pd.options.display.float_format = '{:2.4f}'.format
In [15]:
# Std (in percent) of the best-accuracy row for each model/dataset pair.
best_std = {model: {} for model in models}
for model in models:
for data in datasets:
if model + '_' + data in csv_results.keys():
df = csv_results[model + '_' + data]
if 'std' in df.columns:
best_std[model][data] = df.loc[df['mean_acc'].idxmax(),'std'] * 100
# scores = df.loc[df['mean_acc'].idxmax(),'acc_fold']
# best_std[model][data] = np.mean([np.std(fold_scores) for fold_scores in scores]) * 100
print "Best std"
std_pd = pd.DataFrame.from_dict(best_std)
best_train_time = {model: {} for model in models}
for model in models:
for data in datasets:
if model + '_' + data in csv_results.keys():
df = csv_results[model + '_' + data]
time = df.loc[df['mean_acc'].idxmax(),'train_time']
time = time.translate(None, '[]')
best_train_time[model][data] = np.mean([float(t) for t in time.split(', ')])
pd.options.display.float_format = '{:2.4f}'.format
print "Best train time"
pd.DataFrame.from_dict(best_train_time)
Out[32]:
In [31]:
# Mean test time (over folds) at the best-accuracy configuration.
# NOTE(review): near-duplicate of the train-time cell — a parameterised
# helper (column name as argument) would avoid the copy-paste.
best_test_time = {model: {} for model in models}
for model in models:
for data in datasets:
if model + '_' + data in csv_results.keys():
df = csv_results[model + '_' + data]
time = df.loc[df['mean_acc'].idxmax(),'test_time']
# Fold times stored as a stringified list; strip brackets (Python 2 API).
time = time.translate(None, '[]')
best_test_time[model][data] = np.mean([float(t) for t in time.split(', ')])
pd.options.display.float_format = '{:2.6f}'.format
print "Best testtime"
pd.DataFrame.from_dict(best_test_time)
# multiply those?
Out[31]:
In [375]:
# NOTE(review): `result_dict` is only defined in the next cell (In [376]) —
# this relies on out-of-order execution.
[('test_r2svm' in result_dict[d], d) for d in datasets]
Out[375]:
In [376]:
# Does r2svm's gain over RBF-SVM correlate with RBF's gain over linear SVM?
tb, result_dict = get_accuracy_table()
datasets=['glass', 'australian', 'bank','breast_cancer', 'crashes', 'liver', 'segment', 'satimage', 'heart', 'vowel',
'diabetes', 'fourclass', 'german', 'indian', 'ionosphere', 'sonar', 'splice', 'iris', 'wine', 'pendigits',
'vehicle', 'svmguide2', 'svmguide4']
points = [(result_dict[d]["test_r2svm"]/result_dict[d]["test_svm"], (result_dict[d]["test_svm"] - result_dict[d]["test_linear_svm"])) for d in datasets]
%matplotlib inline
X = [p[1] for p in points]
Y = [p[0] for p in points]
plt.scatter(X, Y)
# NOTE(review): `scipy` is never imported explicitly — presumably pulled in
# by a star import; confirm.
print(scipy.stats.spearmanr(X, Y))
In [416]:
# Same accuracy ratio vs the 'spaceness' measure, excluding the three large
# multiclass sets.
datasets = list(set(datasets).difference(set(["segment", "pendigits", "satimage"])))
# NOTE(review): `spaceness` is defined in a later cell (In [414]) — another
# out-of-order dependency.
points = [(result_dict[d]["test_r2svm"]/result_dict[d]["test_svm"], spaceness(d)) for d in datasets]
%matplotlib inline
X = [p[1] for p in points]
Y = [p[0] for p in points]
plt.scatter(X, Y)
print(scipy.stats.spearmanr(X, Y))
In [414]:
# Define datasets tested
binary = ["australian", "bank", "breast_cancer", "crashes", "diabetes",\
"fourclass", "german", "heart", "indian", "ionosphere", "liver", "sonar", "splice"]
multi = set(datasets).difference(set(binary))
# Per-dataset metadata as three parallel lists: name, input dimensionality,
# and (presumably estimated) number of manifold dimensions.  All three must
# stay the same length and order — fragile; a single dict would be safer.
datasetss=['australian',\
'bank',\
'breast_cancer',\
'crashes',\
'diabetes',\
'fourclass',\
'german',\
'glass',\
'heart',\
'indian',\
'ionosphere',\
'iris',\
'liver',\
'mushrooms',\
'pendigits',\
'satimage',\
'segment',\
'sonar',\
'splice',\
'svmguide2',\
'svmguide4',\
'vehicle',\
'vowel',\
'wine']
dim = [14,4,10,20,8,2,24,9,13,10,34,4,6,112,16,36,19,60,60,20,10,18,10,4]
manifolds = [1,3,1,1,2,2,3,6,3,3,24,2,3,40,9,6,7,28,55,15,1,6,8,2]
# Fold the parallel lists into name-keyed lookup tables.
dimd = {}
manifoldsd = {}
for i in range(len(datasetss)):
dimd[datasetss[i]] = dim[i]
manifoldsd[datasetss[i]] = manifolds[i]
def spaceness(i):
    """'Spaceness' of dataset `i`: (dim - manifolds) / dim.

    Reads the module-level `dimd` and `manifoldsd` lookup tables built above.
    Returns a float in [0, 1); higher means the data occupies fewer of its
    ambient dimensions.
    """
    total_dim = dimd[i]
    manifold_dim = manifoldsd[i]
    return (total_dim - manifold_dim) / float(total_dim)
tb, result_dict = get_accuracy_table()
# Thresholds used to split the datasets into groups.
T = 0.04    # min relative gain of RBF-SVM over linear SVM to call RBF "stronger"
T_N = 0.35  # min spaceness to call a dataset "spaced"
rbf_stronger = [d for d in datasets if (result_dict[d]["test_svm"] - result_dict[d]["test_linear_svm"])/abs(result_dict[d]["test_svm"]) > T]
rbf_similar = set(datasets).difference(set(rbf_stronger))
# BUG FIX: this previously did izip(datasets, dim, manifolds), but dim and
# manifolds are ordered to match `datasetss`, while `datasets` was filtered
# and re-ordered through a set in an earlier cell — the pairing was wrong.
# Look the values up by name via spaceness() instead.
spaced = [d for d in datasets if spaceness(d) > T_N]
compact = set(datasets).difference(set(spaced))
# NOTE(review): `disbalance` is defined only in a later cell (In [247]) —
# this cell depends on out-of-order execution.
imbalanced = [d for d in datasets if disbalance(d) > 0.15]
balanced = set(datasets).difference(imbalanced)
# Named dataset groups consumed by get_model_char below.
datasets_groups = {"bal":balanced, "bin":binary, "multi":multi, "rbf_str":rbf_stronger, "rbf_sim":rbf_similar, "spaced":spaced, "comp":compact, \
"im":imbalanced}
In [421]:
# Define predicates tested
def model_stronger_than_11lin(model, datasets):
# Number of datasets where `model` beats linear SVM by more than 10%.
return sum([result_dict[d][model] > 1.1*result_dict[d]['test_linear_svm'] for d in datasets])
def model_stronger_than_09rbf(model, datasets):
# Number of datasets where `model` reaches at least 98% of RBF-SVM accuracy.
# NOTE(review): the name (and the registry key) says "09" but the factor is
# 0.98 — confirm which was intended.
return sum([result_dict[d][model] > 0.98*result_dict[d]['test_svm'] for d in datasets])
def is_stronger_key_on_data(model, data, key='recurrent', threshold=0.96):
    """True if enabling boolean hyperparameter `key` helps `model` on `data`.

    Looks the pair up in the global `csv_results` table and compares the best
    mean accuracy with key==True vs key==False; "helps" means the key-off
    accuracy falls below `threshold` of the key-on accuracy.

    BUG FIX: `threshold` used to be accepted but ignored (0.96 was hardcoded
    in the comparison while the default said 0.95); the parameter is now used,
    with the default set to 0.96 to preserve previous behaviour.

    Returns False on any failure (missing table, missing column, NaN ratio).
    """
    try:
        df = csv_results[model + '_' + data]
        acc_rec = df[df[key] == True]['mean_acc'].max()
        acc_normal = df[df[key] == False]['mean_acc'].max()
        return acc_normal / acc_rec < threshold
    except Exception:
        # Best-effort: any lookup failure counts as "not stronger".
        return False
def is_weaker_key_on_data(model, data, key='recurrent', threshold=0.96):
    """True if enabling boolean hyperparameter `key` hurts `model` on `data`.

    Mirror image of is_stronger_key_on_data: "hurts" means the key-on accuracy
    falls below `threshold` of the key-off accuracy (globals: `csv_results`).

    BUG FIX: `threshold` used to be accepted but ignored (0.96 hardcoded);
    it is now used, with the default set to 0.96 to preserve behaviour.

    Returns False on any failure (missing table, missing column, NaN ratio).
    """
    try:
        df = csv_results[model + '_' + data]
        acc_rec = df[df[key] == True]['mean_acc'].max()
        acc_normal = df[df[key] == False]['mean_acc'].max()
        return acc_rec / acc_normal < threshold
    except Exception:
        # Best-effort: any lookup failure counts as "not weaker".
        return False
def is_stronger_recurrent(model, datasets):
# Number of datasets where enabling recurrence helps `model`.
return sum(is_stronger_key_on_data(model, d) for d in datasets)
def is_weaer_recurrent(model, datasets):
# Number of datasets where recurrence hurts `model`.
# NOTE(review): name has a typo ("weaer" -> "weaker"); kept as-is because
# the `predicates` registry below refers to it.
return sum(is_weaker_key_on_data(model, d) for d in datasets)
def is_stronger_scale(model, datasets):
# Number of datasets where the `scale` option helps `model`.
return sum(is_stronger_key_on_data(model, d, key='scale') for d in datasets)
def is_stronger_use_prev(model, datasets):
# Number of datasets where the `use_prev` option helps `model`.
return sum(is_stronger_key_on_data(model, d, key='use_prev') for d in datasets)
# Predicate registry consumed by get_model_char.  NOTE(review): the key
# "str_use_preV" has a stray capital V; kept since it is a runtime key.
predicates = {"str_than_11lin":model_stronger_than_11lin, \
"str_than_09rbf":model_stronger_than_09rbf, \
"str_rec":is_stronger_recurrent,\
"str_scale":is_stronger_scale, \
"str_use_preV":is_stronger_use_prev, \
"weak_rec": is_weaer_recurrent}
In [323]:
# Spot-check: does the `scale` option help r2svm on glass?
is_stronger_key_on_data("test_r2svm", "glass", key="scale")
Out[323]:
In [314]:
from itertools import product
def get_model_char(model=["test_r2svm"]):
results = {k:{} for k in datasets_groups.keys()}
for pred_key,d_group_key in product(predicates.keys(), datasets_groups):
try:
d_group = datasets_groups[d_group_key]
pred = predicates[pred_key]
sum_len, sum_count = 0, 0
for m in model:
sum_len += len(d_group)
sum_count += pred(m, d_group)
results[d_group_key][pred_key] = "%d"%(100.*sum_count/float(sum_len))+" "+ str(sum_count) + "/" + str(sum_len)
except:
pass
return pd.DataFrame.from_dict(results)
t = get_model_char()
In [423]:
get_model_char(["test_r2elm", "test_r2svm"])
Out[423]:
In [328]:
# NOTE(review): get_model_char expects a LIST of model names; a bare string
# is iterated character by character, so every predicate lookup fails and is
# silently swallowed — the table comes out empty.  Wrap in a list.
get_model_char("test_r2svm")
Out[328]:
In [309]:
# NOTE(review): same string-instead-of-list issue as above.
get_model_char("triple_fixed_r2svm")
Out[309]:
In [ ]:
In [242]:
# r2svm/svm accuracy ratio vs class disbalance per dataset.
tb, result_dict = get_accuracy_table()
datasets=['glass', 'australian', 'bank','breast_cancer', 'crashes', 'liver', 'segment', 'satimage', 'heart', 'vowel',
'diabetes', 'fourclass', 'german', 'indian', 'ionosphere', 'sonar', 'splice', 'iris', 'wine', 'pendigits',
'vehicle', 'svmguide2', 'svmguide4']
# NOTE(review): `disbalance` is defined in a later cell (In [247]) — relies
# on out-of-order execution.
points = [(result_dict[d]["test_r2svm"]/result_dict[d]["test_svm"], disbalance(d)) for d in datasets]
%matplotlib inline
X = [p[1] for p in points]
Y = [p[0] for p in points]
plt.scatter(X, Y)
# NOTE(review): `scipy` is never imported explicitly in this notebook.
print(scipy.stats.spearmanr(X, Y))
In [227]:
from collections import Counter
In [228]:
glass =
In [ ]:
Counter(glass.target)
In [247]:
def disbalance(name="glass"):
    """Class-imbalance score of a UCI dataset.

    Loads the dataset via fetch_uci_datasets and returns
    (largest class count - smallest class count) / total samples:
    0 for a perfectly balanced set, approaching 1 when one class dominates.
    """
    dataset = fetch_uci_datasets([name])[0]
    class_counts = Counter(dataset.target)
    spread = max(class_counts.values()) - min(class_counts.values())
    return spread / float(len(dataset.target))
In [250]:
# Spot-check the imbalance measure on a binary dataset.
disbalance('australian')
Out[250]:
In [240]:
Out[240]: