This notebook contains latex tables included in the publication

Also contains some analysis


In [3]:
%load_ext autoreload
%autoreload 2
import sys, os
sys.path.append('../..')

%matplotlib inline
import matplotlib.pylab as plt

from misc.config import c
from data_api import *
import cPickle
import pandas as pd
from data_api import *
results_dir = c['RESULTS_DIR']


The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload

In [360]:
all_results = {}
models = ['exh_r2svm']

datasets = ['glass', 'australian', 'bank','breast_cancer', 'crashes', 'liver', 'segment', 'satimage', 'heart', 'vowel',
            'diabetes', 'fourclass', 'german', 'indian', 'ionosphere', 'sonar', 'splice', 'iris', 'wine', 'pendigits',
            'vehicle']

# datasets = ['vowel', 'vehicle', 'satimage', 'segment', 'pendigits']

paths = [ os.path.join(results_dir, model + '_' + dataset) for model in models for dataset in datasets ]

for path in paths:
    if os.path.isdir(path):
        print path
        results = {}
        for exp in os.listdir(path):
            name = exp[:-11]
            try:
                exp_res = cPickle.load(open(os.path.join(path, exp),'r'))
            except:
                print exp
                continue
            merged_res = exp_res['monitors']
            merged_res.update(exp_res['results'])
            merged_res.update(exp_res['config']['params'])
            results[name] = merged_res
        name = path.split('/')[-1]
        all_results[name] = results


/mnt/users/czarnecki/local/r2-learner/results/exh_r2svm_glass
/mnt/users/czarnecki/local/r2-learner/results/exh_r2svm_vowel
/mnt/users/czarnecki/local/r2-learner/results/exh_r2svm_fourclass
/mnt/users/czarnecki/local/r2-learner/results/exh_r2svm_ionosphere
/mnt/users/czarnecki/local/r2-learner/results/exh_r2svm_sonar
/mnt/users/czarnecki/local/r2-learner/results/exh_r2svm_splice
/mnt/users/czarnecki/local/r2-learner/results/exh_r2svm_wine

In [4]:
csv_results = {}
csv_dir = os.path.join(results_dir, 'csv')

for csv_file in os.listdir(csv_dir):
    csv_results[csv_file] = pd.DataFrame.from_csv(os.path.join(csv_dir, csv_file))

In [196]:
df[df['recurrent'] == False]['mean_acc'].max()


Out[196]:
0.81052002310187043

In [195]:



Out[195]:
0.81704870051979184

In [24]:
from collections import defaultdict
def get_accuracy_table(models=['test_elm', 'test_svm', 'test_linear_svm',\
                               'test_r2svm', 'test_r2elm', 'random_r2svm', 'fixed_r2svm', 
            'triple_svm', 'triple_r2svm', 'triple_fixed_r2svm'], \
                       datasets=['glass', 'australian', 'bank','breast_cancer', 'crashes', 'liver', 'segment', 'satimage', 'heart', 'vowel',
                'diabetes', 'fourclass', 'mushrooms', 'german', 'indian', 'ionosphere', 'sonar', 'splice', 'iris', 'wine', 'pendigits',
                'vehicle', 'svmguide2', 'svmguide4']
                       ):
    pd.options.display.float_format = '{:2.2f}'.format

    best_results = {model: {} for model in models}
    best_results_val = {data: {} for data in datasets}
    
    best_per_dataset = defaultdict(float)
    for model in models:
        for data in datasets:
            if model + '_' + data in csv_results.keys():
                df = csv_results[model + '_' + data]
                best_per_dataset[data] = max(best_per_dataset[data], df['mean_acc'].max())
    
    for model in models:
        for data in datasets:
            if model + '_' + data in csv_results.keys():
                df = csv_results[model + '_' + data]
                acc = df['mean_acc'].max()
                if 'std' in df.columns:
                    std = '%.2f' % df.loc[df['mean_acc'].idxmax(),'std']
                else:
                    std = '0.01'
                if acc == best_per_dataset[data]:
                    txt = "\textb{"+("%.2f"%acc)+"} & {\\tiny  $ \pm $\textb{"+std+"} }" #TODO: add STD estimation
                else:
                    txt = ("%.2f"%acc)+ " & {\\tiny $ \pm $" + "0.01 }"
                              
                best_results[model][data] = txt
                best_results_val[data][model] = acc


    return pd.DataFrame.from_dict(best_results)[models], best_results_val

In [148]:

1st table is all models and only fixed_r2svm


In [13]:
from itertools import izip

In [27]:
models=[ 'test_r2svm','fixed_r2svm','test_r2elm', \
        'test_elm', 'test_svm', 'test_linear_svm',\
            'triple_r2svm', 'triple_svm', 'triple_fixed_r2svm']

datasets = ['glass', 'australian', 'bank','breast_cancer', 'crashes', 'liver', 'segment', 'satimage', 'heart', 'vowel',
                'diabetes', 'fourclass', 'german', 'indian', 'ionosphere', 'sonar', 'splice', 'iris', 'wine', 'pendigits',
                'vehicle', 'svmguide2', 'svmguide4', 'mushrooms']
tb, _ = get_accuracy_table(models=models, datasets = datasets)
# 1. Change column names
tb = tb.rename(columns=dict(izip(models,
                                 ["\rrsvm", "fixed \\rrsvm", "\\drelm", \
                                  "ELM + SIG", "SVM + RBF", "SVM" \
                                  "triple \rrsvm", "triple SVM+RBF", "triple f \rrsvm"])))


h = tb.to_latex(index=True, header=True, formatters=[lambda x:x]*10)

h = h.replace("\\&", "&").replace("\\textbackslash", "\\").replace("\\$", "$").replace("extb", "\\textbf").replace("\\{", "{").replace("\\}", "}").replace("\\\\%", "\\%")

print h


\begin{tabular}{llllllllll}
\toprule
{} &                       \rrsvm &                 fixed \rrsvm &                                      \drelm &                                   ELM + SIG &                                   SVM + RBF &             SVMtriple \rrsvm &                              triple SVM+RBF &                             triple f \rrsvm &                          triple\_fixed\_r2svm \\
\midrule
australian    &  0.87 & {\tiny $ \pm $0.01 } &  0.86 & {\tiny $ \pm $0.01 } &                 0.87 & {\tiny $ \pm $0.01 } &  	\textbf{0.88} & {\tiny  $ \pm $	\textbf{0.02} } &                 0.87 & {\tiny $ \pm $0.01 } &  0.86 & {\tiny $ \pm $0.01 } &                 0.87 & {\tiny $ \pm $0.01 } &                 0.86 & {\tiny $ \pm $0.01 } &                 0.86 & {\tiny $ \pm $0.01 } \\
bank          &  1.00 & {\tiny $ \pm $0.01 } &  1.00 & {\tiny $ \pm $0.01 } &                 0.97 & {\tiny $ \pm $0.01 } &  	\textbf{1.00} & {\tiny  $ \pm $	\textbf{0.00} } &  	\textbf{1.00} & {\tiny  $ \pm $	\textbf{0.00} } &  0.99 & {\tiny $ \pm $0.01 } &                 1.00 & {\tiny $ \pm $0.01 } &  	\textbf{1.00} & {\tiny  $ \pm $	\textbf{0.00} } &  	\textbf{1.00} & {\tiny  $ \pm $	\textbf{0.00} } \\
breast\_cancer &  0.97 & {\tiny $ \pm $0.01 } &  0.97 & {\tiny $ \pm $0.01 } &                 0.97 & {\tiny $ \pm $0.01 } &                 0.97 & {\tiny $ \pm $0.01 } &                 0.97 & {\tiny $ \pm $0.01 } &  0.97 & {\tiny $ \pm $0.01 } &                 0.97 & {\tiny $ \pm $0.01 } &                 0.97 & {\tiny $ \pm $0.01 } &  	\textbf{0.97} & {\tiny  $ \pm $	\textbf{0.01} } \\
crashes       &  0.95 & {\tiny $ \pm $0.01 } &  0.95 & {\tiny $ \pm $0.01 } &                 0.95 & {\tiny $ \pm $0.01 } &                 0.93 & {\tiny $ \pm $0.01 } &  	\textbf{0.96} & {\tiny  $ \pm $	\textbf{0.02} } &  0.96 & {\tiny $ \pm $0.01 } &                 0.95 & {\tiny $ \pm $0.01 } &                 0.96 & {\tiny $ \pm $0.01 } &                 0.95 & {\tiny $ \pm $0.01 } \\
diabetes      &  0.78 & {\tiny $ \pm $0.01 } &  0.76 & {\tiny $ \pm $0.01 } &                 0.77 & {\tiny $ \pm $0.01 } &  	\textbf{0.78} & {\tiny  $ \pm $	\textbf{0.03} } &                 0.78 & {\tiny $ \pm $0.01 } &  0.78 & {\tiny $ \pm $0.01 } &                 0.76 & {\tiny $ \pm $0.01 } &                 0.78 & {\tiny $ \pm $0.01 } &                 0.77 & {\tiny $ \pm $0.01 } \\
fourclass     &  0.79 & {\tiny $ \pm $0.01 } &  0.74 & {\tiny $ \pm $0.01 } &                 0.78 & {\tiny $ \pm $0.01 } &                 0.99 & {\tiny $ \pm $0.01 } &  	\textbf{1.00} & {\tiny  $ \pm $	\textbf{0.00} } &  0.77 & {\tiny $ \pm $0.01 } &                 0.81 & {\tiny $ \pm $0.01 } &  	\textbf{1.00} & {\tiny  $ \pm $	\textbf{0.00} } &                 0.77 & {\tiny $ \pm $0.01 } \\
german        &  0.77 & {\tiny $ \pm $0.01 } &  0.73 & {\tiny $ \pm $0.01 } &  	\textbf{0.77} & {\tiny  $ \pm $	\textbf{0.00} } &                 0.75 & {\tiny $ \pm $0.01 } &                 0.76 & {\tiny $ \pm $0.01 } &  0.76 & {\tiny $ \pm $0.01 } &                 0.73 & {\tiny $ \pm $0.01 } &                 0.76 & {\tiny $ \pm $0.01 } &                 0.72 & {\tiny $ \pm $0.01 } \\
glass         &  0.64 & {\tiny $ \pm $0.01 } &  0.59 & {\tiny $ \pm $0.01 } &                 0.64 & {\tiny $ \pm $0.01 } &                 0.71 & {\tiny $ \pm $0.01 } &  	\textbf{0.73} & {\tiny  $ \pm $	\textbf{0.05} } &  0.62 & {\tiny $ \pm $0.01 } &                 0.69 & {\tiny $ \pm $0.01 } &                 0.72 & {\tiny $ \pm $0.01 } &                 0.65 & {\tiny $ \pm $0.01 } \\
heart         &  0.84 & {\tiny $ \pm $0.01 } &  0.85 & {\tiny $ \pm $0.01 } &  	\textbf{0.85} & {\tiny  $ \pm $	\textbf{0.00} } &                 0.83 & {\tiny $ \pm $0.01 } &                 0.85 & {\tiny $ \pm $0.01 } &  0.84 & {\tiny $ \pm $0.01 } &                 0.85 & {\tiny $ \pm $0.01 } &                 0.85 & {\tiny $ \pm $0.01 } &                 0.85 & {\tiny $ \pm $0.01 } \\
indian        &  0.73 & {\tiny $ \pm $0.01 } &  0.71 & {\tiny $ \pm $0.01 } &                 0.72 & {\tiny $ \pm $0.01 } &  	\textbf{0.73} & {\tiny  $ \pm $	\textbf{0.01} } &                 0.72 & {\tiny $ \pm $0.01 } &  0.72 & {\tiny $ \pm $0.01 } &                 0.71 & {\tiny $ \pm $0.01 } &                 0.72 & {\tiny $ \pm $0.01 } &                 0.71 & {\tiny $ \pm $0.01 } \\
ionosphere    &  0.89 & {\tiny $ \pm $0.01 } &  0.91 & {\tiny $ \pm $0.01 } &                 0.92 & {\tiny $ \pm $0.01 } &                 0.91 & {\tiny $ \pm $0.01 } &  	\textbf{0.96} & {\tiny  $ \pm $	\textbf{0.02} } &  0.89 & {\tiny $ \pm $0.01 } &                 0.89 & {\tiny $ \pm $0.01 } &                 0.93 & {\tiny $ \pm $0.01 } &                 0.92 & {\tiny $ \pm $0.01 } \\
iris          &  0.97 & {\tiny $ \pm $0.01 } &  0.97 & {\tiny $ \pm $0.01 } &                 0.96 & {\tiny $ \pm $0.01 } &  	\textbf{0.98} & {\tiny  $ \pm $	\textbf{0.02} } &  	\textbf{0.98} & {\tiny  $ \pm $	\textbf{0.03} } &  0.95 & {\tiny $ \pm $0.01 } &  	\textbf{0.98} & {\tiny  $ \pm $	\textbf{0.01} } &                 0.97 & {\tiny $ \pm $0.01 } &                 0.98 & {\tiny $ \pm $0.01 } \\
liver         &  0.70 & {\tiny $ \pm $0.01 } &  0.66 & {\tiny $ \pm $0.01 } &                 0.69 & {\tiny $ \pm $0.01 } &                 0.74 & {\tiny $ \pm $0.01 } &  	\textbf{0.75} & {\tiny  $ \pm $	\textbf{0.04} } &  0.71 & {\tiny $ \pm $0.01 } &                 0.70 & {\tiny $ \pm $0.01 } &                 0.73 & {\tiny $ \pm $0.01 } &                 0.71 & {\tiny $ \pm $0.01 } \\
pendigits     &                          NaN &  0.94 & {\tiny $ \pm $0.01 } &                 0.87 & {\tiny $ \pm $0.01 } &                 0.97 & {\tiny $ \pm $0.01 } &  	\textbf{1.00} & {\tiny  $ \pm $	\textbf{0.00} } &  0.93 & {\tiny $ \pm $0.01 } &                 0.99 & {\tiny $ \pm $0.01 } &                 1.00 & {\tiny $ \pm $0.01 } &                 0.97 & {\tiny $ \pm $0.01 } \\
satimage      &                          NaN &  0.87 & {\tiny $ \pm $0.01 } &                 0.88 & {\tiny $ \pm $0.01 } &                 0.89 & {\tiny $ \pm $0.01 } &                 0.93 & {\tiny $ \pm $0.01 } &  0.82 & {\tiny $ \pm $0.01 } &                 0.92 & {\tiny $ \pm $0.01 } &  	\textbf{0.97} & {\tiny  $ \pm $	\textbf{0.00} } &                                         NaN \\
segment       &                          NaN &  0.93 & {\tiny $ \pm $0.01 } &                 0.94 & {\tiny $ \pm $0.01 } &                 0.93 & {\tiny $ \pm $0.01 } &                 0.97 & {\tiny $ \pm $0.01 } &  0.93 & {\tiny $ \pm $0.01 } &  	\textbf{0.97} & {\tiny  $ \pm $	\textbf{0.01} } &                 0.97 & {\tiny $ \pm $0.01 } &                                         NaN \\
sonar         &  0.75 & {\tiny $ \pm $0.01 } &  0.81 & {\tiny $ \pm $0.01 } &                 0.77 & {\tiny $ \pm $0.01 } &                 0.83 & {\tiny $ \pm $0.01 } &  	\textbf{0.89} & {\tiny  $ \pm $	\textbf{0.05} } &  0.76 & {\tiny $ \pm $0.01 } &                 0.76 & {\tiny $ \pm $0.01 } &                 0.82 & {\tiny $ \pm $0.01 } &                 0.84 & {\tiny $ \pm $0.01 } \\
splice        &  0.81 & {\tiny $ \pm $0.01 } &  0.81 & {\tiny $ \pm $0.01 } &                 0.82 & {\tiny $ \pm $0.01 } &                 0.76 & {\tiny $ \pm $0.01 } &                 0.88 & {\tiny $ \pm $0.01 } &  0.80 & {\tiny $ \pm $0.01 } &                 0.81 & {\tiny $ \pm $0.01 } &  	\textbf{0.88} & {\tiny  $ \pm $	\textbf{0.02} } &                 0.88 & {\tiny $ \pm $0.01 } \\
svmguide2     &  0.83 & {\tiny $ \pm $0.01 } &  0.82 & {\tiny $ \pm $0.01 } &                 0.84 & {\tiny $ \pm $0.01 } &                 0.82 & {\tiny $ \pm $0.01 } &                 0.85 & {\tiny $ \pm $0.01 } &  0.84 & {\tiny $ \pm $0.01 } &                 0.83 & {\tiny $ \pm $0.01 } &  	\textbf{0.85} & {\tiny  $ \pm $	\textbf{0.02} } &                 0.84 & {\tiny $ \pm $0.01 } \\
svmguide4     &  0.85 & {\tiny $ \pm $0.01 } &  0.73 & {\tiny $ \pm $0.01 } &                 0.80 & {\tiny $ \pm $0.01 } &                 0.76 & {\tiny $ \pm $0.01 } &                 0.87 & {\tiny $ \pm $0.01 } &  0.81 & {\tiny $ \pm $0.01 } &  	\textbf{0.90} & {\tiny  $ \pm $	\textbf{0.01} } &                 0.87 & {\tiny $ \pm $0.01 } &                 0.82 & {\tiny $ \pm $0.01 } \\
vehicle       &  0.81 & {\tiny $ \pm $0.01 } &  0.78 & {\tiny $ \pm $0.01 } &                 0.80 & {\tiny $ \pm $0.01 } &                 0.82 & {\tiny $ \pm $0.01 } &  	\textbf{0.86} & {\tiny  $ \pm $	\textbf{0.01} } &  0.78 & {\tiny $ \pm $0.01 } &                 0.82 & {\tiny $ \pm $0.01 } &                 0.85 & {\tiny $ \pm $0.01 } &                 0.82 & {\tiny $ \pm $0.01 } \\
vowel         &  0.62 & {\tiny $ \pm $0.01 } &  0.49 & {\tiny $ \pm $0.01 } &                 0.54 & {\tiny $ \pm $0.01 } &                 0.83 & {\tiny $ \pm $0.01 } &                 0.99 & {\tiny $ \pm $0.01 } &  0.47 & {\tiny $ \pm $0.01 } &                 0.87 & {\tiny $ \pm $0.01 } &  	\textbf{1.00} & {\tiny  $ \pm $	\textbf{0.00} } &                 0.72 & {\tiny $ \pm $0.01 } \\
wine          &  0.83 & {\tiny $ \pm $0.01 } &  0.84 & {\tiny $ \pm $0.01 } &                 0.83 & {\tiny $ \pm $0.01 } &  	\textbf{0.87} & {\tiny  $ \pm $	\textbf{0.04} } &                 0.86 & {\tiny $ \pm $0.01 } &  0.83 & {\tiny $ \pm $0.01 } &                 0.84 & {\tiny $ \pm $0.01 } &                 0.84 & {\tiny $ \pm $0.01 } &                 0.86 & {\tiny $ \pm $0.01 } \\
\bottomrule
\end{tabular}

STD


In [6]:
models = ['test_elm', 'test_svm', 'test_linear_svm','test_r2svm', 'test_r2elm', 'random_r2svm', 'fixed_r2svm', 
        'triple_svm', 'triple_r2svm', 'triple_fixed_r2svm'] #,exh_r2svm]
datasets = ['glass', 'australian', 'bank','breast_cancer', 'crashes', 'liver', 'segment', 'satimage', 'heart', 'vowel',
            'diabetes', 'fourclass', 'german', 'indian', 'ionosphere', 'sonar', 'splice', 'iris', 'wine', 'pendigits',
            'vehicle', 'svmguide2', 'svmguide4']

pd.options.display.float_format = '{:2.4f}'.format

In [15]:
best_std = {model: {} for model in models}
for model in models:
    for data in datasets:
        if model + '_' + data in csv_results.keys():
            df = csv_results[model + '_' + data]
            if 'std' in df.columns:
                best_std[model][data] = df.loc[df['mean_acc'].idxmax(),'std'] * 100
#             scores = df.loc[df['mean_acc'].idxmax(),'acc_fold']
#             best_std[model][data] =  np.mean([np.std(fold_scores) for fold_scores in scores]) * 100

print "Best std"
std_pd = pd.DataFrame.from_dict(best_std)


Best std

training time


In [32]:
best_train_time = {model: {} for model in models}
for model in models:
    for data in datasets:
        if model + '_' + data in csv_results.keys():
            df = csv_results[model + '_' + data]
            time = df.loc[df['mean_acc'].idxmax(),'train_time']
            time = time.translate(None, '[]')
            best_train_time[model][data] = np.mean([float(t) for t in time.split(', ')])

pd.options.display.float_format = '{:2.4f}'.format
print "Best train time"
pd.DataFrame.from_dict(best_train_time)


Best train time
Out[32]:
fixed_r2svm test_elm test_linear_svm test_r2elm test_r2svm test_svm triple_fixed_r2svm triple_r2svm triple_svm
australian 0.1464 0.3201 0.0236 1.0581 1.8978 0.0183 0.2988 2.6481 0.0296
bank 0.0156 0.6873 0.0026 1.8457 0.2944 0.0315 0.0217 0.1738 0.0089
breast_cancer 0.0202 0.4864 0.0016 0.2297 0.6347 0.0042 0.1046 0.1283 0.0361
crashes 0.0464 0.2320 0.0026 0.1179 0.3964 0.0055 0.1050 0.3025 0.0070
diabetes 0.0150 0.4592 0.0604 0.1235 0.9053 0.0169 0.0402 2.4775 0.0237
fourclass 0.0501 0.4847 0.0207 0.1171 0.8090 0.0100 0.0621 0.5610 0.0055
german 0.1978 0.3636 0.0081 0.3982 2.6623 0.0387 0.4986 6.4902 0.0594
glass 0.0961 0.0383 0.0473 0.0738 1.1615 0.0037 0.1582 1.0639 0.0042
heart 0.0056 0.0016 0.0042 0.2770 0.4036 0.0025 0.0184 0.1524 0.0026
indian 0.0581 0.2803 0.0554 0.3607 1.5226 0.0309 0.1232 1.6489 0.0172
ionosphere 0.0676 0.6896 0.0059 0.1812 0.4340 0.0036 0.1337 0.1612 0.0054
iris 0.0161 0.5061 0.0061 0.0745 0.2472 0.0008 0.0079 0.0134 0.0006
liver 0.0272 0.1072 0.0286 0.0745 0.4234 0.0070 0.0483 0.4191 0.0051
pendigits 3.1311 2.3122 0.8613 0.4511 nan 3.6527 6.5578 2.2639 1.1979
satimage 7.4973 0.6859 0.6576 0.7384 nan 6.7918 nan 84.3363 3.2430
segment 0.5565 0.9811 0.3835 0.1546 nan 0.1247 nan 0.6978 0.0758
sonar 0.1061 0.0415 0.0061 0.1048 0.3438 0.0042 0.2885 0.1233 0.0052
splice 0.3478 0.2885 0.0048 1.9940 7.0942 0.0975 0.8358 10.7936 0.1778
svmguide2 0.0180 0.0086 0.0122 0.0972 1.6404 0.0065 0.1095 1.4786 0.0093
svmguide4 0.2906 0.0197 0.1181 0.1182 3.0294 0.0204 0.4753 1.8570 0.0201
vehicle 0.3065 0.0275 0.0927 1.3264 3.0749 0.0627 1.0007 4.7532 0.0398
vowel 0.5807 0.0328 0.3102 0.1891 8.3620 0.0732 1.3480 11.7284 0.0995
wine 0.0092 0.4211 0.0029 0.0791 0.3525 0.0021 0.0284 0.2508 0.0012

test time


In [31]:
best_test_time = {model: {} for model in models}
for model in models:
    for data in datasets:
        if model + '_' + data in csv_results.keys():
            df = csv_results[model + '_' + data]
            time = df.loc[df['mean_acc'].idxmax(),'test_time']
            time = time.translate(None, '[]')
            best_test_time[model][data] = np.mean([float(t) for t in time.split(', ')])

            
pd.options.display.float_format = '{:2.6f}'.format
print "Best testtime"
pd.DataFrame.from_dict(best_test_time)  

# multiply those?


Best testtime
Out[31]:
fixed_r2svm test_elm test_linear_svm test_r2elm test_r2svm test_svm triple_fixed_r2svm triple_r2svm triple_svm
australian 0.002008 0.001027 0.000215 0.013957 0.005982 0.003588 0.004445 0.008004 0.002292
bank 0.000993 0.012703 0.000085 0.013209 0.004771 0.004017 0.000880 0.006706 0.001405
breast_cancer 0.001906 0.002688 0.000102 0.006456 0.005233 0.000782 0.001782 0.005418 0.001190
crashes 0.001004 0.001667 0.000072 0.006249 0.005186 0.000592 0.001421 0.010838 0.000794
diabetes 0.001311 0.001376 0.000206 0.005798 0.004459 0.002743 0.002913 0.008306 0.003584
fourclass 0.000938 0.005748 0.000291 0.004168 0.004417 0.001461 0.001573 0.004966 0.000840
german 0.003616 0.004874 0.000950 0.015309 0.007216 0.006229 0.002720 0.017894 0.010373
glass 0.001830 0.000177 0.000141 0.003144 0.003556 0.000359 0.001577 0.003992 0.000602
heart 0.000545 0.000116 0.000096 0.006033 0.004101 0.000368 0.000909 0.004397 0.000514
indian 0.001182 0.000624 0.000195 0.007713 0.004716 0.006192 0.002322 0.005405 0.003123
ionosphere 0.003337 0.006975 0.000074 0.008884 0.005969 0.000529 0.004737 0.011043 0.000815
iris 0.000467 0.000470 0.000084 0.002246 0.002429 0.000122 0.000717 0.002623 0.000094
liver 0.001208 0.000285 0.000123 0.003064 0.003224 0.000679 0.000902 0.004558 0.000932
pendigits 0.013302 0.033696 0.001076 0.029512 nan 0.803777 0.030834 0.053239 0.274664
satimage 0.022120 0.021465 0.003465 0.057006 nan 1.170001 nan 0.127460 0.887032
segment 0.004300 0.010996 0.001712 0.006736 nan 0.022020 nan 0.016372 0.014802
sonar 0.003501 0.000289 0.000171 0.007144 0.004569 0.000655 0.006103 0.011293 0.000904
splice 0.008873 0.001800 0.000137 0.055508 0.009990 0.009210 0.014992 0.038572 0.028539
svmguide2 0.000582 0.000470 0.000125 0.004489 0.004324 0.001134 0.001620 0.004627 0.001673
svmguide4 0.001378 0.000820 0.000169 0.003975 0.004488 0.001869 0.001108 0.004656 0.002286
vehicle 0.002673 0.001297 0.000219 0.013950 0.005454 0.003311 0.002565 0.008661 0.005645
vowel 0.003589 0.001556 0.000264 0.007744 0.007480 0.008195 0.005276 0.005606 0.011099
wine 0.000786 0.000346 0.000140 0.002799 0.002546 0.000150 0.000757 0.002801 0.000210

2. Analysis


In [375]:
[('test_r2svm' in result_dict[d], d) for d in datasets]


Out[375]:
[(True, 'glass'),
 (True, 'australian'),
 (True, 'bank'),
 (True, 'breast_cancer'),
 (True, 'crashes'),
 (True, 'liver'),
 (False, 'segment'),
 (False, 'satimage'),
 (True, 'heart'),
 (True, 'vowel'),
 (True, 'diabetes'),
 (True, 'fourclass'),
 (True, 'german'),
 (True, 'indian'),
 (True, 'ionosphere'),
 (True, 'sonar'),
 (True, 'splice'),
 (True, 'iris'),
 (True, 'wine'),
 (False, 'pendigits'),
 (True, 'vehicle'),
 (True, 'svmguide2'),
 (True, 'svmguide4')]

In [376]:
tb, result_dict = get_accuracy_table()
datasets=['glass', 'australian', 'bank','breast_cancer', 'crashes', 'liver', 'segment', 'satimage', 'heart', 'vowel',
                'diabetes', 'fourclass', 'german', 'indian', 'ionosphere', 'sonar', 'splice', 'iris', 'wine', 'pendigits',
                'vehicle', 'svmguide2', 'svmguide4']


points = [(result_dict[d]["test_r2svm"]/result_dict[d]["test_svm"], (result_dict[d]["test_svm"] - result_dict[d]["test_linear_svm"])) for d in datasets]
%matplotlib inline

X = [p[1] for p in points]
Y = [p[0] for p in points]
plt.scatter(X, Y)
print(scipy.stats.spearmanr(X, Y))


(-0.9112781954887218, 2.3360131773309473e-08)

In [416]:
datasets = list(set(datasets).difference(set(["segment", "pendigits", "satimage"])))


points = [(result_dict[d]["test_r2svm"]/result_dict[d]["test_svm"], spaceness(d)) for d in datasets]
%matplotlib inline

X = [p[1] for p in points]
Y = [p[0] for p in points]
plt.scatter(X, Y)
print(scipy.stats.spearmanr(X, Y))


(0.68877342555229382, 0.00078420043051622259)

In [414]:
# Define datasets tested

binary = ["australian", "bank", "breast_cancer", "crashes", "diabetes",\
          "fourclass", "german", "heart", "indian", "ionosphere", "liver", "sonar", "splice"]
multi = set(datasets).difference(set(binary))

datasetss=['australian',\
 'bank',\
 'breast_cancer',\
 'crashes',\
 'diabetes',\
 'fourclass',\
 'german',\
 'glass',\
 'heart',\
 'indian',\
 'ionosphere',\
 'iris',\
 'liver',\
 'mushrooms',\
 'pendigits',\
 'satimage',\
 'segment',\
 'sonar',\
 'splice',\
 'svmguide2',\
 'svmguide4',\
 'vehicle',\
 'vowel',\
 'wine']
dim = [14,4,10,20,8,2,24,9,13,10,34,4,6,112,16,36,19,60,60,20,10,18,10,4]
manifolds = [1,3,1,1,2,2,3,6,3,3,24,2,3,40,9,6,7,28,55,15,1,6,8,2]
dimd = {}
manifoldsd = {}
for i in range(len(datasetss)):
    dimd[datasetss[i]] = dim[i]
    manifoldsd[datasetss[i]] = manifolds[i]

def spaceness(i):
    N, M = dimd[i], manifoldsd[i]
    return (N-M)/float(N)
tb, result_dict = get_accuracy_table()

T = 0.04
T_N = 0.35

rbf_stronger = [d for d in datasets if (result_dict[d]["test_svm"] - result_dict[d]["test_linear_svm"])/abs(result_dict[d]["test_svm"]) > T]
rbf_similar = set(datasets).difference(set(rbf_stronger))

spaced = [d for d,N,M in izip(datasets,dim,manifolds) if (N-M)/float(N) > T_N]
compact = set(datasets).difference(set(spaced))

imbalanced = [d for d in datasets if disbalance(d) > 0.15]
balanced = set(datasets).difference(imbalanced)

datasets_groups = {"bal":balanced, "bin":binary, "multi":multi, "rbf_str":rbf_stronger, "rbf_sim":rbf_similar, "spaced":spaced, "comp":compact, \
                   "im":imbalanced}

In [421]:
# Define predicates tested tested

def model_stronger_than_11lin(model, datasets):
    return sum([result_dict[d][model] > 1.1*result_dict[d]['test_linear_svm'] for d in datasets])

def model_stronger_than_09rbf(model, datasets):
    return sum([result_dict[d][model] > 0.98*result_dict[d]['test_svm'] for d in datasets])

def is_stronger_key_on_data(model, data, key='recurrent', threshold=0.95):
    try:
        df = csv_results[model + '_' + data]
        acc_rec = df[df[key] == True]['mean_acc'].max()
        acc_normal = df[df[key] == False]['mean_acc'].max()
        return acc_normal/acc_rec < 0.96
    except:
        return False
    

def is_weaker_key_on_data(model, data, key='recurrent', threshold=0.95):
    try:
        df = csv_results[model + '_' + data]
        acc_rec = df[df[key] == True]['mean_acc'].max()
        acc_normal = df[df[key] == False]['mean_acc'].max()
        return acc_rec/acc_normal < 0.96
    except:
        return False
    
def is_stronger_recurrent(model, datasets):
    return sum(is_stronger_key_on_data(model, d) for d in datasets)

def is_weaer_recurrent(model, datasets):
    return sum(is_weaker_key_on_data(model, d) for d in datasets)


def is_stronger_scale(model, datasets):
    return sum(is_stronger_key_on_data(model, d, key='scale') for d in datasets)

def is_stronger_use_prev(model, datasets):
    return sum(is_stronger_key_on_data(model, d, key='use_prev') for d in datasets)

predicates = {"str_than_11lin":model_stronger_than_11lin, \
              "str_than_09rbf":model_stronger_than_09rbf, \
              "str_rec":is_stronger_recurrent,\
              "str_scale":is_stronger_scale, \
              "str_use_preV":is_stronger_use_prev, \
              "weak_rec": is_weaer_recurrent}

In [323]:
is_stronger_key_on_data("test_r2svm", "glass", key="scale")


0.592894392429 0.581876852691
Out[323]:
True

In [314]:
from itertools import product

In [422]:
def get_model_char(model=["test_r2svm"]):
    results = {k:{} for k in datasets_groups.keys()} 
    for pred_key,d_group_key in product(predicates.keys(), datasets_groups):
        try:
            d_group = datasets_groups[d_group_key]
            pred = predicates[pred_key]
            sum_len, sum_count = 0, 0
            for m in model:
                sum_len += len(d_group)
                sum_count += pred(m, d_group)
                
            results[d_group_key][pred_key] = "%d"%(100.*sum_count/float(sum_len))+" "+ str(sum_count) + "/" + str(sum_len)
        except:
            pass
    return pd.DataFrame.from_dict(results)

t = get_model_char()

In [423]:
get_model_char(["test_r2elm", "test_r2svm"])


Out[423]:
bal bin comp im multi rbf_sim rbf_str spaced
str_rec 0 0/26 3 1/26 7 1/14 5 1/20 0 0/20 0 0/22 4 1/24 0 0/32
str_scale 11 3/26 7 2/26 35 5/14 10 2/20 15 3/20 18 4/22 4 1/24 0 0/32
str_than_09rbf NaN 57 15/26 NaN 60 12/20 NaN 81 18/22 NaN NaN
str_than_11lin NaN 0 0/26 NaN 0 0/20 NaN 0 0/22 NaN NaN
str_use_preV 0 0/26 3 1/26 7 1/14 5 1/20 0 0/20 0 0/22 4 1/24 0 0/32
weak_rec 0 0/26 0 0/26 0 0/14 0 0/20 0 0/20 0 0/22 0 0/24 0 0/32

In [328]:
get_model_char("test_r2svm")


Out[328]:
bin comp im multi rbf_sim rbf_str spaced
str_rec 0 0/13 0 0/8 0 0/15 10 1/10 0 0/18 20 1/5 6 1/15
str_scale 7 1/13 37 3/8 13 2/15 40 4/10 22 4/18 20 1/5 13 2/15
str_than_09rbf 61 8/13 75 6/8 73 11/15 70 7/10 77 14/18 20 1/5 60 9/15
str_than_11lin 0 0/13 12 1/8 6 1/15 30 3/10 5 1/18 40 2/5 13 2/15
str_use_preV 0 0/13 0 0/8 0 0/15 10 1/10 0 0/18 20 1/5 6 1/15

In [309]:
get_model_char("triple_fixed_r2svm")


Out[309]:
bin comp im multi rbf_sim rbf_str spaced
str_rec 0 0/13 0 0/8 0 0/15 0 0/10 0 0/18 0 0/5 0 0/15
str_scale 0 0/13 0 0/8 0 0/15 0 0/10 0 0/18 0 0/5 0 0/15
str_than_09rbf 69 9/13 62 5/8 NaN NaN NaN NaN NaN
str_than_11lin 15 2/13 0 0/8 NaN NaN NaN NaN NaN
str_use_preV 0 0/13 0 0/8 0 0/15 0 0/10 0 0/18 0 0/5 0 0/15

In [ ]:

Misc analysis


In [242]:
tb, result_dict = get_accuracy_table()
datasets=['glass', 'australian', 'bank','breast_cancer', 'crashes', 'liver', 'segment', 'satimage', 'heart', 'vowel',
                'diabetes', 'fourclass', 'german', 'indian', 'ionosphere', 'sonar', 'splice', 'iris', 'wine', 'pendigits',
                'vehicle', 'svmguide2', 'svmguide4']
points = [(result_dict[d]["test_r2svm"]/result_dict[d]["test_svm"], disbalance(d)) for d in datasets]
%matplotlib inline

X = [p[1] for p in points]
Y = [p[0] for p in points]
plt.scatter(X, Y)
print(scipy.stats.spearmanr(X, Y))


(0.10781409823989753, 0.62437793810711406)

In [227]:
from collections import Counter

In [228]:
glass =

In [ ]:
Counter(glass.target)

In [247]:
def disbalance(name="glass"):
    data = fetch_uci_datasets([name])[0]
    c = Counter(data.target)
    return (max(c.values()) - min(c.values()))/(float(len(data.target)))

In [250]:
disbalance('australian')


Out[250]:
0.11014492753623188

In [240]:



Out[240]:
11.166666666666666