In [600]:
%reset -f

In [601]:
from IPython.display import display, HTML
from sklearn import metrics
from sklearn import cross_validation
from scipy import interp
import pandas as pd
import numpy as np
import os

import sys
sys.path.insert(1,"../../src/")
from TypeFeatImputer import TypeFeatImputer
from i_score_parallel import i_score
from TypeFeatFS import DiscreteFS, ContinuousFS
from TypeFeatFilter import DiscreteFilter, ContinuousFilter
from OutlierFiltering import OutlierFiltering
from typeFeat_score import typeFeat_score

from pylab import *
import seaborn as sns
import matplotlib.pyplot as plt


np.set_printoptions(suppress=True)
pd.options.display.float_format = '{:,.4f}'.format
plt.style.use('classic')

%matplotlib inline

In [619]:
ddf = []
pathDir = os.path.join('resources','results') 
path, dirs, files = os.walk(pathDir).next()
file_count = len(files)

for f in files:
    if "summary" not in str(f):
        ddf.append(pd.read_pickle(os.path.join('resources','results', str(f) )))
    
dfAll = pd.concat(ddf)
print dfAll.shape


(340, 35)

In [620]:
dfAll.cv_f1_mean = pd.to_numeric(dfAll.cv_f1_mean)
dfAll.cv_auc_mean = pd.to_numeric(dfAll.cv_auc_mean)
dfAll.cv_sens_mean = pd.to_numeric(dfAll.cv_sens_mean)
dfAll.cv_spec_mean = pd.to_numeric(dfAll.cv_spec_mean)

dfAll.cv_f1_std = pd.to_numeric(dfAll.cv_f1_std)
dfAll.cv_auc_std = pd.to_numeric(dfAll.cv_auc_std)
dfAll.cv_sens_std = pd.to_numeric(dfAll.cv_sens_std)
dfAll.cv_spec_std = pd.to_numeric(dfAll.cv_spec_std)
dfAll.exp = pd.to_numeric(dfAll.exp)

dfAll["cv_auc"] = dfAll["cv_auc_mean"].round(3).astype(str).str.cat(dfAll["cv_auc_std"].round(3).astype(str), sep="+/-")
dfAll["cv_f1"] = dfAll["cv_f1_mean"].round(3).astype(str).str.cat(dfAll["cv_f1_std"].round(3).astype(str), sep="+/-")
dfAll["cv_sens"] = dfAll["cv_sens_mean"].round(3).astype(str).str.cat(dfAll["cv_sens_std"].round(3).astype(str), sep="+/-")
dfAll["cv_spec"] = dfAll["cv_spec_mean"].round(3).astype(str).str.cat(dfAll["cv_spec_std"].round(3).astype(str), sep="+/-")
dfAll["disease"] = dfAll["name"].str.slice(0,dfAll["name"].str.find("_").values[0])

print
print dfAll.columns.tolist()
print dfAll.name.unique()
print dfAll["disease"].unique()
print dfAll.shape


['exp', 'name', 'size_tr', 'fs', 'sm', 'cls', 'metric', 'params', 'tr_sens', 'tr_spec', 'tr_auc', 'tr_prec', 'tr_rec', 'tr_f1', 'cv_sens_mean', 'cv_sens_std', 'cv_spec_mean', 'cv_spec_std', 'cv_auc_mean', 'cv_auc_std', 'cv_prec_mean', 'cv_prec_std', 'cv_rec_mean', 'cv_rec_std', 'cv_f1_mean', 'cv_f1_std', 'test_sens', 'test_spec', 'test_auc', 'test_rec', 'test_prec', 'test_f1', 'cm_test', 'time', 'pipeline', 'cv_auc', 'cv_f1', 'cv_sens', 'cv_spec', 'disease']
['all_extended_last_early_readmission_vs_none'
 'all_reduced_last_early_readmission_vs_none'
 'all_reduced_last_all_readmisssion_vs_none'
 'all_extended_last_all_readmisssion_vs_none'
 'all_extended_extra_last_early_readmission_vs_none'
 'Diabetis_extended_extra_diag_1_last_all_readmisssion_vs_none'
 'all_extended_extra_last_all_readmisssion_vs_none'
 'Circulatory_extended_extra_diag_1_last_all_readmisssion_vs_none'
 'Digestive_extended_extra_diag_1_last_all_readmisssion_vs_none'
 'Genitourinary_extended_extra_diag_1_last_all_readmisssion_vs_none'
 'Poisoning_extended_extra_diag_1_last_all_readmisssion_vs_none'
 'Muscoskeletal_extended_extra_diag_1_last_all_readmisssion_vs_none'
 'Neoplasms_extended_extra_diag_1_last_all_readmisssion_vs_none'
 'Respiratory_extended_extra_diag_1_last_all_readmisssion_vs_none'
 'Diabetis_extended_extra_diag_1_last_early_readmission_vs_none'
 'Circulatory_extended_extra_diag_1_last_early_readmission_vs_none'
 'Digestive_extended_extra_diag_1_last_early_readmission_vs_none'
 'Genitourinary_extended_extra_diag_1_last_early_readmission_vs_none'
 'Poisoning_extended_extra_diag_1_last_early_readmission_vs_none'
 'Muscoskeletal_extended_extra_diag_1_last_early_readmission_vs_none'
 'Neoplasms_extended_extra_diag_1_last_early_readmission_vs_none'
 'Respiratory_extended_extra_diag_1_last_early_readmission_vs_none'
 'all_extended_extra_diag_3_last_early_readmission_vs_none'
 'all_extended_extra_diag_3_last_all_readmisssion_vs_none']
['all' 'Dia' 'Cir' 'Dig' 'Gen' 'Poi' 'Mus' 'Neo' 'Res']
(340, 40)

In [662]:
typeEncounter = "last" # ['first','last']
typeHypothesis = "all_readmisssion_vs_none" # ['all_readmisssion_vs_none','early_readmission_vs_none']
typeDataFeatures = "extended_extra_diag_1" # ["reduced","extended','extended_extra','extended_extra_diag_1','extended_extra_diag_3']
    #Extended_Extra_diag_1 -> Extended extra columns with disease in diag_3
    #Extended_Extra_diag_1 -> Extended extra columns with disease in diag_1
    #Extended_Extra -> Extended extra of columns
    #Extended -> Extended columns
    #Reduced -> minimum set of columns 
typeDataExperiment = "all" #["all", "disease"] 
    #all -> No filter disease column
    #disease -> Filter/Remove disease column and keep only rows with diagnosis column == 1  

name = typeDataExperiment + "_" + typeDataFeatures + "_" +  typeEncounter + "_" + typeHypothesis

print name


all_extended_extra_diag_1_last_all_readmisssion_vs_none

In [663]:
#Sort and list all pipelines
dfAll.sort_values(["cv_auc_mean"], ascending=False,inplace=True)
condition = np.logical_and(dfAll['name'].str.contains(typeHypothesis), 
                           dfAll.disease != typeDataExperiment)
print dfAll[condition].shape
print np.sum(dfAll[condition]["time"])


dfAll[condition][["disease","name","fs","metric","cls","params",
    "tr_auc","tr_sens","tr_spec",
    "cv_auc_mean","cv_f1_mean","cv_sens_mean","cv_spec_mean",
    "test_auc","test_f1","test_sens","test_spec"]]


(96, 40)
4096.31510019
Out[663]:
disease name fs metric cls params tr_auc tr_sens tr_spec cv_auc_mean cv_f1_mean cv_sens_mean cv_spec_mean test_auc test_f1 test_sens test_spec
0 Dia Diabetis_extended_extra_diag_1_last_all_readmi... none recall rf [entropy, 8, 250] 0.7306 0.6667 0.7945 0.6882 0.6387 0.5305 0.7241 0.6103 0.6199 0.5427 0.6779
0 Dia Diabetis_extended_extra_diag_1_last_all_readmi... none f1_weighted logReg [balanced, 0.1, l1] 0.6422 0.6020 0.6825 0.6860 0.6403 0.5866 0.6795 0.6116 0.6184 0.5726 0.6505
0 Dia Diabetis_extended_extra_diag_1_last_all_readmi... none f1 logReg [balanced, 0.1, l1] 0.6422 0.6020 0.6825 0.6860 0.6403 0.5866 0.6795 0.6116 0.6184 0.5726 0.6505
0 Dia Diabetis_extended_extra_diag_1_last_all_readmi... none recall logReg [balanced, 0.1, l1] 0.6422 0.6020 0.6825 0.6860 0.6403 0.5866 0.6795 0.6116 0.6184 0.5726 0.6505
0 Dia Diabetis_extended_extra_diag_1_last_all_readmi... none f1 rf [gini, 8, 500] 0.7451 0.6709 0.8193 0.6857 0.6372 0.5252 0.7259 0.6124 0.6222 0.5427 0.6821
0 Dia Diabetis_extended_extra_diag_1_last_all_readmi... none f1_weighted rf [gini, 8, 300] 0.7451 0.6728 0.8175 0.6853 0.6381 0.5289 0.7244 0.6122 0.6224 0.5370 0.6874
0 Gen Genitourinary_extended_extra_diag_1_last_all_r... none recall rf [gini, 4, 250] 0.6454 0.5470 0.7438 0.6757 0.6382 0.5078 0.7222 0.5976 0.6200 0.5000 0.6953
0 Gen Genitourinary_extended_extra_diag_1_last_all_r... none f1 rf [gini, 4, 250] 0.6454 0.5470 0.7438 0.6757 0.6382 0.5078 0.7222 0.5976 0.6200 0.5000 0.6953
0 Gen Genitourinary_extended_extra_diag_1_last_all_r... none f1_weighted rf [entropy, 4, 350] 0.6395 0.5380 0.7410 0.6749 0.6374 0.5071 0.7213 0.5968 0.6191 0.5000 0.6937
0 Gen Genitourinary_extended_extra_diag_1_last_all_r... none f1 logReg [balanced, 0.05, l1] 0.6329 0.6588 0.6069 0.6658 0.6215 0.6439 0.5995 0.6138 0.6147 0.6328 0.5948
0 Gen Genitourinary_extended_extra_diag_1_last_all_r... none recall logReg [balanced, 0.05, l1] 0.6329 0.6588 0.6069 0.6658 0.6215 0.6439 0.5995 0.6138 0.6147 0.6328 0.5948
0 Gen Genitourinary_extended_extra_diag_1_last_all_r... none f1_weighted logReg [balanced, 0.1, l2] 0.6346 0.6365 0.6326 0.6637 0.6330 0.6216 0.6329 0.6157 0.6231 0.6042 0.6272
0 Dia Diabetis_extended_extra_diag_1_last_all_readmi... none f1 nb [] 0.5803 0.2784 0.8821 0.6599 0.5840 0.2902 0.8666 0.5580 0.5593 0.2464 0.8695
0 Dia Diabetis_extended_extra_diag_1_last_all_readmi... none recall nb [] 0.5803 0.2784 0.8821 0.6599 0.5840 0.2902 0.8666 0.5580 0.5593 0.2464 0.8695
0 Dia Diabetis_extended_extra_diag_1_last_all_readmi... none f1_weighted nb [] 0.5803 0.2784 0.8821 0.6599 0.5840 0.2902 0.8666 0.5580 0.5593 0.2464 0.8695
0 Gen Genitourinary_extended_extra_diag_1_last_all_r... none recall nb [] 0.5264 0.0951 0.9576 0.6401 0.5286 0.5948 0.5477 0.5196 0.5200 0.0781 0.9611
0 Gen Genitourinary_extended_extra_diag_1_last_all_r... none f1 nb [] 0.5264 0.0951 0.9576 0.6401 0.5286 0.5948 0.5477 0.5196 0.5200 0.0781 0.9611
0 Gen Genitourinary_extended_extra_diag_1_last_all_r... none f1_weighted nb [] 0.5264 0.0951 0.9576 0.6401 0.5286 0.5948 0.5477 0.5196 0.5200 0.0781 0.9611
0 Dig Digestive_extended_extra_diag_1_last_all_readm... none recall rf [gini, 4, 200] 0.6010 0.4737 0.7284 0.6370 0.6103 0.4375 0.7351 0.5761 0.6005 0.4248 0.7274
0 Dig Digestive_extended_extra_diag_1_last_all_readm... none f1 rf [gini, 4, 200] 0.6010 0.4737 0.7284 0.6370 0.6103 0.4375 0.7351 0.5761 0.6005 0.4248 0.7274
0 Res Respiratory_extended_extra_diag_1_last_all_rea... none f1_weighted rf [gini, 8, 500] 0.7066 0.6035 0.8097 0.6359 0.5960 0.4915 0.6911 0.5783 0.5836 0.4702 0.6864
0 Res Respiratory_extended_extra_diag_1_last_all_rea... none recall rf [entropy, 12, 250] 0.8605 0.8135 0.9075 0.6354 0.5936 0.5022 0.6753 0.5834 0.5883 0.5135 0.6534
0 Poi Poisoning_extended_extra_diag_1_last_all_readm... none recall logReg [balanced, 0.05, l1] 0.6070 0.6253 0.5887 0.6353 0.5936 0.6032 0.5798 0.5913 0.5927 0.6071 0.5754
0 Res Respiratory_extended_extra_diag_1_last_all_rea... none f1 rf [entropy, 12, 400] 0.8593 0.8090 0.9096 0.6352 0.5952 0.5018 0.6786 0.5839 0.5888 0.5135 0.6544
0 Dig Digestive_extended_extra_diag_1_last_all_readm... none f1_weighted rf [gini, 8, 250] 0.7298 0.6193 0.8402 0.6350 0.6161 0.4501 0.7353 0.5941 0.6164 0.4662 0.7220
0 Dig Digestive_extended_extra_diag_1_last_all_readm... none recall logReg [balanced, 0.005, l2] 0.6066 0.5506 0.6627 0.6331 0.6084 0.5243 0.6613 0.5946 0.6108 0.5214 0.6679
0 Dig Digestive_extended_extra_diag_1_last_all_readm... none f1 logReg [balanced, 0.005, l2] 0.6066 0.5506 0.6627 0.6331 0.6084 0.5243 0.6613 0.5946 0.6108 0.5214 0.6679
0 Cir Circulatory_extended_extra_diag_1_last_all_rea... none f1_weighted rf [entropy, 12, 300] 0.7501 0.7100 0.7901 0.6330 0.6020 0.5308 0.6566 0.6074 0.6135 0.5688 0.6460
0 Cir Circulatory_extended_extra_diag_1_last_all_rea... none f1 rf [gini, 8, 250] 0.6409 0.6017 0.6801 0.6306 0.5994 0.5349 0.6483 0.6043 0.6106 0.5649 0.6437
0 Cir Circulatory_extended_extra_diag_1_last_all_rea... none recall rf [gini, 8, 250] 0.6409 0.6017 0.6801 0.6306 0.5994 0.5349 0.6483 0.6043 0.6106 0.5649 0.6437
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
0 Cir Circulatory_extended_extra_diag_1_last_all_rea... none f1_weighted knn [uniform, 11] 0.6433 0.4802 0.8065 0.5803 0.5595 0.3633 0.7334 0.5577 0.5695 0.3923 0.7231
0 Neo Neoplasms_extended_extra_diag_1_last_all_readm... none f1_weighted logReg [None, 0.0001, l2] 0.5529 0.2878 0.8180 0.5781 0.6129 0.2861 0.7841 0.5124 0.5959 0.2340 0.7908
0 Neo Neoplasms_extended_extra_diag_1_last_all_readm... none recall logReg [balanced, 1e-05, l2] 0.5791 0.5701 0.5881 0.5777 0.5814 0.5297 0.5830 0.5528 0.5707 0.5489 0.5566
0 Neo Neoplasms_extended_extra_diag_1_last_all_readm... none f1 logReg [balanced, 1e-05, l2] 0.5791 0.5701 0.5881 0.5777 0.5814 0.5297 0.5830 0.5528 0.5707 0.5489 0.5566
0 Cir Circulatory_extended_extra_diag_1_last_all_rea... none recall nb [] 0.5007 0.9998 0.0016 0.5721 0.2710 0.9915 0.0112 0.5001 0.2593 1.0000 0.0003
0 Cir Circulatory_extended_extra_diag_1_last_all_rea... none f1 nb [] 0.5007 0.9998 0.0016 0.5721 0.2710 0.9915 0.0112 0.5001 0.2593 1.0000 0.0003
0 Cir Circulatory_extended_extra_diag_1_last_all_rea... none f1_weighted nb [] 0.5007 0.9998 0.0016 0.5721 0.2710 0.9915 0.0112 0.5001 0.2593 1.0000 0.0003
0 Cir Circulatory_extended_extra_diag_1_last_all_rea... none f1 knn [uniform, 3] 0.7647 0.6955 0.8339 0.5592 0.5568 0.4329 0.6578 0.5471 0.5585 0.4366 0.6576
0 Dig Digestive_extended_extra_diag_1_last_all_readm... none f1_weighted knn [uniform, 7] 0.6605 0.4618 0.8592 0.5563 0.5603 0.2907 0.7771 0.5369 0.5636 0.3076 0.7662
0 Dia Diabetis_extended_extra_diag_1_last_all_readmi... none recall knn [uniform, 1] 0.9997 0.9994 1.0000 0.5552 0.5673 0.4541 0.6563 0.5274 0.5397 0.4316 0.6232
0 Dia Diabetis_extended_extra_diag_1_last_all_readmi... none f1 knn [uniform, 1] 0.9997 0.9994 1.0000 0.5552 0.5673 0.4541 0.6563 0.5274 0.5397 0.4316 0.6232
0 Poi Poisoning_extended_extra_diag_1_last_all_readm... none f1 nb [] 0.5042 1.0000 0.0084 0.5530 0.2320 0.9989 0.0077 0.5030 0.2296 1.0000 0.0061
0 Poi Poisoning_extended_extra_diag_1_last_all_readm... none f1_weighted nb [] 0.5042 1.0000 0.0084 0.5530 0.2320 0.9989 0.0077 0.5030 0.2296 1.0000 0.0061
0 Poi Poisoning_extended_extra_diag_1_last_all_readm... none recall nb [] 0.5042 1.0000 0.0084 0.5530 0.2320 0.9989 0.0077 0.5030 0.2296 1.0000 0.0061
0 Poi Poisoning_extended_extra_diag_1_last_all_readm... none f1_weighted knn [distance, 3] 1.0000 1.0000 1.0000 0.5455 0.5622 0.3635 0.7071 0.5339 0.5581 0.4060 0.6618
0 Dig Digestive_extended_extra_diag_1_last_all_readm... none recall knn [uniform, 1] 1.0000 1.0000 1.0000 0.5396 0.5627 0.4142 0.6649 0.5363 0.5611 0.3876 0.6850
0 Dig Digestive_extended_extra_diag_1_last_all_readm... none f1 knn [uniform, 1] 1.0000 1.0000 1.0000 0.5396 0.5627 0.4142 0.6649 0.5363 0.5611 0.3876 0.6850
0 Mus Muscoskeletal_extended_extra_diag_1_last_all_r... none f1_weighted knn [distance, 3] 1.0000 1.0000 1.0000 0.5391 0.5718 0.3141 0.7363 0.5518 0.5963 0.3590 0.7446
0 Res Respiratory_extended_extra_diag_1_last_all_rea... none f1 knn [uniform, 1] 1.0000 1.0000 1.0000 0.5376 0.5420 0.4821 0.5931 0.5365 0.5416 0.4643 0.6087
0 Res Respiratory_extended_extra_diag_1_last_all_rea... none recall knn [uniform, 1] 1.0000 1.0000 1.0000 0.5376 0.5420 0.4821 0.5931 0.5365 0.5416 0.4643 0.6087
0 Gen Genitourinary_extended_extra_diag_1_last_all_r... none f1 knn [uniform, 1] 1.0000 1.0000 1.0000 0.5323 0.5608 0.3896 0.6750 0.5617 0.5890 0.4297 0.6937
0 Gen Genitourinary_extended_extra_diag_1_last_all_r... none recall knn [uniform, 1] 1.0000 1.0000 1.0000 0.5323 0.5608 0.3896 0.6750 0.5617 0.5890 0.4297 0.6937
0 Cir Circulatory_extended_extra_diag_1_last_all_rea... none recall knn [uniform, 1] 1.0000 1.0000 1.0000 0.5314 0.5415 0.4509 0.6120 0.5251 0.5354 0.4435 0.6067
0 Neo Neoplasms_extended_extra_diag_1_last_all_readm... none f1_weighted knn [uniform, 3] 0.7226 0.5118 0.9333 0.5301 0.6094 0.2424 0.8115 0.5377 0.6198 0.2596 0.8157
0 Neo Neoplasms_extended_extra_diag_1_last_all_readm... none f1 knn [uniform, 1] 1.0000 1.0000 1.0000 0.5268 0.5977 0.3273 0.7264 0.5568 0.6265 0.3574 0.7562
0 Neo Neoplasms_extended_extra_diag_1_last_all_readm... none recall knn [uniform, 1] 1.0000 1.0000 1.0000 0.5268 0.5977 0.3273 0.7264 0.5568 0.6265 0.3574 0.7562
0 Poi Poisoning_extended_extra_diag_1_last_all_readm... none recall knn [uniform, 1] 1.0000 1.0000 1.0000 0.5237 0.5469 0.4064 0.6410 0.5325 0.5539 0.4323 0.6326
0 Poi Poisoning_extended_extra_diag_1_last_all_readm... none f1 knn [uniform, 1] 1.0000 1.0000 1.0000 0.5237 0.5469 0.4064 0.6410 0.5325 0.5539 0.4323 0.6326
0 Mus Muscoskeletal_extended_extra_diag_1_last_all_r... none f1 knn [uniform, 1] 1.0000 1.0000 1.0000 0.5189 0.5603 0.3622 0.6756 0.5498 0.5869 0.4169 0.6828
0 Mus Muscoskeletal_extended_extra_diag_1_last_all_r... none recall knn [uniform, 1] 1.0000 1.0000 1.0000 0.5189 0.5603 0.3622 0.6756 0.5498 0.5869 0.4169 0.6828

96 rows × 17 columns


In [656]:
#Filter pipelines less performance than random
dfAllAux = dfAll[condition]
dfAllAux[[
    "name","fs","metric","params",
    "tr_auc","tr_sens","tr_spec",
    "cv_auc_mean","cv_f1_mean","cv_sens_mean","cv_spec_mean",
    "test_auc","test_f1","test_sens","test_spec"]]


Out[656]:
name fs metric params tr_auc tr_sens tr_spec cv_auc_mean cv_f1_mean cv_sens_mean cv_spec_mean test_auc test_f1 test_sens test_spec
cls
knn all_reduced_last_all_readmisssion_vs_none none f1_weighted [uniform, 11] 0.6417 0.4415 0.8418 0.5991 0.5817 0.3372 0.7853 0.5633 0.5839 0.3444 0.7822
logReg all_extended_extra_diag_3_last_all_readmisssio... none f1 [balanced, 0.05, l2] 0.6035 0.5712 0.6358 0.6430 0.6089 0.5664 0.6343 0.5994 0.6072 0.5706 0.6281
nb all_extended_extra_last_all_readmisssion_vs_none none recall [] 0.5006 0.9998 0.0014 0.5012 0.2381 0.9995 0.0015 0.5002 0.2370 0.9996 0.0007
rf all_extended_last_all_readmisssion_vs_none none f1_weighted [gini, 12, 200] 0.6932 0.6727 0.7136 0.6543 0.6168 0.5695 0.6461 0.6067 0.6140 0.5811 0.6323

In [549]:
dfAllAux[[
    "metric","fs","cls","params",
    "tr_auc","tr_sens","tr_spec",
    "cv_auc_mean","cv_f1_mean","cv_sens_mean","cv_spec_mean",
    "test_auc","test_f1","test_sens","test_spec"]].to_latex()


Out[549]:
u'\\begin{tabular}{llllllllrrrrllll}\n\\toprule\n{} &       metric &    fs &     cls &                  params & tr\\_auc & tr\\_sens & tr\\_spec &  cv\\_auc\\_mean &  cv\\_f1\\_mean &  cv\\_sens\\_mean &  cv\\_spec\\_mean & test\\_auc & test\\_f1 & test\\_sens & test\\_spec \\\\\n\\midrule\n0 &  f1\\_weighted &  none &      rf &         [gini, 12, 400] & 0.6936 &  0.6732 &  0.7140 &       0.6536 &      0.6153 &        0.5659 &        0.6462 &   0.6101 &  0.6175 &    0.5833 &    0.6369 \\\\\n0 &           f1 &  none &      rf &          [gini, 8, 350] & 0.6234 &  0.6062 &  0.6407 &       0.6493 &      0.6121 &        0.5836 &        0.6271 &   0.6077 &  0.6131 &    0.5947 &    0.6207 \\\\\n0 &       recall &  none &      rf &          [gini, 8, 350] & 0.6236 &  0.6064 &  0.6409 &       0.6493 &      0.6121 &        0.5836 &        0.6271 &   0.6078 &  0.6131 &    0.5947 &    0.6208 \\\\\n0 &  f1\\_weighted &  none &  logReg &   [balanced, 0.005, l2] & 0.5978 &  0.5673 &  0.6282 &       0.6382 &      0.6027 &        0.5607 &        0.6276 &   0.5971 &  0.6050 &    0.5678 &    0.6263 \\\\\n0 &           f1 &  none &  logReg &  [balanced, 0.0001, l2] & 0.5940 &  0.5807 &  0.6073 &       0.6307 &      0.5941 &        0.5772 &        0.6003 &   0.5930 &  0.5984 &    0.5806 &    0.6053 \\\\\n0 &       recall &  none &  logReg &   [balanced, 1e-05, l2] & 0.5858 &  0.5911 &  0.5805 &       0.6207 &      0.5854 &        0.5881 &        0.5776 &   0.5884 &  0.5906 &    0.5957 &    0.5811 \\\\\n\\bottomrule\n\\end{tabular}\n'

Grouped by Diseases


In [646]:
dfAllAux = dfAllAux.sort_values(["disease","cv_auc_mean"], ascending=False,inplace=False)
dfAllAux = dfAllAux[np.logical_and(dfAllAux["cv_sens_mean"] > 0.5,dfAllAux['cv_spec_mean']> 0.5)]
dfAllAux = dfAllAux.groupby(["disease"]).first()
dfAllAux[["sm","fs","metric","cls","params",
    "tr_auc","tr_sens","tr_spec",
    "cv_auc_mean","cv_f1_mean","cv_sens_mean","cv_spec_mean",
    "test_auc","test_f1","test_sens","test_spec"]]


Out[646]:
sm fs metric cls params tr_auc tr_sens tr_spec cv_auc_mean cv_f1_mean cv_sens_mean cv_spec_mean test_auc test_f1 test_sens test_spec
disease
Cir none none f1_weighted rf [entropy, 12, 300] 0.7501 0.7100 0.7901 0.6330 0.6020 0.5308 0.6566 0.6074 0.6135 0.5688 0.6460
Dia none none recall rf [entropy, 8, 250] 0.7306 0.6667 0.7945 0.6882 0.6387 0.5305 0.7241 0.6103 0.6199 0.5427 0.6779
Dig none none recall logReg [balanced, 0.005, l2] 0.6066 0.5506 0.6627 0.6331 0.6084 0.5243 0.6613 0.5946 0.6108 0.5214 0.6679
Gen none none recall rf [gini, 4, 250] 0.6454 0.5470 0.7438 0.6757 0.6382 0.5078 0.7222 0.5976 0.6200 0.5000 0.6953
Mus none none recall logReg [balanced, 0.1, l1] 0.6095 0.5692 0.6498 0.6193 0.6001 0.5498 0.6177 0.5840 0.6042 0.5349 0.6331
Neo none none recall logReg [balanced, 1e-05, l2] 0.5791 0.5701 0.5881 0.5777 0.5814 0.5297 0.5830 0.5528 0.5707 0.5489 0.5566
Poi none none recall logReg [balanced, 0.05, l1] 0.6070 0.6253 0.5887 0.6353 0.5936 0.6032 0.5798 0.5913 0.5927 0.6071 0.5754
Res none none recall rf [entropy, 12, 250] 0.8605 0.8135 0.9075 0.6354 0.5936 0.5022 0.6753 0.5834 0.5883 0.5135 0.6534

In [531]:
print dfAllAux[["metric","cls","params","tr_auc","tr_sens","tr_spec",
    "cv_auc_mean","cv_f1_mean","cv_sens_mean","cv_spec_mean","test_auc",
                "test_f1","test_sens","test_spec"]].to_latex()


\begin{tabular}{llllrrrrrrrrrrr}
\toprule
{} &  metric &     cls &                  params &  tr\_auc &  tr\_sens &  tr\_spec &  cv\_auc\_mean &  cv\_f1\_mean &  cv\_sens\_mean &  cv\_spec\_mean &  test\_auc &  test\_f1 &  test\_sens &  test\_spec \\
disease &         &         &                         &         &          &          &              &             &               &               &           &          &            &            \\
\midrule
Cir     &      f1 &  logReg &    [balanced, 0.05, l1] &  0.6150 &   0.5833 &   0.6467 &       0.6411 &      0.6802 &        0.5686 &        0.6352 &    0.5902 &   0.6745 &     0.5498 &     0.6306 \\
Dia     &  recall &  logReg &   [balanced, 1e-05, l2] &  0.6439 &   0.6751 &   0.6126 &       0.7087 &      0.6622 &        0.6925 &        0.5856 &    0.6727 &   0.6592 &     0.7763 &     0.5690 \\
Dig     &  recall &  logReg &  [balanced, 0.0005, l2] &  0.6458 &   0.6667 &   0.6250 &       0.6469 &      0.6911 &        0.5903 &        0.6289 &    0.5978 &   0.6924 &     0.5616 &     0.6339 \\
Gen     &  recall &  logReg &    [balanced, 0.05, l1] &  0.6813 &   0.7225 &   0.6400 &       0.6533 &      0.6860 &        0.6222 &        0.6236 &    0.6293 &   0.6916 &     0.6292 &     0.6294 \\
Neo     &  recall &  logReg &  [balanced, 0.0001, l2] &  0.6209 &   0.6824 &   0.5593 &       0.6263 &      0.6547 &        0.6318 &        0.5682 &    0.5879 &   0.6403 &     0.6250 &     0.5509 \\
Poi     &      f1 &  logReg &    [balanced, 0.05, l1] &  0.6343 &   0.6893 &   0.5793 &       0.6562 &      0.6351 &        0.6804 &        0.5569 &    0.6372 &   0.6403 &     0.7172 &     0.5572 \\
Res     &      f1 &  logReg &     [balanced, 0.1, l2] &  0.6632 &   0.6409 &   0.6855 &       0.6486 &      0.7045 &        0.5431 &        0.6656 &    0.5689 &   0.6863 &     0.4903 &     0.6476 \\
\bottomrule
\end{tabular}

Save results


In [550]:
import time

fname = "summary" + "_" + name + "_" + time.strftime("%Y%m%d-%H%M%S") + ".pkl"
dfAllAux.to_pickle(os.path.join(pathDir, fname))
print fname


summary_reduced_last_all_readmisssion_vs_none_20171025-115456.pkl

In [551]:
dd = pd.read_pickle(os.path.join(pathDir,fname))
print dd.columns


Index([u'exp', u'name', u'size_tr', u'fs', u'sm', u'cls', u'metric', u'params',
       u'tr_sens', u'tr_spec', u'tr_auc', u'tr_prec', u'tr_rec', u'tr_f1',
       u'cv_sens_mean', u'cv_sens_std', u'cv_spec_mean', u'cv_spec_std',
       u'cv_auc_mean', u'cv_auc_std', u'cv_prec_mean', u'cv_prec_std',
       u'cv_rec_mean', u'cv_rec_std', u'cv_f1_mean', u'cv_f1_std',
       u'test_sens', u'test_spec', u'test_auc', u'test_rec', u'test_prec',
       u'test_f1', u'cm_test', u'time', u'pipeline', u'cv_auc', u'cv_f1',
       u'cv_sens', u'cv_spec', u'disease'],
      dtype='object')

In [ ]: