In [255]:
#TO RE-RUN
%reset -f

In [256]:
from sklearn import preprocessing
from time import time
import numpy as np
import csv
from sklearn import metrics
from sklearn.preprocessing import scale
from sklearn.feature_selection import VarianceThreshold
from sklearn.cross_validation import StratifiedShuffleSplit, cross_val_score

from sklearn.svm import SVC
from sklearn.svm import LinearSVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import BernoulliNB, MultinomialNB, GaussianNB

from sklearn.grid_search import GridSearchCV, ParameterGrid
from sklearn.preprocessing import StandardScaler

from imblearn.over_sampling import SMOTE,ADASYN, RandomOverSampler
from imblearn.pipeline import Pipeline
from imblearn.pipeline import make_pipeline

from operator import truediv
from datetime import datetime
import pandas as pd
import time
import os

from pylab import *
import seaborn as sns
import matplotlib.pyplot as plt


np.set_printoptions(suppress=True)
pd.options.display.float_format = '{:,.4f}'.format
plt.style.use('classic')

%matplotlib inline

Required domain methods


In [261]:
import sys
sys.path.insert(1, "../src/")
from TypeFeatImputer import TypeFeatImputer
from UnivCombineFilter import UnivCombineFilter
import MLpipeline as MLpipeline
import readmision_methods as rm

Prepare experiments


In [281]:
typeEncounter = "last" # ['first','last']
typeHypothesis = "all_readmisssion_vs_none" # ['all_readmisssion_vs_none','early_readmission_vs_none']
typeDataFeatures = "extended_extra_diag_3" # ["reduced","extended','extended_extra','extended_extra_diag_1','extended_extra_diag_3']
    #Extended_Extra_diag_1 -> Extended extra columns with disease in diag_3
    #Extended_Extra_diag_1 -> Extended extra columns with disease in diag_1
    #Extended_Extra -> Extended extra of columns
    #Extended -> Extended columns
    #Reduced -> minimum set of columns 
typeDataExperiment = "all" #["all", "disease"] 
    #all -> No filter disease column
    #disease -> Filter/Remove disease column and keep only rows with diagnosis column == 1

In [282]:
verbose = True
cv_thr = 0.3
cv_folds = 5

tr_thrs = [1.0] # [0.1,0.2,0.4,0.6,1.0]
ts_thr = 0.30

fs_methods = ["none","combine_fs","lasso_fs","rfe_rf_fs"] #["none","combine_fs","lasso_fs","rfe_rf_fs"]
cls_methods = ["logReg"] #["rf","svmRBF","logReg","knn","nn","gbt"]
lms = ["recall","f1"] #["f1_weighted","average_precision","roc_auc","recall"]
sm_types = ["none"] #["none","after"]
sm_method = "sm_smote"

In [283]:
#Load data
df_all = rm.load_data(typeEncounter, typeDataFeatures)
print "\nSHAPE:"
print df_all.shape

#Filter data by class
df_all = rm.filter_data_by_class(df_all, typeHypothesis)
print "\nSHAPE FILTERED:"
print df_all.shape

print "\nRows by class type:"
print df_all.iloc[:,-1].sort_values().unique(), np.sum(df_all["readmitted"] == 0), np.sum(df_all["readmitted"] == 1)

#Train & Test
X_train, X_test, y_train, y_test = MLpipeline.train_test_partition(df_all)
print "Train:", X_train.shape, "Test:",  X_test.shape

#Load hyperparams
hyperparams = np.load("../src/default_hyperparams.npy")
print "\nHyperparams:"
print hyperparams


SHAPE:
(67182, 69)

SHAPE FILTERED:
(67182, 69)

Rows by class type:
[0 1] 39785 27397
readmision_methods.py:20: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  df_all["readmitted"][df_all["readmitted"].values > 0] = 1
Train: (47027, 68) Test: (20155, 68)

Hyperparams:
[['fs' 'combine_fs' {'combine_fs__percentile': [5, 10, 20, 30, 40, 50]}]
 ['fs' 'rfe_rf_fs'
  {'rfe_rf_fs__n_features_to_select': [5, 10, 15, 20], 'rfe_rf_fs__step': [0.1]}]
 ['fs' 'lasso_fs' {'lasso_fs__estimator__C': [0.001, 0.01, 0.1, 1]}]
 ['cls' 'knn'
  {'knn__weights': ['uniform', 'distance'], 'knn__n_neighbors': [1, 3, 5, 7, 9, 11]}]
 ['cls' 'logReg'
  {'logReg__class_weight': [None, 'balanced'], 'logReg__C': [1e-05, 0.0001, 0.0005, 0.001, 0.005, 0.01, 0.05, 0.1, 0.5, 1, 5, 10, 15, 30], 'logReg__penalty': ['l1', 'l2']}]
 ['cls' 'svmRBF'
  {'svmRBF__gamma': [0.0001, 0.001, 0.01, 0.1, 1, 5], 'svmRBF__class_weight': [None, 'balanced'], 'svmRBF__C': [0.01, 0.1, 0.5, 1, 5, 10, 30, 50, 100]}]
 ['cls' 'rf'
  {'rf__criterion': ['entropy', 'gini'], 'rf__max_depth': [None, 4, 8, 12], 'rf__n_estimators': [200, 250, 300, 350, 400, 500]}]
 ['cls' 'nn'
  {'nn__hidden_layer_sizes': [(30,), (50,), (70,), (100,), (150,), (30, 30), (50, 50), (70, 70), (100, 100), (30, 30, 30), (50, 50, 50), (70, 70, 70)], 'nn__alpha': [1e-05, 1e-05, 0.0001, 0.001, 0.01, 0.1, 1, 3, 5, 10]}]
 ['cls' 'gbt'
  {'gbt__max_depth': [None, 8, 10, 12], 'gbt__learning_rate': [0.1, 0.01, 0.001], 'gbt__n_estimators': [300, 400, 500]}]
 ['after' 'sm_smote' {'sm_smote__k_neighbors': [3, 4, 5]}]]

In [284]:
from IPython.display import display, HTML

res = []

experiments = rm.get_experiments(typeDataExperiment)
print experiments

for exp in experiments:
    
    print "\nEXPERIMENT:", exp
    print "******************\n"
    
    #Filter data by disease
    df_all_filtered = rm.filter_data_by_experiment(df_all, exp)
    
    #Get features by type
    catCols, reducedCols = rm.compute_type_features(df_all_filtered)
    
    print df_all_filtered.shape
    print df_all_filtered.columns
       
    #Create id of experiment
    name = exp + "_" + typeDataFeatures + "_" + typeEncounter + "_" + typeHypothesis    
    print "\n", name
    

    #Execute experiment
    res.append(MLpipeline.run(name, df_all_filtered, catCols, reducedCols, hyperparams, ts_thr, tr_thrs, 
                   fs_methods, sm_method, sm_types, 
                   cls_methods, lms, cv_folds, cv_thr, True, True))


['all']

EXPERIMENT: all
******************

(67182, 69)
Index([u'gender', u'age', u'race_AfricanAmerican', u'race_Caucasian',
       u'race_Other', u'HbA1c', u'Change', u'time_in_hospital', u'diabetesMed',
       u'diss_home', u'medSpec_cardio', u'medSpec_Family/GeneralPractice',
       u'medSpec_InternalMedicine', u'medSpec_surgery', u'adm_src_1',
       u'adm_src_2', u'adm_src_3', u'adm_src_4', u'adm_src_5', u'adm_src_6',
       u'adm_src_7', u'adm_src_8', u'adm_src_10', u'adm_src_11', u'adm_src_13',
       u'adm_src_14', u'adm_src_22', u'adm_src_25', u'adm_1', u'adm_2',
       u'adm_3', u'adm_4', u'adm_7', u'number_treatment',
       u'num_lab_procedures', u'num_procedures', u'num_medications',
       u'number_outpatient', u'number_emergency', u'number_inpatient',
       u'number_diagnoses', u'insulin', u'metformin', u'pioglitazone',
       u'glimepiride', u'glipizide', u'repaglinide', u'nateglinide',
       u'ComplexHbA1c', u'add_in_out', u'add_procs_meds', u'div_visits_time',
       u'div_em_time', u'div_visit_med', u'div_em_med', u'sum_ch_med',
       u'number_treatment_0', u'number_treatment_1', u'number_treatment_2',
       u'number_treatment_3', u'Diabetis_3', u'Circulatory_3', u'Digestive_3',
       u'Genitourinary_3', u'Poisoning_3', u'Muscoskeletal_3', u'Neoplasms_3',
       u'Respiratory_3', u'readmitted'],
      dtype='object')

all_extended_extra_diag_3_last_all_readmisssion_vs_none

DataSet:
**********
**********
SIZE: 1.0
NAME: all_extended_extra_diag_3_last_all_readmisssion_vs_none
(67182, 69)
ALL TRAIN: (47027, 68)
TRAIN: [0's: 27849 1's: 19178 ]
ALL TEST: (20155, 68)
TEST: [0's: 11936 1's: 8219 ]

Num experiment: 0 / 5
****************
FS: combine_fs
SM: none
CLS: logReg
METRIC: recall
Fitting 5 folds for each of 336 candidates, totalling 1680 fits
[Parallel(n_jobs=-1)]: Done  18 tasks      | elapsed:   18.9s
[Parallel(n_jobs=-1)]: Done 168 tasks      | elapsed:  1.6min
[Parallel(n_jobs=-1)]: Done 418 tasks      | elapsed:  4.0min
[Parallel(n_jobs=-1)]: Done 768 tasks      | elapsed:  7.4min
[Parallel(n_jobs=-1)]: Done 1218 tasks      | elapsed: 12.4min
[Parallel(n_jobs=-1)]: Done 1680 out of 1680 | elapsed: 19.0min finished
TRAIN f1 (weighted): 0.559
TRAIN Precision [c=0,1]: [ 0.65943918  0.46622427]
TRAIN Recall [c=0,1]: [ 0.51764875  0.61179476]
TRAIN AUC: 0.565
TRAIN Sensibility: 0.611794764835
TRAIN Specificity:  0.517648748609

CV INNER metric: recall
CV INNER selected params ['balanced', 0.0001, 'l2', 5]
CV INNER score: 0.61126173097

CV OUTER f1-weighted score: 0.562  (+/-0.011)
CV OUTER prec score [c=0,1]: 0.652 (+/- 0.010), 0.480  (+/- 0.033)
CV OUTER rec  score [c=0,1]: 0.576 (+/- 0.125), 0.549  (+/- 0.122)
CV OUTER AUC score: 0.592  (+/-0.012)
CV OUTER Sensibility score: 0.549  (+/-0.122)
CV OUTER Specificity score: 0.576  (+/-0.125)
Selected params (bests from CV) ['balanced', 0.0001, 'l2', 5]

TEST f1 (weighted): 0.562
TEST Precision [c=0,1]: [ 0.65919095  0.46801093]
TEST Recall [c=0,1]: [ 0.52697721  0.60433143]
TEST AUC: 0.566
TEST Sensibility: 0.604331427181
TEST Specificity: 0.526977211796
Confussion matrix:
         | PRED
REAL-->  v 
[[6290 5646]
 [3252 4967]]

Total time: 1159.915833

Num experiment: 1 / 5
****************
FS: combine_fs
SM: none
CLS: logReg
METRIC: f1
Fitting 5 folds for each of 336 candidates, totalling 1680 fits
/home/ilmira/.conda/envs/readmision/lib/python2.7/site-packages/sklearn/metrics/classification.py:1135: UndefinedMetricWarning: F-score is ill-defined and being set to 0.0 due to no predicted samples.
  'precision', 'predicted', average, warn_for)
/home/ilmira/.conda/envs/readmision/lib/python2.7/site-packages/sklearn/metrics/classification.py:1135: UndefinedMetricWarning: F-score is ill-defined and being set to 0.0 due to no predicted samples.
  'precision', 'predicted', average, warn_for)
/home/ilmira/.conda/envs/readmision/lib/python2.7/site-packages/sklearn/metrics/classification.py:1135: UndefinedMetricWarning: F-score is ill-defined and being set to 0.0 due to no predicted samples.
  'precision', 'predicted', average, warn_for)
/home/ilmira/.conda/envs/readmision/lib/python2.7/site-packages/sklearn/metrics/classification.py:1135: UndefinedMetricWarning: F-score is ill-defined and being set to 0.0 due to no predicted samples.
  'precision', 'predicted', average, warn_for)
/home/ilmira/.conda/envs/readmision/lib/python2.7/site-packages/sklearn/metrics/classification.py:1135: UndefinedMetricWarning: F-score is ill-defined and being set to 0.0 due to no predicted samples.
  'precision', 'predicted', average, warn_for)
/home/ilmira/.conda/envs/readmision/lib/python2.7/site-packages/sklearn/metrics/classification.py:1135: UndefinedMetricWarning: F-score is ill-defined and being set to 0.0 due to no predicted samples.
  'precision', 'predicted', average, warn_for)
/home/ilmira/.conda/envs/readmision/lib/python2.7/site-packages/sklearn/metrics/classification.py:1135: UndefinedMetricWarning: F-score is ill-defined and being set to 0.0 due to no predicted samples.
  'precision', 'predicted', average, warn_for)
/home/ilmira/.conda/envs/readmision/lib/python2.7/site-packages/sklearn/metrics/classification.py:1135: UndefinedMetricWarning: F-score is ill-defined and being set to 0.0 due to no predicted samples.
  'precision', 'predicted', average, warn_for)
/home/ilmira/.conda/envs/readmision/lib/python2.7/site-packages/sklearn/metrics/classification.py:1135: UndefinedMetricWarning: F-score is ill-defined and being set to 0.0 due to no predicted samples.
  'precision', 'predicted', average, warn_for)
/home/ilmira/.conda/envs/readmision/lib/python2.7/site-packages/sklearn/metrics/classification.py:1135: UndefinedMetricWarning: F-score is ill-defined and being set to 0.0 due to no predicted samples.
  'precision', 'predicted', average, warn_for)
[Parallel(n_jobs=-1)]: Done  18 tasks      | elapsed:   15.5s
/home/ilmira/.conda/envs/readmision/lib/python2.7/site-packages/sklearn/metrics/classification.py:1135: UndefinedMetricWarning: F-score is ill-defined and being set to 0.0 due to no predicted samples.
  'precision', 'predicted', average, warn_for)
/home/ilmira/.conda/envs/readmision/lib/python2.7/site-packages/sklearn/metrics/classification.py:1135: UndefinedMetricWarning: F-score is ill-defined and being set to 0.0 due to no predicted samples.
  'precision', 'predicted', average, warn_for)
/home/ilmira/.conda/envs/readmision/lib/python2.7/site-packages/sklearn/metrics/classification.py:1135: UndefinedMetricWarning: F-score is ill-defined and being set to 0.0 due to no predicted samples.
  'precision', 'predicted', average, warn_for)
/home/ilmira/.conda/envs/readmision/lib/python2.7/site-packages/sklearn/metrics/classification.py:1135: UndefinedMetricWarning: F-score is ill-defined and being set to 0.0 due to no predicted samples.
  'precision', 'predicted', average, warn_for)
/home/ilmira/.conda/envs/readmision/lib/python2.7/site-packages/sklearn/metrics/classification.py:1135: UndefinedMetricWarning: F-score is ill-defined and being set to 0.0 due to no predicted samples.
  'precision', 'predicted', average, warn_for)
[Parallel(n_jobs=-1)]: Done 168 tasks      | elapsed:  1.7min
[Parallel(n_jobs=-1)]: Done 418 tasks      | elapsed:  4.0min
/home/ilmira/.conda/envs/readmision/lib/python2.7/site-packages/sklearn/metrics/classification.py:1135: UndefinedMetricWarning: F-score is ill-defined and being set to 0.0 due to no predicted samples.
  'precision', 'predicted', average, warn_for)
[Parallel(n_jobs=-1)]: Done 768 tasks      | elapsed:  7.6min
[Parallel(n_jobs=-1)]: Done 1218 tasks      | elapsed: 12.6min
[Parallel(n_jobs=-1)]: Done 1680 out of 1680 | elapsed: 19.3min finished
TRAIN f1 (weighted): 0.604
TRAIN Precision [c=0,1]: [ 0.67719775  0.51081742]
TRAIN Recall [c=0,1]: [ 0.6265216  0.566326 ]
TRAIN AUC: 0.596
TRAIN Sensibility: 0.56632599854
TRAIN Specificity:  0.626521598621

CV INNER metric: f1
CV INNER selected params ['balanced', 0.01, 'l1', 50]
CV INNER score: 0.534223569635

CV OUTER f1-weighted score: 0.602  (+/-0.003)
CV OUTER prec score [c=0,1]: 0.674 (+/- 0.003), 0.508  (+/- 0.003)
CV OUTER rec  score [c=0,1]: 0.625 (+/- 0.003), 0.562  (+/- 0.006)
CV OUTER AUC score: 0.635  (+/-0.003)
CV OUTER Sensibility score: 0.562  (+/-0.006)
CV OUTER Specificity score: 0.625  (+/-0.003)
Selected params (bests from CV) ['balanced', 0.01, 'l1', 50]

TEST f1 (weighted): 0.605
TEST Precision [c=0,1]: [ 0.67811705  0.51109168]
TEST Recall [c=0,1]: [ 0.62516756  0.56904733]
TEST AUC: 0.597
TEST Sensibility: 0.569047329359
TEST Specificity: 0.625167560322
Confussion matrix:
         | PRED
REAL-->  v 
[[7462 4474]
 [3542 4677]]

Total time: 1176.53580594

Num experiment: 2 / 5
****************
FS: none
SM: none
CLS: logReg
METRIC: recall
Fitting 5 folds for each of 56 candidates, totalling 280 fits
[Parallel(n_jobs=-1)]: Done  18 tasks      | elapsed:   17.7s
[Parallel(n_jobs=-1)]: Done 168 tasks      | elapsed:  2.3min
[Parallel(n_jobs=-1)]: Done 280 out of 280 | elapsed:  4.9min finished
TRAIN f1 (weighted): 0.609
TRAIN Precision [c=0,1]: [ 0.68185529  0.51605182]
TRAIN Recall [c=0,1]: [ 0.62975331  0.57331317]
TRAIN AUC: 0.602
TRAIN Sensibility: 0.573313171342
TRAIN Specificity:  0.629753312507

CV INNER metric: recall
CV INNER selected params ['balanced', 0.01, 'l1']
CV INNER score: 0.571671880431

CV OUTER f1-weighted score: 0.606  (+/-0.002)
CV OUTER prec score [c=0,1]: 0.678 (+/- 0.002), 0.512  (+/- 0.002)
CV OUTER rec  score [c=0,1]: 0.627 (+/- 0.004), 0.568  (+/- 0.006)
CV OUTER AUC score: 0.641  (+/-0.002)
CV OUTER Sensibility score: 0.568  (+/-0.006)
CV OUTER Specificity score: 0.627  (+/-0.004)
Selected params (bests from CV) ['balanced', 0.01, 'l1']

TEST f1 (weighted): 0.607
TEST Precision [c=0,1]: [ 0.68061634  0.51333406]
TEST Recall [c=0,1]: [ 0.6254189   0.57379243]
TEST AUC: 0.600
TEST Sensibility: 0.573792432169
TEST Specificity: 0.625418900804
Confussion matrix:
         | PRED
REAL-->  v 
[[7465 4471]
 [3503 4716]]

Total time: 312.163750887

Num experiment: 3 / 5
****************
FS: none
SM: none
CLS: logReg
METRIC: f1
Fitting 5 folds for each of 56 candidates, totalling 280 fits
/home/ilmira/.conda/envs/readmision/lib/python2.7/site-packages/sklearn/metrics/classification.py:1135: UndefinedMetricWarning: F-score is ill-defined and being set to 0.0 due to no predicted samples.
  'precision', 'predicted', average, warn_for)
/home/ilmira/.conda/envs/readmision/lib/python2.7/site-packages/sklearn/metrics/classification.py:1135: UndefinedMetricWarning: F-score is ill-defined and being set to 0.0 due to no predicted samples.
  'precision', 'predicted', average, warn_for)
/home/ilmira/.conda/envs/readmision/lib/python2.7/site-packages/sklearn/metrics/classification.py:1135: UndefinedMetricWarning: F-score is ill-defined and being set to 0.0 due to no predicted samples.
  'precision', 'predicted', average, warn_for)
/home/ilmira/.conda/envs/readmision/lib/python2.7/site-packages/sklearn/metrics/classification.py:1135: UndefinedMetricWarning: F-score is ill-defined and being set to 0.0 due to no predicted samples.
  'precision', 'predicted', average, warn_for)
/home/ilmira/.conda/envs/readmision/lib/python2.7/site-packages/sklearn/metrics/classification.py:1135: UndefinedMetricWarning: F-score is ill-defined and being set to 0.0 due to no predicted samples.
  'precision', 'predicted', average, warn_for)
/home/ilmira/.conda/envs/readmision/lib/python2.7/site-packages/sklearn/metrics/classification.py:1135: UndefinedMetricWarning: F-score is ill-defined and being set to 0.0 due to no predicted samples.
  'precision', 'predicted', average, warn_for)
/home/ilmira/.conda/envs/readmision/lib/python2.7/site-packages/sklearn/metrics/classification.py:1135: UndefinedMetricWarning: F-score is ill-defined and being set to 0.0 due to no predicted samples.
  'precision', 'predicted', average, warn_for)
/home/ilmira/.conda/envs/readmision/lib/python2.7/site-packages/sklearn/metrics/classification.py:1135: UndefinedMetricWarning: F-score is ill-defined and being set to 0.0 due to no predicted samples.
  'precision', 'predicted', average, warn_for)
/home/ilmira/.conda/envs/readmision/lib/python2.7/site-packages/sklearn/metrics/classification.py:1135: UndefinedMetricWarning: F-score is ill-defined and being set to 0.0 due to no predicted samples.
  'precision', 'predicted', average, warn_for)
/home/ilmira/.conda/envs/readmision/lib/python2.7/site-packages/sklearn/metrics/classification.py:1135: UndefinedMetricWarning: F-score is ill-defined and being set to 0.0 due to no predicted samples.
  'precision', 'predicted', average, warn_for)
[Parallel(n_jobs=-1)]: Done  18 tasks      | elapsed:   17.7s
/home/ilmira/.conda/envs/readmision/lib/python2.7/site-packages/sklearn/metrics/classification.py:1135: UndefinedMetricWarning: F-score is ill-defined and being set to 0.0 due to no predicted samples.
  'precision', 'predicted', average, warn_for)
/home/ilmira/.conda/envs/readmision/lib/python2.7/site-packages/sklearn/metrics/classification.py:1135: UndefinedMetricWarning: F-score is ill-defined and being set to 0.0 due to no predicted samples.
  'precision', 'predicted', average, warn_for)
/home/ilmira/.conda/envs/readmision/lib/python2.7/site-packages/sklearn/metrics/classification.py:1135: UndefinedMetricWarning: F-score is ill-defined and being set to 0.0 due to no predicted samples.
  'precision', 'predicted', average, warn_for)
/home/ilmira/.conda/envs/readmision/lib/python2.7/site-packages/sklearn/metrics/classification.py:1135: UndefinedMetricWarning: F-score is ill-defined and being set to 0.0 due to no predicted samples.
  'precision', 'predicted', average, warn_for)
/home/ilmira/.conda/envs/readmision/lib/python2.7/site-packages/sklearn/metrics/classification.py:1135: UndefinedMetricWarning: F-score is ill-defined and being set to 0.0 due to no predicted samples.
  'precision', 'predicted', average, warn_for)
[Parallel(n_jobs=-1)]: Done 168 tasks      | elapsed:  2.2min
[Parallel(n_jobs=-1)]: Done 280 out of 280 | elapsed:  4.9min finished
TRAIN f1 (weighted): 0.612
TRAIN Precision [c=0,1]: [ 0.68287698  0.51926814]
TRAIN Recall [c=0,1]: [ 0.63582175  0.57122745]
TRAIN AUC: 0.604
TRAIN Sensibility: 0.571227448118
TRAIN Specificity:  0.635821753025

CV INNER metric: f1
CV INNER selected params ['balanced', 0.05, 'l2']
CV INNER score: 0.540786419902

CV OUTER f1-weighted score: 0.609  (+/-0.004)
CV OUTER prec score [c=0,1]: 0.680 (+/- 0.004), 0.516  (+/- 0.004)
CV OUTER rec  score [c=0,1]: 0.634 (+/- 0.002), 0.566  (+/- 0.007)
CV OUTER AUC score: 0.643  (+/-0.002)
CV OUTER Sensibility score: 0.566  (+/-0.007)
CV OUTER Specificity score: 0.634  (+/-0.002)
Selected params (bests from CV) ['balanced', 0.05, 'l2']

TEST f1 (weighted): 0.607
TEST Precision [c=0,1]: [ 0.67993833  0.5137474 ]
TEST Recall [c=0,1]: [ 0.62809987  0.57062903]
TEST AUC: 0.599
TEST Sensibility: 0.570629030296
TEST Specificity: 0.628099865952
Confussion matrix:
         | PRED
REAL-->  v 
[[7497 4439]
 [3529 4690]]

Total time: 314.38953805

Num experiment: 4 / 5
****************
FS: rfe_rf_fs
SM: none
CLS: logReg
METRIC: recall
Fitting 5 folds for each of 224 candidates, totalling 1120 fits
[Parallel(n_jobs=-1)]: Done  18 tasks      | elapsed:  4.1min
[Parallel(n_jobs=-1)]: Done 168 tasks      | elapsed: 24.2min
[Parallel(n_jobs=-1)]: Done 418 tasks      | elapsed: 58.6min
[Parallel(n_jobs=-1)]: Done 768 tasks      | elapsed: 106.7min
[Parallel(n_jobs=-1)]: Done 1120 out of 1120 | elapsed: 154.9min finished
TRAIN f1 (weighted): 0.552
TRAIN Precision [c=0,1]: [ 0.66123512  0.46229645]
TRAIN Recall [c=0,1]: [ 0.4925132   0.63359057]
TRAIN AUC: 0.563
TRAIN Sensibility: 0.633590572531
TRAIN Specificity:  0.492513196165

CV INNER metric: recall
CV INNER selected params [10, 'balanced', 0.001, 0.1, 'l1']
CV INNER score: 0.655509210984

CV OUTER f1-weighted score: 0.537  (+/-0.004)
CV OUTER prec score [c=0,1]: 0.658 (+/- 0.003), 0.453  (+/- 0.003)
CV OUTER rec  score [c=0,1]: 0.454 (+/- 0.010), 0.657  (+/- 0.008)
CV OUTER AUC score: 0.586  (+/-0.004)
CV OUTER Sensibility score: 0.657  (+/-0.008)
CV OUTER Specificity score: 0.454  (+/-0.010)
Selected params (bests from CV) [10, 'balanced', 0.001, 0.1, 'l1']

TEST f1 (weighted): 0.555
TEST Precision [c=0,1]: [ 0.6622237   0.46431133]
TEST Recall [c=0,1]: [ 0.49949732  0.63000365]
TEST AUC: 0.565
TEST Sensibility: 0.630003650079
TEST Specificity: 0.499497319035
Confussion matrix:
         | PRED
REAL-->  v 
[[5962 5974]
 [3041 5178]]

Total time: 9483.84614182

Num experiment: 5 / 5
****************
FS: rfe_rf_fs
SM: none
CLS: logReg
METRIC: f1
Fitting 5 folds for each of 224 candidates, totalling 1120 fits
/home/ilmira/.conda/envs/readmision/lib/python2.7/site-packages/sklearn/metrics/classification.py:1135: UndefinedMetricWarning: F-score is ill-defined and being set to 0.0 due to no predicted samples.
  'precision', 'predicted', average, warn_for)
/home/ilmira/.conda/envs/readmision/lib/python2.7/site-packages/sklearn/metrics/classification.py:1135: UndefinedMetricWarning: F-score is ill-defined and being set to 0.0 due to no predicted samples.
  'precision', 'predicted', average, warn_for)
/home/ilmira/.conda/envs/readmision/lib/python2.7/site-packages/sklearn/metrics/classification.py:1135: UndefinedMetricWarning: F-score is ill-defined and being set to 0.0 due to no predicted samples.
  'precision', 'predicted', average, warn_for)
/home/ilmira/.conda/envs/readmision/lib/python2.7/site-packages/sklearn/metrics/classification.py:1135: UndefinedMetricWarning: F-score is ill-defined and being set to 0.0 due to no predicted samples.
  'precision', 'predicted', average, warn_for)
/home/ilmira/.conda/envs/readmision/lib/python2.7/site-packages/sklearn/metrics/classification.py:1135: UndefinedMetricWarning: F-score is ill-defined and being set to 0.0 due to no predicted samples.
  'precision', 'predicted', average, warn_for)
/home/ilmira/.conda/envs/readmision/lib/python2.7/site-packages/sklearn/metrics/classification.py:1135: UndefinedMetricWarning: F-score is ill-defined and being set to 0.0 due to no predicted samples.
  'precision', 'predicted', average, warn_for)
/home/ilmira/.conda/envs/readmision/lib/python2.7/site-packages/sklearn/metrics/classification.py:1135: UndefinedMetricWarning: F-score is ill-defined and being set to 0.0 due to no predicted samples.
  'precision', 'predicted', average, warn_for)
/home/ilmira/.conda/envs/readmision/lib/python2.7/site-packages/sklearn/metrics/classification.py:1135: UndefinedMetricWarning: F-score is ill-defined and being set to 0.0 due to no predicted samples.
  'precision', 'predicted', average, warn_for)
/home/ilmira/.conda/envs/readmision/lib/python2.7/site-packages/sklearn/metrics/classification.py:1135: UndefinedMetricWarning: F-score is ill-defined and being set to 0.0 due to no predicted samples.
  'precision', 'predicted', average, warn_for)
/home/ilmira/.conda/envs/readmision/lib/python2.7/site-packages/sklearn/metrics/classification.py:1135: UndefinedMetricWarning: F-score is ill-defined and being set to 0.0 due to no predicted samples.
  'precision', 'predicted', average, warn_for)
/home/ilmira/.conda/envs/readmision/lib/python2.7/site-packages/sklearn/metrics/classification.py:1135: UndefinedMetricWarning: F-score is ill-defined and being set to 0.0 due to no predicted samples.
  'precision', 'predicted', average, warn_for)
/home/ilmira/.conda/envs/readmision/lib/python2.7/site-packages/sklearn/metrics/classification.py:1135: UndefinedMetricWarning: F-score is ill-defined and being set to 0.0 due to no predicted samples.
  'precision', 'predicted', average, warn_for)
/home/ilmira/.conda/envs/readmision/lib/python2.7/site-packages/sklearn/metrics/classification.py:1135: UndefinedMetricWarning: F-score is ill-defined and being set to 0.0 due to no predicted samples.
  'precision', 'predicted', average, warn_for)
/home/ilmira/.conda/envs/readmision/lib/python2.7/site-packages/sklearn/metrics/classification.py:1135: UndefinedMetricWarning: F-score is ill-defined and being set to 0.0 due to no predicted samples.
  'precision', 'predicted', average, warn_for)
/home/ilmira/.conda/envs/readmision/lib/python2.7/site-packages/sklearn/metrics/classification.py:1135: UndefinedMetricWarning: F-score is ill-defined and being set to 0.0 due to no predicted samples.
  'precision', 'predicted', average, warn_for)
/home/ilmira/.conda/envs/readmision/lib/python2.7/site-packages/sklearn/metrics/classification.py:1135: UndefinedMetricWarning: F-score is ill-defined and being set to 0.0 due to no predicted samples.
  'precision', 'predicted', average, warn_for)
[Parallel(n_jobs=-1)]: Done  18 tasks      | elapsed:  4.1min
[Parallel(n_jobs=-1)]: Done 168 tasks      | elapsed: 23.9min
[Parallel(n_jobs=-1)]: Done 418 tasks      | elapsed: 58.3min
[Parallel(n_jobs=-1)]: Done 768 tasks      | elapsed: 105.9min
[Parallel(n_jobs=-1)]: Done 1120 out of 1120 | elapsed: 153.9min finished
TRAIN f1 (weighted): 0.552
TRAIN Precision [c=0,1]: [ 0.66123512  0.46229645]
TRAIN Recall [c=0,1]: [ 0.4925132   0.63359057]
TRAIN AUC: 0.563
TRAIN Sensibility: 0.633590572531
TRAIN Specificity:  0.492513196165

CV INNER metric: f1
CV INNER selected params [10, 'balanced', 0.001, 0.1, 'l1']
CV INNER score: 0.535457744838

CV OUTER f1-weighted score: 0.537  (+/-0.004)
CV OUTER prec score [c=0,1]: 0.658 (+/- 0.003), 0.453  (+/- 0.003)
CV OUTER rec  score [c=0,1]: 0.454 (+/- 0.010), 0.657  (+/- 0.008)
CV OUTER AUC score: 0.586  (+/-0.004)
CV OUTER Sensibility score: 0.657  (+/-0.008)
CV OUTER Specificity score: 0.454  (+/-0.010)
Selected params (bests from CV) [10, 'balanced', 0.001, 0.1, 'l1']

TEST f1 (weighted): 0.555
TEST Precision [c=0,1]: [ 0.6622237   0.46431133]
TEST Recall [c=0,1]: [ 0.49949732  0.63000365]
TEST AUC: 0.565
TEST Sensibility: 0.630003650079
TEST Specificity: 0.499497319035
Confussion matrix:
         | PRED
REAL-->  v 
[[5962 5974]
 [3041 5178]]

Total time: 9430.87909889

In [201]:
if 'dfAux' not in locals():
    print "Not dfAux"
    dfAux = pd.DataFrame()

if 'df' in locals():
    print "Exist df"
    dfAux = df.copy()

df = pd.DataFrame(np.array(res[0]).reshape(len(res[0]),35), columns=
                          ["exp", "name",
                           "size_tr","fs","sm","cls","metric","params",
                           "tr_sens","tr_spec","tr_auc",
                           "tr_prec","tr_rec","tr_f1",
                           "cv_sens_mean","cv_sens_std","cv_spec_mean","cv_spec_std","cv_auc_mean","cv_auc_std",
                           "cv_prec_mean","cv_prec_std","cv_rec_mean","cv_rec_std",
                           "cv_f1_mean","cv_f1_std",
                           "test_sens","test_spec","test_auc",
                           "test_rec","test_prec","test_f1",
                           "cm_test",
                           "time","pipeline"])

df[["size_tr","fs","sm","metric","cls","params", 
    "tr_auc","tr_sens","tr_spec",
    "cv_auc_std","cv_f1_mean","cv_sens_mean","cv_sens_std","cv_spec_mean","cv_spec_std","cv_auc_mean",
    "test_auc","test_sens","test_spec","test_f1"]]


Not dfAux
---------------------------------------------------------------------------
IndexError                                Traceback (most recent call last)
<ipython-input-201-35367bab853d> in <module>()
      7     dfAux = df.copy()
      8 
----> 9 df = pd.DataFrame(np.array(res[0]).reshape(len(res[0]),35), columns=
     10                           ["exp", "name",
     11                            "size_tr","fs","sm","cls","metric","params",

IndexError: list index out of range

In [30]:
dfAux =pd.DataFrame()
df = pd.concat((dfAux, df))
df[["size_tr","fs","sm","metric","cls","params", 
    "tr_auc","tr_sens","tr_spec",
    "cv_auc_mean","cv_f1_mean","cv_sens_mean","cv_spec_mean",
    "test_auc","test_f1","test_sens","test_spec"]].sort_values("cv_auc_mean", ascending=False)


Out[30]:
size_tr fs sm metric cls params tr_auc tr_sens tr_spec cv_auc_mean cv_f1_mean cv_sens_mean cv_spec_mean test_auc test_f1 test_sens test_spec
5 0.0100 none none f1_weighted logReg [None, 0.5, l1] 0.5577 0.1190 0.9964 0.6449 0.8142 0.0615 0.9880 0.5200 0.8178 0.0528 0.9872
3 0.0100 none none recall logReg [balanced, 0.1, l1] 0.6882 0.8333 0.5432 0.6278 0.6403 0.5846 0.5735 0.5542 0.6062 0.5845 0.5238
4 0.0100 none none f1 logReg [balanced, 0.1, l1] 0.6882 0.8333 0.5432 0.6278 0.6403 0.5846 0.5735 0.5542 0.6062 0.5845 0.5238
2 0.0100 none none f1_weighted rf [entropy, None, 200] 1.0000 1.0000 1.0000 0.5663 0.8018 0.0000 1.0000 0.5018 0.8093 0.0044 0.9992
8 0.0100 none none f1_weighted knn [uniform, 9] 0.5238 0.0476 1.0000 0.5619 0.8018 0.0000 1.0000 0.5018 0.8091 0.0056 0.9981
1 0.0100 none none f1 rf [gini, 4, 200] 0.7850 0.5952 0.9748 0.5361 0.7868 0.0154 0.9566 0.5280 0.8115 0.1057 0.9504
0 0.0100 none none recall rf [entropy, 4, 200] 0.7814 0.5952 0.9676 0.5312 0.7860 0.0154 0.9542 0.5291 0.8083 0.1190 0.9391
9 0.0100 none none recall nb [] 0.5773 1.0000 0.1547 0.5188 0.3305 0.8154 0.2169 0.5001 0.2426 0.8576 0.1425
10 0.0100 none none f1 nb [] 0.5773 1.0000 0.1547 0.5188 0.3305 0.8154 0.2169 0.5001 0.2426 0.8576 0.1425
11 0.0100 none none f1_weighted nb [] 0.5773 1.0000 0.1547 0.5188 0.3305 0.8154 0.2169 0.5001 0.2426 0.8576 0.1425
6 0.0100 none none recall knn [uniform, 1] 1.0000 1.0000 1.0000 0.5174 0.7836 0.1385 0.8964 0.5201 0.7807 0.1680 0.8722
7 0.0100 none none f1 knn [uniform, 3] 0.6238 0.2619 0.9856 0.5095 0.8029 0.0462 0.9735 0.5137 0.8077 0.0673 0.9601

In [ ]: