In [1]:
# %load nbinit.py
# Widen the notebook container and tint stderr output yellow; the HTML object
# is the cell's last expression, so Jupyter renders the <style> tag inline.
from IPython.display import HTML
HTML("<style>.container { width: 100% !important; padding-left: 1em; padding-right: 2em; } div.output_stderr { background: #FFA; }</style>")
Out[1]:
Let's see how well a decision tree can classify the data. To do so, we need to consider the following preparation and evaluation steps.
Once the dataset is loaded we will convert the categorical data into numeric values.
Finding the right parameters and features for the best performing classifier can be a challenge. The number of possible configurations grows quickly, and knowing how they perform requires training and testing with each of them.
We may also run the training and testing on a configuration multiple times with different random splits of the data set. The performance metrics will be averaged over the iterations.
We use precision, recall, and the F1 score to evaluate each configuration.
In [29]:
### Load Packages
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
import sklearn.tree
import pydot_ng as pdot
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report
from sklearn.metrics import precision_recall_fscore_support
import itertools
In [2]:
### Read data
# NOTE(review): hardcoded absolute path — consider a configurable DATA_DIR
DATAFILE = '/home/data/archive.ics.uci.edu/BankMarketing/bank.csv'
# the UCI Bank Marketing CSV uses ';' as the field separator
df = pd.read_csv(DATAFILE, sep=';')
In [3]:
### use sets and '-' difference operation 'A-B'. Also there is a symmetric difference '^'
all_features = set(df.columns)-set(['y'])
# describe() only reports numeric columns, so its columns give the numeric features
num_features = set(df.describe().columns)
cat_features = all_features-num_features
print("All features: ", ", ".join(all_features), "\nNumerical features: ", ", ".join(num_features), "\nCategorical features: ", ", ".join(cat_features))
In [30]:
### convert the categorical variables to numeric ones
level_substitution = {}  # remembers the level -> code mapping per column

def levels2index(levels):
    """Map each level to its integer position in `levels` (order of appearance)."""
    return {level: pos for pos, level in enumerate(levels)}
# Apply the categorical -> integer mapping on a copy, so the raw frame stays intact
df_num = df.copy()
for c in cat_features:
    level_substitution[c] = levels2index(df[c].unique())
    # plain assignment instead of inplace=True: Series.replace(..., inplace=True)
    # on a column selection does not reliably write back to the frame
    # (SettingWithCopy / copy-on-write semantics in newer pandas)
    df_num[c] = df_num[c].replace(level_substitution[c])
## same for the target: 'no' -> 0, 'yes' -> 1
df_num['y'] = df_num['y'].replace({'no': 0, 'yes': 1})
df_num
Out[30]:
In [33]:
### create feature matrix and target vector
# .as_matrix() was removed in pandas 1.0 — .to_numpy() is the replacement.
# 'day' and 'month' are excluded from the feature set here.
X = df_num[list(all_features-set(['day', 'month']))].to_numpy()
y = df_num.y.to_numpy()
X, y
Out[33]:
In [34]:
### Compare tree depths on one fixed train/test split.
# Hoisted out of the loop: with random_state=42 the split is identical on every
# iteration, so recomputing it per depth was redundant work.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.4, random_state=42)
for d in [3, 5, 7, 11, 13]:
    clf = DecisionTreeClassifier(max_depth=d)
    clf.fit(X_train, y_train)
    ŷ = clf.predict(X_test)  # predictions of the last depth remain available to later cells
    print('Depth %d' % d)
    print(classification_report(y_test, ŷ))
Two methods from sklearn.metrics can be helpful:
confusion_matrix produces a confusion matrix; precision_recall_fscore_support returns a matrix with values for each of them across all target levels.
In [36]:
# confusion matrix for the last fitted tree: rows = true class, columns = predicted
# (class 0 = 'no', 1 = 'yes' per the target encoding above)
cm = confusion_matrix(y_test, ŷ)
cm
Out[36]:
In [37]:
# four arrays — precision, recall, F1, support — each with one entry per class
prf1s = precision_recall_fscore_support(y_test, ŷ)
prf1s
Out[37]:
In [10]:
# Aggregate the metric arrays over iterations by stacking rows and averaging.
# NOTE(review): prf1s is never recomputed inside this loop, so all 100 stacked
# rows are identical and the mean simply reproduces prf1s — presumably a
# placeholder for re-running train/test per iteration; confirm intent.
perf = None
for i in range(100):
    if perf is not None:  # idiomatic None check instead of type(...) comparison
        perf = np.vstack((perf, np.array(prf1s).reshape(1, 8)))
    else:
        perf = np.array(prf1s).reshape(1, 8)
perf_agg = perf.mean(axis=0)  # column-wise mean over the iterations
# one-row frame with a two-level column index: (metric, class)
pd.DataFrame(perf_agg.reshape(1,8), columns=[[b for a in ['Precision', 'Recall', 'F1_score', 'Support'] for b in [a, a]], ['no', 'yes']*4])
Out[10]:
In [14]:
# Prototype: a one-row results frame with mixed dtypes under a two-level
# column index ((Params | metric) x (field | class)).
metric_cols = [m for metric in ['Precision', 'Recall', 'F1_score', 'Support'] for m in (metric, metric)]
performance_df = pd.DataFrame(columns=[
    ['Params', 'Params', 'Params'] + metric_cols,
    ['MaxDepth', 'Nfeature', 'Features'] + ['no', 'yes'] * 4,
])
# build the row by gluing an object-typed part and a float part side by side,
# then adopt the hierarchical column labels of the template
tempdf = pd.concat(
    [pd.DataFrame({'a': [1], 'b': [2], 'c': ['Hello']}),
     pd.DataFrame(np.zeros((1, 8)))],
    axis=1, ignore_index=True,
)
tempdf.columns = performance_df.columns
tempdf
Out[14]:
In [274]:
# scratch cell: a 1x8 zero row, matching the eight metric columns of the template
pd.DataFrame(np.zeros(8).reshape(1,8))
Out[274]:
In [41]:
# creating a template (i.e. an empty table) with hierarchical columns:
# three 'Params' columns followed by (metric, class) pairs for both classes
doubled_metrics = [name for name in ['Precision', 'Recall', 'F1_score', 'Support'] for _ in range(2)]
performance_template_df = pd.DataFrame(columns=[
    ['Params'] * 3 + doubled_metrics,
    ['MaxDepth', 'Nfeature', 'Features'] + ['no', 'yes'] * 4,
])
performance_template_df
Out[41]:
The following code implements nested loops for MaxDepth, number and permutation of features. In addition, we have an internal loop to aggregate the performance metrics over a number of different random splits.
The outer two loops, however, only iterate over one value each. The commented code shows how they should run...
In [42]:
%%time
performance_df = performance_template_df.copy() #-- always start fresh
for MaxDepth in [5]: ###range(5,9):
for Nftr in [8]: ###[len(all_features) - k for k in range(len(all_features)-2))]:
for ftrs in itertools.combinations(all_features-set(['day', 'month']), Nftr):
X = df_num[list(ftrs)].as_matrix()
clf = DecisionTreeClassifier(max_depth=MaxDepth)
perf_arr = None #-- this array will hold results for different random samples
for i in range(10): ### running train and test on different random samples
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.2, random_state=i)
clf.fit(X_train, y_train)
ŷ = clf.predict(X_test)
#Prec, Recall, F1, Supp
prf1s = precision_recall_fscore_support(y_test, ŷ)
##
if type(perf_arr)!=type(None):
perf_arr = np.vstack((perf, np.array(prf1s).reshape(1,8)))
else:
perf_arr = np.array(prf1s).reshape(1,8)
perf_agg = perf_arr.mean(axis=0) #-- mean over rows, for each column
perf_df = pd.concat([ #-- creating a 1 row dataframe is a bit tricky because of the different data types
pd.DataFrame({'a': [MaxDepth], 'b': [Nftr], 'c': ['|'.join(list(ftrs))]}),
pd.DataFrame(perf_agg.reshape(1, 8))
], axis=1, ignore_index=True)
perf_df.columns=performance_df.columns
performance_df = performance_df.append(perf_df, ignore_index=True)
In [43]:
# one row per evaluated feature combination, metrics averaged over the 10 splits
performance_df
Out[43]:
That took a while (about 2 minutes). Once computations take that long we should look at a different way to implement them ... outside the notebook.
Let's see what the best performing configuration with respect to the F1-score of 'yes' is:
In [61]:
# idxmax has stable, unambiguous semantics (returns the index label), whereas
# Series.argmax changed meaning across pandas versions; with the RangeIndex
# produced by the ignore_index appends above, the result is identical.
best = performance_df.F1_score.yes.idxmax()
print(performance_df.loc[best])
print("\nFeatures: ", ', '.join(['"%s"' % f for f in performance_df.loc[best].Params.Features.split('|')]))
In [ ]: