This Data Mining Workflow uses both Pipefitter and SWAT

Set up the Jupyter Notebook for Analysis



In [1]:

    
# Import necessary packages and modules
import swat, collections
import pandas as pd
import matplotlib.pyplot as plt
from pipefitter.pipeline import Pipeline
from pipefitter.transformer import Imputer
from pipefitter.estimator import DecisionTree, DecisionForest, GBTree, NeuralNetwork
%matplotlib inline

# Set the connection by specifying the hostname, port, username, and password
conn = swat.CAS(host, port, username, password)

# Data set shortcut
indata = 'hmeq'

# Create a CAS library called DMLib pointing to the defined server-side directory
DMLib = conn.addCaslib('DMlib', datasource = 'path', path = '/viyafiles')

# Do a server-side data load into CAS memory
castbl = conn.loadTable(indata + '.sas7bdat', casOut = indata)['casTable']
castbl.replace = True









    



NOTE: 'DMlib' is now the active caslib.
NOTE: Cloud Analytic Services added the caslib 'DMlib'.
NOTE: Cloud Analytic Services made the file hmeq.sas7bdat available as table HMEQ in caslib DMlib.

Get the first few rows



In [2]:

    
# Assign the variable name df to the new CASTable object
df = conn.CASTable(indata)

# Perform the head method to return the first 5 rows
df.head()









    Out[2]:





Selected Rows from Table HMEQ
  
    
      
      BAD
      LOAN
      MORTDUE
      VALUE
      REASON
      JOB
      YOJ
      DEROG
      DELINQ
      CLAGE
      NINQ
      CLNO
      DEBTINC
    
  
  
    
      0
      1.0
      1100.0
      25860.0
      39025.0
      HomeImp
      Other
      10.5
      0.0
      0.0
      94.366667
      1.0
      9.0
      NaN
    
    
      1
      1.0
      1300.0
      70053.0
      68400.0
      HomeImp
      Other
      7.0
      0.0
      2.0
      121.833333
      0.0
      14.0
      NaN
    
    
      2
      1.0
      1500.0
      13500.0
      16700.0
      HomeImp
      Other
      4.0
      0.0
      0.0
      149.466667
      1.0
      10.0
      NaN
    
    
      3
      1.0
      1500.0
      NaN
      NaN
      
      
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
    
    
      4
      0.0
      1700.0
      97800.0
      112000.0
      HomeImp
      Office
      3.0
      0.0
      0.0
      93.333333
      0.0
      14.0
      NaN

Feature engineering



In [3]:

    
# Create new columns to help with analysis
df['MORTPAID'] = df['VALUE'] - df['MORTDUE']
df.head()









    Out[3]:





Selected Rows from Table HMEQ
  
    
      
      BAD
      LOAN
      MORTDUE
      VALUE
      REASON
      JOB
      YOJ
      DEROG
      DELINQ
      CLAGE
      NINQ
      CLNO
      DEBTINC
      MORTPAID
    
  
  
    
      0
      1.0
      1100.0
      25860.0
      39025.0
      HomeImp
      Other
      10.5
      0.0
      0.0
      94.366667
      1.0
      9.0
      NaN
      13165.0
    
    
      1
      1.0
      1300.0
      70053.0
      68400.0
      HomeImp
      Other
      7.0
      0.0
      2.0
      121.833333
      0.0
      14.0
      NaN
      -1653.0
    
    
      2
      1.0
      1500.0
      13500.0
      16700.0
      HomeImp
      Other
      4.0
      0.0
      0.0
      149.466667
      1.0
      10.0
      NaN
      3200.0
    
    
      3
      1.0
      1500.0
      NaN
      NaN
      
      
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
    
    
      4
      0.0
      1700.0
      97800.0
      112000.0
      HomeImp
      Office
      3.0
      0.0
      0.0
      93.333333
      0.0
      14.0
      NaN
      14200.0

Investigate the data

Variable types



In [4]:

    
# Get the variable types
df.dtypes









    Out[4]:





BAD         double
LOAN        double
MORTDUE     double
VALUE       double
REASON        char
JOB           char
YOJ         double
DEROG       double
DELINQ      double
CLAGE       double
NINQ        double
CLNO        double
DEBTINC     double
MORTPAID    double
dtype: object

Summary statistics



In [5]:

    
# Get summary statistics using the describe method, then switch rows and columns
summary = df.describe(include = 'all').transpose()
summary

Numeric variable distribution



In [6]:

    
# Get the distribution of all numeric variables
df.hist(figsize = (15, 10));

Investigate missingness



In [7]:

    
# Create percent missing column for plotting
summary['pctmiss'] = (len(castbl) - summary['count'])/len(castbl)

# Make a bar graph using pandas/matplotlib functionality
summary.query('pctmiss > 0')['pctmiss'].plot('bar', title = 'Pct Missing Values', figsize = (10, 6), color = 'c');

Split the data into training and validation before imputing



In [8]:

    
# Load the sampling actionset
conn.loadactionset('sampling')

# Do a simple random sample with a 70/30 split
df.srs(samppct = 70, partind = True, output = dict(casout = castbl, copyvars = 'all'))

# Verify that the partition worked using the groupby method
castbl.groupby('_PartInd_')['_PartInd_'].count()/len(castbl)









    



NOTE: Added action set 'sampling'.
NOTE: Using SEED=329570658 for sampling.






    Out[8]:





_PartInd_
0.0    0.3
1.0    0.7
Name: _PartInd_, dtype: float64

Impute missing values using pipefitter pipelines



In [9]:

    
# Will end up imputing the median value for numeric variables, most common value for nominal
imp_castbl = Pipeline([Imputer(Imputer.MEDIAN), Imputer(Imputer.MODE)]).transform(castbl)

# I want my imputed dataset name to have the imp_ prefix
imp_castbl.partition(casout = dict(name = 'imp_' + indata, replace = True))

# Remove the unnecessary impute staging tables
[conn.droptable(s) for s in conn.tableinfo()['TableInfo']['Name'] if 'IMPUTE' in s]

# Make sure everything worked properly for the new imputed dataset
conn.fetch('imp_' + indata, to = 5)









    



NOTE: Cloud Analytic Services dropped table IMPUTE_5373D5ED_F76F_4317_B212_B8DD29E6C53F from caslib DMlib.
NOTE: Cloud Analytic Services dropped table IMPUTE_15245ACF_D875_40E4_850D_C4CDAADD6851 from caslib DMlib.






    Out[9]:




§ Fetch


Selected Rows from Table IMP_HMEQ
  
    
      
      BAD
      LOAN
      MORTDUE
      VALUE
      REASON
      JOB
      YOJ
      DEROG
      DELINQ
      CLAGE
      NINQ
      CLNO
      DEBTINC
      MORTPAID
      _PartInd_
    
  
  
    
      0
      1.0
      1100.0
      25860.0
      39025.0
      HomeImp
      Other
      10.5
      0.0
      0.0
      94.366667
      1.0
      9.0
      34.818262
      13165.0
      1.0
    
    
      1
      1.0
      1300.0
      70053.0
      68400.0
      HomeImp
      Other
      7.0
      0.0
      2.0
      121.833333
      0.0
      14.0
      34.818262
      -1653.0
      1.0
    
    
      2
      1.0
      1500.0
      13500.0
      16700.0
      HomeImp
      Other
      4.0
      0.0
      0.0
      149.466667
      1.0
      10.0
      34.818262
      3200.0
      1.0
    
    
      3
      1.0
      1500.0
      65019.0
      89235.5
      DebtCon
      Other
      7.0
      0.0
      0.0
      173.466667
      1.0
      20.0
      34.818262
      26623.0
      0.0
    
    
      4
      0.0
      1700.0
      97800.0
      112000.0
      HomeImp
      Office
      3.0
      0.0
      0.0
      93.333333
      0.0
      14.0
      34.818262
      14200.0
      1.0
    
  




elapsed 0.0277s · user 0.009s · sys 0.012s · mem 2.26MB

Do a quick check on the available datasets



In [10]:

    
conn.tableinfo()









    Out[10]:




§ TableInfo



  
    
      
      Name
      Rows
      Columns
      Encoding
      CreateTimeFormatted
      ModTimeFormatted
      JavaCharSet
      CreateTime
      ModTime
      Global
      Repeated
      View
      SourceName
      SourceCaslib
      Compressed
      Creator
      Modifier
    
  
  
    
      0
      HMEQ
      5960
      15
      utf-8
      17Apr2017:17:57:14
      17Apr2017:17:57:14
      UTF8
      1.808071e+09
      1.808071e+09
      0
      0
      0
      
      
      0
      jelueb
      
    
    
      1
      IMP_HMEQ
      5960
      15
      utf-8
      17Apr2017:17:57:15
      17Apr2017:17:57:15
      UTF8
      1.808071e+09
      1.808071e+09
      0
      0
      0
      
      
      0
      jelueb
      
    
  




elapsed 0.00946s · sys 0.005s · mem 0.193MB

Prepare for modeling - shortcuts to make life easier



In [11]:

    
# Create CASTable objects for training and validation for models that can handle missing values
train = conn.CASTable(indata, where = '_partind_ = 1')
valid = conn.CASTable(indata, where = '_partind_ = 0')

# Create CASTable objects for training and validation for models that cannot handle missing values
imp_train = conn.CASTable('imp_' + indata, where = '_partind_ = 1')
imp_valid = conn.CASTable('imp_' + indata, where = '_partind_ = 0')

# Key word argument shortcuts for model building (common imputs)
params = dict(
    target = castbl.columns[0],
    inputs = castbl.columns[1:-1].tolist(),
    nominals = [castbl.columns[0]] + castbl.columninfo()['ColumnInfo'].query('Type == "varchar"')['Column'].tolist()
)

Model Fitting & Scoring using Pipefitter

Decision Tree



In [12]:

    
# Train decision tree on training dataset
dt = DecisionTree(**params).fit(train)

# Score decision tree on validation dataset
dt_score = dt.score(valid)
dt_score









    Out[12]:





Target                            BAD
Level                           CLASS
Var                            _DT_P_
NBins                             100
NObsUsed                         1788
TargetCount                      1788
TargetMiss                          0
PredCount                        1788
PredMiss                            0
Event                               0
EventCount                       1415
NonEventCount                     373
EventMiss                           0
AreaUnderROCCurve            0.826011
CRCut                            0.72
ClassificationCutOff              0.5
KS                           0.630406
KSCutOff                         0.72
MisClassificationRate          14.094
dtype: object

Random Forest



In [13]:

    
# Train random forest on training dataset
rf = DecisionForest(**params).fit(train)

# Score decision tree on validation dataset
rf_score = rf.score(valid)
rf_score









    Out[13]:





Target                            BAD
Level                           CLASS
Var                            _RF_P_
NBins                             100
NObsUsed                         1788
TargetCount                      1788
TargetMiss                          0
PredCount                        1788
PredMiss                            0
Event                               0
EventCount                       1415
NonEventCount                     373
EventMiss                           0
AreaUnderROCCurve            0.877359
CRCut                            0.58
ClassificationCutOff              0.5
KS                           0.654803
KSCutOff                         0.83
MisClassificationRate         11.9128
dtype: object

Gradient Boosting



In [14]:

    
# Train gradient boosting on training dataset
gbt = GBTree(**params).fit(train)

# Score gradient boosting on validation dataset
gbt_score = gbt.score(valid)
gbt_score









    Out[14]:





Target                            BAD
Level                           CLASS
Var                           _GBT_P_
NBins                             100
NObsUsed                         1788
TargetCount                      1788
TargetMiss                          0
PredCount                        1788
PredMiss                            0
Event                               0
EventCount                       1415
NonEventCount                     373
EventMiss                           0
AreaUnderROCCurve            0.937564
CRCut                            0.59
ClassificationCutOff              0.5
KS                           0.734302
KSCutOff                         0.75
MisClassificationRate         9.67562
dtype: object

Neural Network



In [15]:

    
# Train neural network on training dataset
nn = NeuralNetwork(**params).fit(imp_train)

# Score neural network on validation dataset
nn_score = nn.score(imp_valid, event = 0)
nn_score









    Out[15]:





Target                            BAD
Level                           CLASS
Var                            _NN_P_
NBins                             100
NObsUsed                         1788
TargetCount                      1788
TargetMiss                          0
PredCount                        1788
PredMiss                            0
Event                               1
EventCount                        373
NonEventCount                    1415
EventMiss                           0
AreaUnderROCCurve             0.75782
CRCut                            0.47
ClassificationCutOff              0.5
KS                           0.401029
KSCutOff                         0.21
MisClassificationRate          17.226
dtype: object

Model Assessment

Compare model misclassification side-by-side



In [16]:

    
# Aggregate the misclassification metrics from previous output
Models = ['Decision Tree', 'Random Forest', 'Gradient Boosting', 'Neural Network']
Misclassification = [dt_score[-1], rf_score[-1], gbt_score[-1], nn_score[-1]]
mcr = pd.DataFrame({'Misclassification Rate': Misclassification}, Models).sort_values('Misclassification Rate')
print('\n', mcr)

# Which model is the champion?
print('\n', 'The', mcr.index[0], 'model is the champion!')









    



                    Misclassification Rate
Gradient Boosting                9.675615
Random Forest                   11.912752
Decision Tree                   14.093960
Neural Network                  17.225951

 The Gradient Boosting model is the champion!

Optional: obtain more model assessment metrics

Check available tables

There should be the prepared datasets and saved models



In [17]:

    
# What are the in-memory tables?
conn.tableinfo()









    Out[17]:




§ TableInfo



  
    
      
      Name
      Rows
      Columns
      Encoding
      CreateTimeFormatted
      ModTimeFormatted
      JavaCharSet
      CreateTime
      ModTime
      Global
      Repeated
      View
      SourceName
      SourceCaslib
      Compressed
      Creator
      Modifier
    
  
  
    
      0
      HMEQ
      5960
      15
      utf-8
      17Apr2017:17:57:14
      17Apr2017:17:57:14
      UTF8
      1.808071e+09
      1.808071e+09
      0
      0
      0
      
      
      0
      jelueb
      
    
    
      1
      IMP_HMEQ
      5960
      15
      utf-8
      17Apr2017:17:57:15
      17Apr2017:17:57:15
      UTF8
      1.808071e+09
      1.808071e+09
      0
      0
      0
      
      
      0
      jelueb
      
    
    
      2
      KMODELTREEF6BEF868_5BEF_4F67_8A03_938A7038CA83
      13
      28
      utf-8
      17Apr2017:17:57:16
      17Apr2017:17:57:16
      UTF8
      1.808071e+09
      1.808071e+09
      0
      0
      0
      
      
      0
      jelueb
      
    
    
      3
      KMODELFOREST_D99A1A67_3A12_4E87_B8B2_92A01EE175B9
      690
      42
      utf-8
      17Apr2017:17:57:16
      17Apr2017:17:57:16
      UTF8
      1.808071e+09
      1.808071e+09
      0
      0
      0
      
      
      0
      jelueb
      
    
    
      4
      KMODELGBT_27489703_D6BA_4135_A2E2_3F04DF8BB81B
      2480
      32
      utf-8
      17Apr2017:17:57:24
      17Apr2017:17:57:24
      UTF8
      1.808071e+09
      1.808071e+09
      0
      0
      0
      
      
      0
      jelueb
      
    
    
      5
      NNMODEL2E2DE424_B238_4FAE_9FD3_B470212B80FD
      182
      15
      utf-8
      17Apr2017:17:57:27
      17Apr2017:17:57:27
      UTF8
      1.808071e+09
      1.808071e+09
      0
      0
      0
      
      
      0
      jelueb
      
    
  




elapsed 0.0231s · user 0.001s · sys 0.007s · mem 0.198MB

Assessment dictionary shortcuts



In [18]:

    
# Give model tables appropriate shortcuts in a dictionary
saved = {}
saved['dt']  = next((s for s in conn.tableinfo()['TableInfo']['Name'] if 'MODELTREE' in s), None)
saved['rf']  = next((s for s in conn.tableinfo()['TableInfo']['Name'] if 'MODELFOREST' in s), None)
saved['gbt'] = next((s for s in conn.tableinfo()['TableInfo']['Name'] if 'MODELGBT' in s), None)
saved['nn']  = next((s for s in conn.tableinfo()['TableInfo']['Name'] if 'NNMODEL' in s), None)

# Models to be assessed
models = collections.OrderedDict()
models['dt']  = 'Decision Tree'
models['rf']  = 'Random Forest'
models['gbt'] = 'Gradient Boosting'
models['nn']  = 'Neural Network'

Score the models built using Pipefitter



In [19]:

    
# Define function that will score the models based on the model prefix
def score_model(model):
    return dict(
        table        = valid,
        modelTable   = saved[model],
        assessonerow = True,
        copyvars     = [castbl.columns[0], castbl.columns[-1]],
        casOut       = dict(name = model + '_scored', replace = True)
    )

### Decision Tree
conn.dtreeScore(**score_model('dt'))

### Random Forest
conn.forestScore(**score_model('rf'))

### Gradient Boosting
conn.gbtreeScore(**score_model('gbt'))

### Neural Network
conn.annScore(**score_model('nn'))

# See the available tables now
conn.tableinfo()









    Out[19]:




§ TableInfo



  
    
      
      Name
      Rows
      Columns
      Encoding
      CreateTimeFormatted
      ModTimeFormatted
      JavaCharSet
      CreateTime
      ModTime
      Global
      Repeated
      View
      SourceName
      SourceCaslib
      Compressed
      Creator
      Modifier
    
  
  
    
      0
      HMEQ
      5960
      15
      utf-8
      17Apr2017:17:57:14
      17Apr2017:17:57:14
      UTF8
      1.808071e+09
      1.808071e+09
      0
      0
      0
      
      
      0
      jelueb
      
    
    
      1
      IMP_HMEQ
      5960
      15
      utf-8
      17Apr2017:17:57:15
      17Apr2017:17:57:15
      UTF8
      1.808071e+09
      1.808071e+09
      0
      0
      0
      
      
      0
      jelueb
      
    
    
      2
      KMODELTREEF6BEF868_5BEF_4F67_8A03_938A7038CA83
      13
      28
      utf-8
      17Apr2017:17:57:16
      17Apr2017:17:57:16
      UTF8
      1.808071e+09
      1.808071e+09
      0
      0
      0
      
      
      0
      jelueb
      
    
    
      3
      KMODELFOREST_D99A1A67_3A12_4E87_B8B2_92A01EE175B9
      690
      42
      utf-8
      17Apr2017:17:57:16
      17Apr2017:17:57:16
      UTF8
      1.808071e+09
      1.808071e+09
      0
      0
      0
      
      
      0
      jelueb
      
    
    
      4
      KMODELGBT_27489703_D6BA_4135_A2E2_3F04DF8BB81B
      2480
      32
      utf-8
      17Apr2017:17:57:24
      17Apr2017:17:57:24
      UTF8
      1.808071e+09
      1.808071e+09
      0
      0
      0
      
      
      0
      jelueb
      
    
    
      5
      NNMODEL2E2DE424_B238_4FAE_9FD3_B470212B80FD
      182
      15
      utf-8
      17Apr2017:17:57:27
      17Apr2017:17:57:27
      UTF8
      1.808071e+09
      1.808071e+09
      0
      0
      0
      
      
      0
      jelueb
      
    
    
      6
      DT_SCORED
      1788
      16
      utf-8
      17Apr2017:17:57:28
      17Apr2017:17:57:28
      UTF8
      1.808071e+09
      1.808071e+09
      0
      0
      0
      
      
      0
      jelueb
      
    
    
      7
      RF_SCORED
      1788
      9
      utf-8
      17Apr2017:17:57:28
      17Apr2017:17:57:28
      UTF8
      1.808071e+09
      1.808071e+09
      0
      0
      0
      
      
      0
      jelueb
      
    
    
      8
      GBT_SCORED
      1788
      8
      utf-8
      17Apr2017:17:57:28
      17Apr2017:17:57:28
      UTF8
      1.808071e+09
      1.808071e+09
      0
      0
      0
      
      
      0
      jelueb
      
    
    
      9
      NN_SCORED
      1788
      6
      utf-8
      17Apr2017:17:57:28
      17Apr2017:17:57:28
      UTF8
      1.808071e+09
      1.808071e+09
      0
      0
      0
      
      
      0
      jelueb
      
    
  




elapsed 0.00993s · user 0.003s · sys 0.008s · mem 0.206MB

Assess the models using the scored datasets



In [20]:

    
# Model assessment function
def assess_model(model):
    return conn.assess(
        table    = dict(name = model + '_scored', where = '_partind_ = 0'),
        inputs   = '_' + model + '_p_           1',      
        response = castbl.columns[0],
        event    = '1'  
    )

# Loop through the models and append to the roc_df dataframe
roc_df  = pd.DataFrame()
for i in range(len(models)):
    tmp = assess_model(list(models)[i])
    tmp.ROCInfo['Model'] = list(models.values())[i]
    roc_df = pd.concat([roc_df, tmp.ROCInfo])

Additional Assessment

Confusion Matrix



In [21]:

    
# Display stacked confusion matrix
print('\n', 'Confusion Matrix Information'.center(38, ' '))
roc_df[round(roc_df['CutOff'], 2) == 0.5][['Model', 'TP', 'FP', 'FN', 'TN']].reset_index(drop = True)









    



      Confusion Matrix Information     






    Out[21]:






  
    
      
      Model
      TP
      FP
      FN
      TN
    
  
  
    
      0
      Decision Tree
      273.0
      153.0
      100.0
      1262.0
    
    
      1
      Random Forest
      100.0
      2.0
      273.0
      1413.0
    
    
      2
      Gradient Boosting
      254.0
      65.0
      119.0
      1350.0
    
    
      3
      Neural Network
      22.0
      22.0
      73.0
      890.0

ROC Curve



In [22]:

    
# Plot ROC curve
plt.figure(figsize = (7, 6))
for key, grp in roc_df.groupby(['Model']):
    plt.plot(grp['FPR'], grp['Sensitivity'], label = key + ' (C = %0.2f)' % grp['C'].mean())
plt.plot([0,1], [0,1], 'k--')
plt.xlabel('False Postivie Rate')
plt.ylabel('True Positive Rate')
plt.legend(loc = 'lower right')
plt.title('ROC Curve (using validation data)');

End the session



In [23]:

    
# End the session
conn.endsession()









    Out[23]:





elapsed 0.0198s · user 0.002s · sys 0.004s · mem 0.109MB

	count	unique	top	freq	mean	std	min	25%	50%	75%	max
BAD	5960	2	0	4771	0.199497	0.399656	0	0	0	0	1
LOAN	5960	540	15000	105	18608	11207.5	1100	11100	16300	23300	89900
MORTDUE	5442	5053	42000	11	73760.8	44457.6	2063	46268	65019	91491	399550
VALUE	5848	5381	60000	15	101776	57385.8	8000	66069	89235.5	119832	855909
REASON	5960	2	DebtCon	3928	NaN	NaN	DebtCon	NaN	NaN	NaN	HomeImp
JOB	5960	6	Other	2388	NaN	NaN	Mgr	NaN	NaN	NaN	Self
YOJ	5445	99	0	415	8.92227	7.57398	0	3	7	13	41
DEROG	5252	11	0	4527	0.25457	0.846047	0	0	0	0	10
DELINQ	5380	14	0	4179	0.449442	1.12727	0	0	0	0	15
CLAGE	5652	5314	206.967	7	179.766	85.8101	0	115.103	173.467	231.575	1168.23
NINQ	5450	16	0	2531	1.18606	1.72867	0	0	1	2	17
CLNO	5738	62	16	316	21.2961	10.1389	0	15	20	26	71
DEBTINC	4693	4693	203.312	1	33.7799	8.60175	0.524499	29.14	34.8183	39.0031	203.312
MORTPAID	5357	4915	18000	10	31235.2	26343.4	-205445	17355	26623	39757	287300

	BAD	LOAN	MORTDUE	VALUE	REASON	JOB	YOJ	DEROG	DELINQ	CLAGE	NINQ	CLNO	DEBTINC
0	1.0	1100.0	25860.0	39025.0	HomeImp	Other	10.5	0.0	0.0	94.366667	1.0	9.0	NaN
1	1.0	1300.0	70053.0	68400.0	HomeImp	Other	7.0	0.0	2.0	121.833333	0.0	14.0	NaN
2	1.0	1500.0	13500.0	16700.0	HomeImp	Other	4.0	0.0	0.0	149.466667	1.0	10.0	NaN
3	1.0	1500.0	NaN	NaN			NaN	NaN	NaN	NaN	NaN	NaN	NaN
4	0.0	1700.0	97800.0	112000.0	HomeImp	Office	3.0	0.0	0.0	93.333333	0.0	14.0	NaN

	Name	Rows	Columns	Encoding	CreateTimeFormatted	ModTimeFormatted	JavaCharSet	CreateTime	ModTime	Global	Repeated	View	SourceName	SourceCaslib	Compressed	Creator	Modifier
0	HMEQ	5960	15	utf-8	17Apr2017:17:57:14	17Apr2017:17:57:14	UTF8	1.808071e+09	1.808071e+09	0	0	0			0	jelueb
1	IMP_HMEQ	5960	15	utf-8	17Apr2017:17:57:15	17Apr2017:17:57:15	UTF8	1.808071e+09	1.808071e+09	0	0	0			0	jelueb

	Model	TP	FP	FN	TN
0	Decision Tree	273.0	153.0	100.0	1262.0
1	Random Forest	100.0	2.0	273.0	1413.0
2	Gradient Boosting	254.0	65.0	119.0	1350.0
3	Neural Network	22.0	22.0	73.0	890.0