notebook.community

Edit and run



In [11]:

    
df = pd.read_csv('lc_dataframe(cleaning).csv')
df.tail()









    Out[11]:






  
    
      
      loan_amnt
      int_rate
      emp_title
      emp_length
      home_ownership
      annual_inc
      verification_status
      issue_d
      loan_status
      desc
      purpose
      dti
      delinq_2yrs
      inq_last_6mths
      pub_rec
      revol_bal
      revol_util
      total_acc
      initial_list_status
    
  
  
    
      268131
      31050
      21.99
      1
      10
      1
      875000.0
      1
      12
      1
      0
      3
      9.66
      1
      0
      0
      25770
      79.3
      13
      0
    
    
      268132
      10800
      7.89
      1
      8
      1
      92400.0
      1
      12
      1
      0
      2
      19.62
      1
      0
      0
      9760
      68.7
      36
      1
    
    
      268133
      9000
      9.17
      1
      1
      1
      80000.0
      1
      12
      1
      0
      3
      3.97
      1
      0
      0
      6320
      51.8
      17
      0
    
    
      268134
      14400
      25.99
      0
      11
      4
      62000.0
      1
      12
      1
      0
      3
      16.88
      0
      1
      1
      5677
      45.1
      30
      0
    
    
      268135
      8000
      12.59
      1
      4
      3
      45000.0
      1
      12
      1
      0
      3
      26.21
      0
      0
      0
      9097
      50.8
      47
      1

DataFrame
- df: from csv
- dfX: Feature Variable
- dfy: Dependent Variable
- dfX_s: Feature Variable after Scaling

Function
- sm.logit(data_test, dfy): Statsmodels LogisticRegression Function
- stratified_cross_val(data_test, dfy, n_iter) : print classification report of cross validation with StratifiedKFold



In [12]:

    
dfX = df.copy()
del dfX['loan_status']
dfy = pd.DataFrame(df['loan_status'], columns=['loan_status'])



In [14]:

    
from sklearn.preprocessing import scale, robust_scale
dfX_s = dfX.copy()
dfX_s['loan_amnt'] = scale(dfX_s['loan_amnt'].reshape(-1, 1))
dfX_s['annual_inc'] = scale(dfX_s['annual_inc'].reshape(-1, 1))
dfX_s['revol_bal'] = scale(dfX_s['revol_bal'].reshape(-1, 1))
dfX_s.tail()









    



/home/dockeruser/anaconda2/lib/python2.7/site-packages/ipykernel/__main__.py:3: FutureWarning: reshape is deprecated and will raise in a subsequent release. Please use .values.reshape(...) instead
  app.launch_new_instance()
/home/dockeruser/anaconda2/lib/python2.7/site-packages/ipykernel/__main__.py:4: FutureWarning: reshape is deprecated and will raise in a subsequent release. Please use .values.reshape(...) instead
/home/dockeruser/anaconda2/lib/python2.7/site-packages/ipykernel/__main__.py:5: FutureWarning: reshape is deprecated and will raise in a subsequent release. Please use .values.reshape(...) instead






    Out[14]:






  
    
      
      loan_amnt
      int_rate
      emp_title
      emp_length
      home_ownership
      annual_inc
      verification_status
      issue_d
      desc
      purpose
      dti
      delinq_2yrs
      inq_last_6mths
      pub_rec
      revol_bal
      revol_util
      total_acc
      initial_list_status
    
  
  
    
      268131
      2.127000
      21.99
      1
      10
      1
      13.782613
      1
      12
      0
      3
      9.66
      1
      0
      0
      0.561689
      79.3
      13
      0
    
    
      268132
      -0.352019
      7.89
      1
      8
      1
      0.343283
      1
      12
      0
      2
      19.62
      1
      0
      0
      -0.287872
      68.7
      36
      1
    
    
      268133
      -0.572376
      9.17
      1
      1
      1
      0.130342
      1
      12
      0
      3
      3.97
      1
      0
      0
      -0.470413
      51.8
      17
      0
    
    
      268134
      0.088696
      25.99
      0
      11
      4
      -0.178766
      1
      12
      0
      3
      16.88
      0
      1
      1
      -0.504533
      45.1
      30
      0
    
    
      268135
      -0.694797
      12.59
      1
      4
      3
      -0.470701
      1
      12
      0
      3
      26.21
      0
      0
      0
      -0.323053
      50.8
      47
      1



In [15]:

    
from sklearn.decomposition import PCA

pca = PCA(n_components=None, whiten=False).fit(dfX_s)
var = pca.explained_variance_
plt.bar(np.arange(1, len(var)+1), var/np.sum(var), align='center')
plt.step(np.arange(1,len(var)+1), np.cumsum(var)/np.sum(var), where="mid")
plt.show()



In [16]:

    
from sklearn.metrics import classification_report



In [17]:

    
from sklearn.cross_validation import train_test_split



In [18]:

    
dfX_s.tail()









    Out[18]:






  
    
      
      loan_amnt
      int_rate
      emp_title
      emp_length
      home_ownership
      annual_inc
      verification_status
      issue_d
      desc
      purpose
      dti
      delinq_2yrs
      inq_last_6mths
      pub_rec
      revol_bal
      revol_util
      total_acc
      initial_list_status
    
  
  
    
      268131
      2.127000
      21.99
      1
      10
      1
      13.782613
      1
      12
      0
      3
      9.66
      1
      0
      0
      0.561689
      79.3
      13
      0
    
    
      268132
      -0.352019
      7.89
      1
      8
      1
      0.343283
      1
      12
      0
      2
      19.62
      1
      0
      0
      -0.287872
      68.7
      36
      1
    
    
      268133
      -0.572376
      9.17
      1
      1
      1
      0.130342
      1
      12
      0
      3
      3.97
      1
      0
      0
      -0.470413
      51.8
      17
      0
    
    
      268134
      0.088696
      25.99
      0
      11
      4
      -0.178766
      1
      12
      0
      3
      16.88
      0
      1
      1
      -0.504533
      45.1
      30
      0
    
    
      268135
      -0.694797
      12.59
      1
      4
      3
      -0.470701
      1
      12
      0
      3
      26.21
      0
      0
      0
      -0.323053
      50.8
      47
      1



In [19]:

    
dfX_test = dfX_s.ix[:, 0:3]
dfX_test.tail()



In [100]:

    
dfX_s.tail()









    Out[100]:






  
    
      
      loan_amnt
      int_rate
      emp_title
      emp_length
      home_ownership
      annual_inc
      verification_status
      issue_d
      desc
      purpose
      dti
      delinq_2yrs
      inq_last_6mths
      pub_rec
      revol_bal
      revol_util
      total_acc
      initial_list_status
    
  
  
    
      268131
      2.127000
      21.99
      1
      10
      1
      13.782613
      1
      12
      0
      3
      9.66
      1
      0
      0
      0.561689
      79.3
      13
      0
    
    
      268132
      -0.352019
      7.89
      1
      8
      1
      0.343283
      1
      12
      0
      2
      19.62
      1
      0
      0
      -0.287872
      68.7
      36
      1
    
    
      268133
      -0.572376
      9.17
      1
      1
      1
      0.130342
      1
      12
      0
      3
      3.97
      1
      0
      0
      -0.470413
      51.8
      17
      0
    
    
      268134
      0.088696
      25.99
      0
      11
      4
      -0.178766
      1
      12
      0
      3
      16.88
      0
      1
      1
      -0.504533
      45.1
      30
      0
    
    
      268135
      -0.694797
      12.59
      1
      4
      3
      -0.470701
      1
      12
      0
      3
      26.21
      0
      0
      0
      -0.323053
      50.8
      47
      1



In [23]:

    
data = dfX_s.copy()
data['purpose'].value_counts()









    Out[23]:





3     158081
2      52785
5      15775
10     15073
7       6537
12      5005
1       3741
8       3011
9       2152
14      1963
6       1735
13      1677
4        325
11       276
Name: purpose, dtype: int64



In [25]:

    
purpose = data['purpose']
purpose.max()









    Out[25]:





14



In [26]:

    
purpose = purpose.replace([1,4,5,6,7,8,9,11,12,13,14], 0)



In [30]:

    
data.tail()









    Out[30]:






  
    
      
      loan_amnt
      int_rate
      emp_title
      emp_length
      home_ownership
      annual_inc
      verification_status
      issue_d
      desc
      purpose
      dti
      delinq_2yrs
      inq_last_6mths
      pub_rec
      revol_bal
      revol_util
      total_acc
      initial_list_status
    
  
  
    
      268131
      2.127000
      21.99
      1
      10
      1
      13.782613
      1
      12
      0
      3
      9.66
      1
      0
      0
      0.561689
      79.3
      13
      0
    
    
      268132
      -0.352019
      7.89
      1
      8
      1
      0.343283
      1
      12
      0
      2
      19.62
      1
      0
      0
      -0.287872
      68.7
      36
      1
    
    
      268133
      -0.572376
      9.17
      1
      1
      1
      0.130342
      1
      12
      0
      3
      3.97
      1
      0
      0
      -0.470413
      51.8
      17
      0
    
    
      268134
      0.088696
      25.99
      0
      11
      4
      -0.178766
      1
      12
      0
      3
      16.88
      0
      1
      1
      -0.504533
      45.1
      30
      0
    
    
      268135
      -0.694797
      12.59
      1
      4
      3
      -0.470701
      1
      12
      0
      3
      26.21
      0
      0
      0
      -0.323053
      50.8
      47
      1



In [31]:

    
ohe= OneHotEncoder()
purpose = ohe.fit_transform(data['purpose'].reshape(-1,1)).toarray()









    



/home/dockeruser/anaconda2/lib/python2.7/site-packages/ipykernel/__main__.py:2: FutureWarning: reshape is deprecated and will raise in a subsequent release. Please use .values.reshape(...) instead
  from ipykernel import kernelapp as app



In [33]:

    
ohe.active_features_









    Out[33]:





array([ 0,  2,  3, 10])



In [34]:

    
purpose









    Out[34]:





array([[ 0.,  0.,  1.,  0.],
       [ 1.,  0.,  0.,  0.],
       [ 0.,  0.,  1.,  0.],
       ..., 
       [ 0.,  0.,  1.,  0.],
       [ 0.,  0.,  1.,  0.],
       [ 0.,  0.,  1.,  0.]])



In [35]:

    
purpose = pd.DataFrame(purpose, columns = ['purpose_other', 'purpose_2', 'purpose_3', 'purpose_10'])
purpose.tail()









    Out[35]:






  
    
      
      purpose_other
      purpose_2
      purpose_3
      purpose_10
    
  
  
    
      268131
      0.0
      0.0
      1.0
      0.0
    
    
      268132
      0.0
      1.0
      0.0
      0.0
    
    
      268133
      0.0
      0.0
      1.0
      0.0
    
    
      268134
      0.0
      0.0
      1.0
      0.0
    
    
      268135
      0.0
      0.0
      1.0
      0.0



In [36]:

    
data.columns









    Out[36]:





Index([u'loan_amnt', u'int_rate', u'emp_title', u'emp_length',
       u'home_ownership', u'annual_inc', u'verification_status', u'issue_d',
       u'desc', u'purpose', u'dti', u'delinq_2yrs', u'inq_last_6mths',
       u'pub_rec', u'revol_bal', u'revol_util', u'total_acc',
       u'initial_list_status'],
      dtype='object')



In [37]:

    
data_test = pd.concat([data['loan_amnt'], data['int_rate'], data['emp_title'], data['annual_inc'], data['desc'], purpose, data['dti'], data['delinq_2yrs'], data['verification_status']], axis=1)



In [171]:

    
data_test









    Out[171]:






  
    
      
      loan_amnt
      int_rate
      emp_title
      annual_inc
      desc
      purpose_other
      purpose_2
      purpose_3
      purpose_10
      dti
      delinq_2yrs
      verification_status
    
  
  
    
      0
      -0.756007
      13.75
      1
      -0.865673
      91
      0.0
      0.0
      1.0
      0.0
      14.29
      1
      0
    
    
      1
      -1.245690
      10.28
      1
      -0.900018
      239
      1.0
      0.0
      0.0
      0.0
      1.50
      0
      0
    
    
      2
      -0.970243
      7.43
      1
      0.903112
      41
      0.0
      0.0
      1.0
      0.0
      0.27
      0
      0
    
    
      3
      -1.062059
      7.43
      1
      -0.556565
      845
      1.0
      0.0
      0.0
      0.0
      2.55
      0
      0
    
    
      4
      -1.527257
      11.54
      1
      -0.900018
      649
      0.0
      1.0
      0.0
      0.0
      2.04
      0
      0
    
    
      5
      -0.174509
      10.59
      1
      -0.213111
      751
      1.0
      0.0
      0.0
      0.0
      17.12
      1
      0
    
    
      6
      -1.343626
      15.96
      1
      -0.347058
      27
      0.0
      0.0
      1.0
      0.0
      12.57
      0
      0
    
    
      7
      -1.062059
      9.01
      1
      3.049696
      22
      0.0
      0.0
      0.0
      1.0
      10.00
      1
      0
    
    
      8
      -1.245690
      9.96
      1
      -1.011640
      536
      1.0
      0.0
      0.0
      0.0
      16.44
      0
      0
    
    
      9
      -1.062059
      7.43
      1
      1.332429
      103
      1.0
      0.0
      0.0
      0.0
      0.00
      0
      0
    
    
      10
      -1.062059
      7.43
      1
      0.387932
      214
      1.0
      0.0
      0.0
      0.0
      3.83
      0
      0
    
    
      11
      1.386355
      10.59
      1
      -0.419183
      598
      1.0
      0.0
      0.0
      0.0
      4.05
      0
      0
    
    
      12
      -0.449955
      10.28
      1
      -0.773352
      740
      1.0
      0.0
      0.0
      0.0
      10.70
      0
      0
    
    
      13
      -1.062059
      9.01
      1
      0.010134
      28
      0.0
      1.0
      0.0
      0.0
      9.96
      0
      0
    
    
      14
      -1.049817
      8.38
      1
      -0.693946
      84
      1.0
      0.0
      0.0
      0.0
      14.78
      0
      0
    
    
      15
      -1.013090
      7.75
      0
      -1.106090
      351
      1.0
      0.0
      0.0
      0.0
      3.00
      0
      0
    
    
      16
      -1.025332
      8.38
      1
      0.027306
      88
      0.0
      1.0
      0.0
      0.0
      14.37
      0
      0
    
    
      17
      -1.062059
      11.22
      1
      -0.728291
      260
      0.0
      1.0
      0.0
      0.0
      18.64
      0
      0
    
    
      18
      -1.306900
      10.28
      1
      2.191062
      485
      1.0
      0.0
      0.0
      0.0
      0.00
      1
      0
    
    
      19
      -1.306900
      8.70
      0
      -0.814155
      527
      1.0
      0.0
      0.0
      0.0
      14.54
      0
      0
    
    
      20
      -1.062059
      7.43
      1
      0.817249
      68
      0.0
      0.0
      1.0
      0.0
      2.29
      0
      0
    
    
      21
      -1.062059
      8.07
      0
      1.847609
      33
      1.0
      0.0
      0.0
      0.0
      5.55
      0
      0
    
    
      22
      -1.062059
      9.33
      1
      1.847609
      41
      1.0
      0.0
      0.0
      0.0
      11.93
      0
      0
    
    
      23
      -1.062059
      9.96
      1
      -0.419183
      104
      0.0
      0.0
      1.0
      0.0
      8.03
      0
      0
    
    
      24
      -1.062059
      11.22
      1
      0.130342
      0
      0.0
      1.0
      0.0
      0.0
      1.21
      0
      0
    
    
      25
      -1.062059
      7.43
      1
      0.216206
      0
      0.0
      1.0
      0.0
      0.0
      0.31
      0
      0
    
    
      26
      -1.062059
      8.70
      1
      0.044479
      72
      0.0
      1.0
      0.0
      0.0
      15.55
      0
      0
    
    
      27
      -1.368110
      8.07
      1
      0.645522
      148
      1.0
      0.0
      0.0
      0.0
      11.33
      0
      0
    
    
      28
      -0.633586
      10.28
      1
      -0.934363
      406
      0.0
      1.0
      0.0
      0.0
      6.40
      1
      0
    
    
      29
      -1.062059
      8.07
      0
      0.473796
      0
      0.0
      0.0
      1.0
      0.0
      2.30
      0
      0
    
    
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
    
    
      268106
      0.284569
      16.99
      1
      -0.470701
      0
      1.0
      0.0
      0.0
      0.0
      17.17
      1
      1
    
    
      268107
      0.174390
      28.99
      1
      -0.522219
      0
      0.0
      0.0
      1.0
      0.0
      27.49
      0
      1
    
    
      268108
      -1.062059
      11.49
      1
      -0.384838
      0
      0.0
      1.0
      0.0
      0.0
      8.52
      0
      1
    
    
      268109
      -1.306900
      11.49
      1
      -0.693946
      0
      0.0
      0.0
      1.0
      0.0
      9.79
      0
      1
    
    
      268110
      1.508776
      19.99
      1
      0.336414
      0
      0.0
      1.0
      0.0
      0.0
      19.59
      0
      1
    
    
      268111
      1.753617
      18.99
      1
      2.294098
      0
      0.0
      0.0
      1.0
      0.0
      11.88
      0
      1
    
    
      268112
      -1.062059
      10.99
      1
      -0.470701
      0
      1.0
      0.0
      0.0
      0.0
      21.95
      0
      1
    
    
      268113
      0.162148
      9.99
      1
      -0.213111
      0
      0.0
      0.0
      1.0
      0.0
      9.86
      0
      0
    
    
      268114
      -1.123269
      9.17
      1
      -0.814155
      0
      0.0
      0.0
      1.0
      0.0
      31.20
      0
      0
    
    
      268115
      -1.429321
      12.05
      0
      -0.137551
      0
      0.0
      0.0
      1.0
      0.0
      22.29
      0
      1
    
    
      268116
      0.039727
      10.99
      1
      1.160702
      0
      0.0
      1.0
      0.0
      0.0
      7.32
      0
      1
    
    
      268117
      -1.107966
      15.41
      0
      -0.728291
      0
      0.0
      0.0
      1.0
      0.0
      16.28
      0
      1
    
    
      268118
      0.529410
      15.41
      1
      -0.110075
      0
      0.0
      0.0
      1.0
      0.0
      21.92
      0
      1
    
    
      268119
      2.610562
      16.99
      1
      0.319242
      0
      0.0
      0.0
      1.0
      0.0
      26.72
      0
      1
    
    
      268120
      0.162148
      12.05
      1
      0.130342
      0
      0.0
      0.0
      0.0
      1.0
      14.75
      0
      1
    
    
      268121
      2.610562
      13.67
      1
      0.319242
      0
      1.0
      0.0
      0.0
      0.0
      3.81
      0
      1
    
    
      268122
      1.998458
      7.26
      1
      1.847609
      0
      1.0
      0.0
      0.0
      0.0
      12.39
      0
      1
    
    
      268123
      -0.272445
      16.55
      1
      -0.213111
      0
      0.0
      0.0
      0.0
      1.0
      12.60
      0
      1
    
    
      268124
      -1.062059
      13.67
      1
      -0.106641
      0
      0.0
      0.0
      0.0
      1.0
      19.56
      0
      1
    
    
      268125
      2.610562
      9.17
      1
      1.783211
      0
      0.0
      1.0
      0.0
      0.0
      22.68
      0
      1
    
    
      268126
      -0.817217
      12.59
      1
      -0.630407
      0
      0.0
      0.0
      1.0
      0.0
      25.51
      0
      1
    
    
      268127
      0.162148
      17.57
      1
      -0.419183
      0
      0.0
      0.0
      1.0
      0.0
      27.12
      0
      0
    
    
      268128
      1.925006
      17.86
      1
      0.559659
      0
      0.0
      0.0
      1.0
      0.0
      30.19
      0
      1
    
    
      268129
      0.162148
      18.49
      1
      0.302069
      0
      0.0
      0.0
      1.0
      0.0
      25.13
      1
      0
    
    
      268130
      0.774252
      6.24
      1
      0.903112
      0
      0.0
      1.0
      0.0
      0.0
      14.20
      1
      1
    
    
      268131
      2.127000
      21.99
      1
      13.782613
      0
      0.0
      0.0
      1.0
      0.0
      9.66
      1
      1
    
    
      268132
      -0.352019
      7.89
      1
      0.343283
      0
      0.0
      1.0
      0.0
      0.0
      19.62
      1
      1
    
    
      268133
      -0.572376
      9.17
      1
      0.130342
      0
      0.0
      0.0
      1.0
      0.0
      3.97
      1
      1
    
    
      268134
      0.088696
      25.99
      0
      -0.178766
      0
      0.0
      0.0
      1.0
      0.0
      16.88
      0
      1
    
    
      268135
      -0.694797
      12.59
      1
      -0.470701
      0
      0.0
      0.0
      1.0
      0.0
      26.21
      0
      1
    
  

268136 rows × 12 columns



In [70]:

    
from sklearn.cross_validation import train_test_split, StratifiedKFold
from sklearn.metrics import confusion_matrix
from sklearn.metrics import roc_curve



In [82]:

    
def stratified_cross_val(data_test, dfy, n_iter):
    cv = StratifiedKFold(dfy['loan_status'], n_folds=n_iter, random_state=3)
    for train_index, test_index in cv:
        X_train = data_test.ix[train_index, :]
        y_train = dfy.ix[train_index,:]
        X_test = data_test.ix[test_index, :]
        y_test = dfy.ix[test_index, :]
        model = LogisticRegression().fit(X_train, y_train)
        print(classification_report(y_test, model.predict(X_test)))
        print('='*80)



In [51]:

    
def sm_logit(data_test, dfy):
    model = sm.Logit(dfy, sm.add_constant(data_test)).fit()
    print(model.summary())



In [52]:

    
sm_logit(data_test2, dfy)









    



Optimization terminated successfully.
         Current function value: 0.491694
         Iterations 6
                           Logit Regression Results                           
==============================================================================
Dep. Variable:            loan_status   No. Observations:               268136
Model:                          Logit   Df Residuals:                   268127
Method:                           MLE   Df Model:                            8
Date:                Tue, 14 Mar 2017   Pseudo R-squ.:                 0.07845
Time:                        13:12:33   Log-Likelihood:            -1.3184e+05
converged:                       True   LL-Null:                   -1.4306e+05
                                        LLR p-value:                     0.000
=======================================================================================
                          coef    std err          z      P>|z|      [0.025      0.975]
---------------------------------------------------------------------------------------
const                   3.1833      0.028    112.009      0.000       3.128       3.239
loan_amnt              -0.1295      0.006    -22.154      0.000      -0.141      -0.118
int_rate               -0.1251      0.001   -105.980      0.000      -0.127      -0.123
emp_title               0.4080      0.020     20.667      0.000       0.369       0.447
annual_inc              0.3000      0.009     32.345      0.000       0.282       0.318
desc                    0.0004   2.79e-05     13.942      0.000       0.000       0.000
dti                    -0.0249      0.001    -38.919      0.000      -0.026      -0.024
delinq_2yrs            -0.1040      0.013     -8.198      0.000      -0.129      -0.079
verification_status    -0.0900      0.012     -7.667      0.000      -0.113      -0.067
=======================================================================================



In [54]:

    
dfX_s.tail()









    Out[54]:






  
    
      
      loan_amnt
      int_rate
      emp_title
      emp_length
      home_ownership
      annual_inc
      verification_status
      issue_d
      desc
      purpose
      dti
      delinq_2yrs
      inq_last_6mths
      pub_rec
      revol_bal
      revol_util
      total_acc
      initial_list_status
    
  
  
    
      268131
      2.127000
      21.99
      1
      10
      1
      13.782613
      1
      12
      0
      3
      9.66
      1
      0
      0
      0.561689
      79.3
      13
      0
    
    
      268132
      -0.352019
      7.89
      1
      8
      1
      0.343283
      1
      12
      0
      2
      19.62
      1
      0
      0
      -0.287872
      68.7
      36
      1
    
    
      268133
      -0.572376
      9.17
      1
      1
      1
      0.130342
      1
      12
      0
      3
      3.97
      1
      0
      0
      -0.470413
      51.8
      17
      0
    
    
      268134
      0.088696
      25.99
      0
      11
      4
      -0.178766
      1
      12
      0
      3
      16.88
      0
      1
      1
      -0.504533
      45.1
      30
      0
    
    
      268135
      -0.694797
      12.59
      1
      4
      3
      -0.470701
      1
      12
      0
      3
      26.21
      0
      0
      0
      -0.323053
      50.8
      47
      1



In [55]:

    
dfX_s['pub_rec'].value_counts()









    Out[55]:





0    234589
1     33547
Name: pub_rec, dtype: int64



In [56]:

    
data_test.tail()









    Out[56]:






  
    
      
      loan_amnt
      int_rate
      emp_title
      annual_inc
      desc
      purpose_other
      purpose_2
      purpose_3
      purpose_10
      dti
      delinq_2yrs
      verification_status
    
  
  
    
      268131
      2.127000
      21.99
      1
      13.782613
      0
      0.0
      0.0
      1.0
      0.0
      9.66
      1
      1
    
    
      268132
      -0.352019
      7.89
      1
      0.343283
      0
      0.0
      1.0
      0.0
      0.0
      19.62
      1
      1
    
    
      268133
      -0.572376
      9.17
      1
      0.130342
      0
      0.0
      0.0
      1.0
      0.0
      3.97
      1
      1
    
    
      268134
      0.088696
      25.99
      0
      -0.178766
      0
      0.0
      0.0
      1.0
      0.0
      16.88
      0
      1
    
    
      268135
      -0.694797
      12.59
      1
      -0.470701
      0
      0.0
      0.0
      1.0
      0.0
      26.21
      0
      1



In [57]:

    
del data_test['purpose_other']
del data_test['purpose_2']
del data_test['purpose_3']



In [58]:

    
data_test.tail()









    Out[58]:






  
    
      
      loan_amnt
      int_rate
      emp_title
      annual_inc
      desc
      purpose_10
      dti
      delinq_2yrs
      verification_status
    
  
  
    
      268131
      2.127000
      21.99
      1
      13.782613
      0
      0.0
      9.66
      1
      1
    
    
      268132
      -0.352019
      7.89
      1
      0.343283
      0
      0.0
      19.62
      1
      1
    
    
      268133
      -0.572376
      9.17
      1
      0.130342
      0
      0.0
      3.97
      1
      1
    
    
      268134
      0.088696
      25.99
      0
      -0.178766
      0
      0.0
      16.88
      0
      1
    
    
      268135
      -0.694797
      12.59
      1
      -0.470701
      0
      0.0
      26.21
      0
      1



In [59]:

    
sm_logit(data_test, dfy)









    



Optimization terminated successfully.
         Current function value: 0.491682
         Iterations 6
                           Logit Regression Results                           
==============================================================================
Dep. Variable:            loan_status   No. Observations:               268136
Model:                          Logit   Df Residuals:                   268126
Method:                           MLE   Df Model:                            9
Date:                Tue, 14 Mar 2017   Pseudo R-squ.:                 0.07847
Time:                        13:17:15   Log-Likelihood:            -1.3184e+05
converged:                       True   LL-Null:                   -1.4306e+05
                                        LLR p-value:                     0.000
=======================================================================================
                          coef    std err          z      P>|z|      [0.025      0.975]
---------------------------------------------------------------------------------------
const                   3.1829      0.028    112.002      0.000       3.127       3.239
loan_amnt              -0.1317      0.006    -22.278      0.000      -0.143      -0.120
int_rate               -0.1247      0.001   -104.923      0.000      -0.127      -0.122
emp_title               0.4076      0.020     20.644      0.000       0.369       0.446
annual_inc              0.3010      0.009     32.418      0.000       0.283       0.319
desc                    0.0004   2.79e-05     13.956      0.000       0.000       0.000
purpose_10             -0.0520      0.021     -2.492      0.013      -0.093      -0.011
dti                    -0.0249      0.001    -38.992      0.000      -0.026      -0.024
delinq_2yrs            -0.1042      0.013     -8.214      0.000      -0.129      -0.079
verification_status    -0.0901      0.012     -7.675      0.000      -0.113      -0.067
=======================================================================================



In [83]:

    
stratified_cross_val(data_test, dfy, 10)









    



             precision    recall  f1-score   support

          0       1.00      0.00      0.01      6042
          1       0.78      1.00      0.87     20773

avg / total       0.83      0.78      0.68     26815

================================================================================
             precision    recall  f1-score   support

          0       0.87      0.03      0.06      6042
          1       0.78      1.00      0.88     20772

avg / total       0.80      0.78      0.69     26814

================================================================================
             precision    recall  f1-score   support

          0       0.63      0.06      0.11      6042
          1       0.78      0.99      0.87     20772

avg / total       0.75      0.78      0.70     26814

================================================================================
             precision    recall  f1-score   support

          0       0.62      0.09      0.16      6042
          1       0.79      0.98      0.88     20772

avg / total       0.75      0.78      0.71     26814

================================================================================
             precision    recall  f1-score   support

          0       0.54      0.11      0.19      6042
          1       0.79      0.97      0.87     20772

avg / total       0.73      0.78      0.72     26814

================================================================================
             precision    recall  f1-score   support

          0       0.42      0.10      0.16      6041
          1       0.79      0.96      0.86     20772

avg / total       0.70      0.77      0.71     26813

================================================================================
             precision    recall  f1-score   support

          0       0.44      0.09      0.14      6041
          1       0.78      0.97      0.87     20772

avg / total       0.71      0.77      0.70     26813

================================================================================
             precision    recall  f1-score   support

          0       0.56      0.11      0.18      6041
          1       0.79      0.97      0.87     20772

avg / total       0.74      0.78      0.72     26813

================================================================================
             precision    recall  f1-score   support

          0       0.50      0.13      0.20      6041
          1       0.79      0.96      0.87     20772

avg / total       0.73      0.77      0.72     26813

================================================================================
             precision    recall  f1-score   support

          0       0.43      0.12      0.19      6041
          1       0.79      0.95      0.86     20772

avg / total       0.71      0.77      0.71     26813

================================================================================



In [ ]:

	loan_amnt	int_rate	emp_title	emp_length	home_ownership	annual_inc	verification_status	issue_d	loan_status	purpose	dti	delinq_2yrs	inq_last_6mths	pub_rec	revol_bal	revol_util	total_acc	initial_list_status
268131	31050	21.99	1	10	1	875000.0	1	12	1	3	9.66	1	0	0	25770	79.3	13	0
268132	10800	7.89	1	8	1	92400.0	1	12	1	2	19.62	1	0	0	9760	68.7	36	1
268133	9000	9.17	1	1	1	80000.0	1	12	1	3	3.97	1	0	0	6320	51.8	17	0
268134	14400	25.99	0	11	4	62000.0	1	12	1	3	16.88	0	1	1	5677	45.1	30	0
268135	8000	12.59	1	4	3	45000.0	1	12	1	3	26.21	0	0	0	9097	50.8	47	1

	loan_amnt	int_rate	emp_title	emp_length	home_ownership	annual_inc	verification_status	issue_d	purpose	dti	delinq_2yrs	inq_last_6mths	pub_rec	revol_bal	revol_util	total_acc	initial_list_status
268131	2.127000	21.99	1	10	1	13.782613	1	12	3	9.66	1	0	0	0.561689	79.3	13	0
268132	-0.352019	7.89	1	8	1	0.343283	1	12	2	19.62	1	0	0	-0.287872	68.7	36	1
268133	-0.572376	9.17	1	1	1	0.130342	1	12	3	3.97	1	0	0	-0.470413	51.8	17	0
268134	0.088696	25.99	0	11	4	-0.178766	1	12	3	16.88	0	1	1	-0.504533	45.1	30	0
268135	-0.694797	12.59	1	4	3	-0.470701	1	12	3	26.21	0	0	0	-0.323053	50.8	47	1

	loan_amnt	int_rate	emp_title	annual_inc	desc	purpose_other	purpose_2	purpose_3	purpose_10	dti	delinq_2yrs	verification_status
0	-0.756007	13.75	1	-0.865673	91	0.0	0.0	1.0	0.0	14.29	1	0
1	-1.245690	10.28	1	-0.900018	239	1.0	0.0	0.0	0.0	1.50	0	0
2	-0.970243	7.43	1	0.903112	41	0.0	0.0	1.0	0.0	0.27	0	0
3	-1.062059	7.43	1	-0.556565	845	1.0	0.0	0.0	0.0	2.55	0	0
4	-1.527257	11.54	1	-0.900018	649	0.0	1.0	0.0	0.0	2.04	0	0
5	-0.174509	10.59	1	-0.213111	751	1.0	0.0	0.0	0.0	17.12	1	0
6	-1.343626	15.96	1	-0.347058	27	0.0	0.0	1.0	0.0	12.57	0	0
7	-1.062059	9.01	1	3.049696	22	0.0	0.0	0.0	1.0	10.00	1	0
8	-1.245690	9.96	1	-1.011640	536	1.0	0.0	0.0	0.0	16.44	0	0
9	-1.062059	7.43	1	1.332429	103	1.0	0.0	0.0	0.0	0.00	0	0
10	-1.062059	7.43	1	0.387932	214	1.0	0.0	0.0	0.0	3.83	0	0
11	1.386355	10.59	1	-0.419183	598	1.0	0.0	0.0	0.0	4.05	0	0
12	-0.449955	10.28	1	-0.773352	740	1.0	0.0	0.0	0.0	10.70	0	0
13	-1.062059	9.01	1	0.010134	28	0.0	1.0	0.0	0.0	9.96	0	0
14	-1.049817	8.38	1	-0.693946	84	1.0	0.0	0.0	0.0	14.78	0	0
15	-1.013090	7.75	0	-1.106090	351	1.0	0.0	0.0	0.0	3.00	0	0
16	-1.025332	8.38	1	0.027306	88	0.0	1.0	0.0	0.0	14.37	0	0
17	-1.062059	11.22	1	-0.728291	260	0.0	1.0	0.0	0.0	18.64	0	0
18	-1.306900	10.28	1	2.191062	485	1.0	0.0	0.0	0.0	0.00	1	0
19	-1.306900	8.70	0	-0.814155	527	1.0	0.0	0.0	0.0	14.54	0	0
20	-1.062059	7.43	1	0.817249	68	0.0	0.0	1.0	0.0	2.29	0	0
21	-1.062059	8.07	0	1.847609	33	1.0	0.0	0.0	0.0	5.55	0	0
22	-1.062059	9.33	1	1.847609	41	1.0	0.0	0.0	0.0	11.93	0	0
23	-1.062059	9.96	1	-0.419183	104	0.0	0.0	1.0	0.0	8.03	0	0
24	-1.062059	11.22	1	0.130342	0	0.0	1.0	0.0	0.0	1.21	0	0
25	-1.062059	7.43	1	0.216206	0	0.0	1.0	0.0	0.0	0.31	0	0
26	-1.062059	8.70	1	0.044479	72	0.0	1.0	0.0	0.0	15.55	0	0
27	-1.368110	8.07	1	0.645522	148	1.0	0.0	0.0	0.0	11.33	0	0
28	-0.633586	10.28	1	-0.934363	406	0.0	1.0	0.0	0.0	6.40	1	0
29	-1.062059	8.07	0	0.473796	0	0.0	0.0	1.0	0.0	2.30	0	0
...	...	...	...	...	...	...	...	...	...	...	...	...
268106	0.284569	16.99	1	-0.470701	0	1.0	0.0	0.0	0.0	17.17	1	1
268107	0.174390	28.99	1	-0.522219	0	0.0	0.0	1.0	0.0	27.49	0	1
268108	-1.062059	11.49	1	-0.384838	0	0.0	1.0	0.0	0.0	8.52	0	1
268109	-1.306900	11.49	1	-0.693946	0	0.0	0.0	1.0	0.0	9.79	0	1
268110	1.508776	19.99	1	0.336414	0	0.0	1.0	0.0	0.0	19.59	0	1
268111	1.753617	18.99	1	2.294098	0	0.0	0.0	1.0	0.0	11.88	0	1
268112	-1.062059	10.99	1	-0.470701	0	1.0	0.0	0.0	0.0	21.95	0	1
268113	0.162148	9.99	1	-0.213111	0	0.0	0.0	1.0	0.0	9.86	0	0
268114	-1.123269	9.17	1	-0.814155	0	0.0	0.0	1.0	0.0	31.20	0	0
268115	-1.429321	12.05	0	-0.137551	0	0.0	0.0	1.0	0.0	22.29	0	1
268116	0.039727	10.99	1	1.160702	0	0.0	1.0	0.0	0.0	7.32	0	1
268117	-1.107966	15.41	0	-0.728291	0	0.0	0.0	1.0	0.0	16.28	0	1
268118	0.529410	15.41	1	-0.110075	0	0.0	0.0	1.0	0.0	21.92	0	1
268119	2.610562	16.99	1	0.319242	0	0.0	0.0	1.0	0.0	26.72	0	1
268120	0.162148	12.05	1	0.130342	0	0.0	0.0	0.0	1.0	14.75	0	1
268121	2.610562	13.67	1	0.319242	0	1.0	0.0	0.0	0.0	3.81	0	1
268122	1.998458	7.26	1	1.847609	0	1.0	0.0	0.0	0.0	12.39	0	1
268123	-0.272445	16.55	1	-0.213111	0	0.0	0.0	0.0	1.0	12.60	0	1
268124	-1.062059	13.67	1	-0.106641	0	0.0	0.0	0.0	1.0	19.56	0	1
268125	2.610562	9.17	1	1.783211	0	0.0	1.0	0.0	0.0	22.68	0	1
268126	-0.817217	12.59	1	-0.630407	0	0.0	0.0	1.0	0.0	25.51	0	1
268127	0.162148	17.57	1	-0.419183	0	0.0	0.0	1.0	0.0	27.12	0	0
268128	1.925006	17.86	1	0.559659	0	0.0	0.0	1.0	0.0	30.19	0	1
268129	0.162148	18.49	1	0.302069	0	0.0	0.0	1.0	0.0	25.13	1	0
268130	0.774252	6.24	1	0.903112	0	0.0	1.0	0.0	0.0	14.20	1	1
268131	2.127000	21.99	1	13.782613	0	0.0	0.0	1.0	0.0	9.66	1	1
268132	-0.352019	7.89	1	0.343283	0	0.0	1.0	0.0	0.0	19.62	1	1
268133	-0.572376	9.17	1	0.130342	0	0.0	0.0	1.0	0.0	3.97	1	1
268134	0.088696	25.99	0	-0.178766	0	0.0	0.0	1.0	0.0	16.88	0	1
268135	-0.694797	12.59	1	-0.470701	0	0.0	0.0	1.0	0.0	26.21	0	1