In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
from sklearn import ensemble
from sklearn.model_selection import cross_val_score

In [2]:
# Access to data

y2015 = pd.read_csv(
    'D:\\Users\\Borja.gonzalez\\Desktop\\Thinkful-DataScience-Borja\\LoanStats3d.csv',skipinitialspace=True,
    header=1)


D:\Users\Borja.gonzalez\AppData\Local\Continuum\anaconda3\lib\site-packages\IPython\core\interactiveshell.py:2698: DtypeWarning: Columns (0,19,55) have mixed types. Specify dtype option on import or set low_memory=False.
  interactivity=interactivity, compiler=compiler, result=result)

In [3]:
#Data cleaning
categorical = y2015.select_dtypes(include=['object'])
for i in categorical:
    column = categorical[i]
    print(i)
    print(column.nunique())


id
421097
term
2
int_rate
110
grade
7
sub_grade
35
emp_title
120812
emp_length
12
home_ownership
4
verification_status
3
issue_d
12
loan_status
7
pymnt_plan
1
url
421095
desc
34
purpose
14
title
27
zip_code
914
addr_state
49
earliest_cr_line
668
revol_util
1211
initial_list_status
2
last_pymnt_d
25
next_pymnt_d
4
last_credit_pull_d
26
application_type
2
verification_status_joint
3

In [4]:
# Drop other columns with many unique variables
y2015.drop(['url', 'emp_title', 'zip_code', 'earliest_cr_line', 'revol_util',
            'sub_grade', 'addr_state', 'desc'], 1, inplace=True)

In [5]:
y2015.tail()


Out[5]:
id member_id loan_amnt funded_amnt funded_amnt_inv term int_rate installment grade emp_length ... num_tl_90g_dpd_24m num_tl_op_past_12m pct_tl_nvr_dlq percent_bc_gt_75 pub_rec_bankruptcies tax_liens tot_hi_cred_lim total_bal_ex_mort total_bc_limit total_il_high_credit_limit
421092 36271333 38982739.0 13000.0 13000.0 13000.0 60 months 15.99% 316.07 D 5 years ... 0.0 3.0 100.0 50.0 1.0 0.0 51239.0 34178.0 10600.0 33239.0
421093 36490806 39222577.0 12000.0 12000.0 12000.0 60 months 19.99% 317.86 E 1 year ... 1.0 2.0 95.0 66.7 0.0 0.0 96919.0 58418.0 9700.0 69919.0
421094 36271262 38982659.0 20000.0 20000.0 20000.0 36 months 11.99% 664.20 B 10+ years ... 0.0 1.0 100.0 50.0 0.0 1.0 43740.0 33307.0 41700.0 0.0
421095 Total amount funded in policy code 1: 6417608175 NaN NaN NaN NaN NaN NaN NaN NaN NaN ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
421096 Total amount funded in policy code 2: 1944088810 NaN NaN NaN NaN NaN NaN NaN NaN NaN ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN

5 rows × 103 columns


In [6]:
# Remove two summary rows at the end that don't actually contain data.
y2015 = y2015[:-2]

In [7]:
# Convert ID and Interest Rate to numeric.
y2015['id'] = pd.to_numeric(y2015['id'], errors='coerce')
y2015['int_rate'] = pd.to_numeric(y2015['int_rate'].str.strip('%'), errors='coerce')

In [8]:
pd.get_dummies(y2015).head()


Out[8]:
id member_id loan_amnt funded_amnt funded_amnt_inv int_rate installment annual_inc dti delinq_2yrs ... last_credit_pull_d_Nov-2016 last_credit_pull_d_Oct-2015 last_credit_pull_d_Oct-2016 last_credit_pull_d_Sep-2015 last_credit_pull_d_Sep-2016 application_type_INDIVIDUAL application_type_JOINT verification_status_joint_Not Verified verification_status_joint_Source Verified verification_status_joint_Verified
0 68009401 72868139.0 16000.0 16000.0 16000.0 14.85 379.39 48000.0 33.18 0.0 ... 0 0 0 0 0 1 0 0 0 0
1 68354783 73244544.0 9600.0 9600.0 9600.0 7.49 298.58 60000.0 22.44 0.0 ... 0 0 0 0 0 1 0 0 0 0
2 68466916 73356753.0 25000.0 25000.0 25000.0 7.49 777.55 109000.0 26.02 0.0 ... 0 0 0 0 0 1 0 0 0 0
3 68466961 73356799.0 28000.0 28000.0 28000.0 6.49 858.05 92000.0 21.60 0.0 ... 0 0 0 0 0 1 0 0 0 0
4 68495092 73384866.0 8650.0 8650.0 8650.0 19.89 320.99 55000.0 25.49 0.0 ... 0 0 0 0 0 1 0 0 0 0

5 rows × 238 columns


In [9]:
rfc = ensemble.RandomForestClassifier()
X = y2015.drop('loan_status', 1)
Y = y2015['loan_status']
X = pd.get_dummies(X)
X = X.dropna(axis=1)

In [10]:
X.dropna().head()


Out[10]:
id member_id loan_amnt funded_amnt funded_amnt_inv int_rate installment annual_inc dti delinq_2yrs ... last_credit_pull_d_Nov-2016 last_credit_pull_d_Oct-2015 last_credit_pull_d_Oct-2016 last_credit_pull_d_Sep-2015 last_credit_pull_d_Sep-2016 application_type_INDIVIDUAL application_type_JOINT verification_status_joint_Not Verified verification_status_joint_Source Verified verification_status_joint_Verified
0 68009401 72868139.0 16000.0 16000.0 16000.0 14.85 379.39 48000.0 33.18 0.0 ... 0 0 0 0 0 1 0 0 0 0
1 68354783 73244544.0 9600.0 9600.0 9600.0 7.49 298.58 60000.0 22.44 0.0 ... 0 0 0 0 0 1 0 0 0 0
2 68466916 73356753.0 25000.0 25000.0 25000.0 7.49 777.55 109000.0 26.02 0.0 ... 0 0 0 0 0 1 0 0 0 0
3 68466961 73356799.0 28000.0 28000.0 28000.0 6.49 858.05 92000.0 21.60 0.0 ... 0 0 0 0 0 1 0 0 0 0
4 68495092 73384866.0 8650.0 8650.0 8650.0 19.89 320.99 55000.0 25.49 0.0 ... 0 0 0 0 0 1 0 0 0 0

5 rows × 202 columns


In [11]:
X.isnull().any()


Out[11]:
id                                           False
member_id                                    False
loan_amnt                                    False
funded_amnt                                  False
funded_amnt_inv                              False
int_rate                                     False
installment                                  False
annual_inc                                   False
dti                                          False
delinq_2yrs                                  False
inq_last_6mths                               False
open_acc                                     False
pub_rec                                      False
revol_bal                                    False
total_acc                                    False
out_prncp                                    False
out_prncp_inv                                False
total_pymnt                                  False
total_pymnt_inv                              False
total_rec_prncp                              False
total_rec_int                                False
total_rec_late_fee                           False
recoveries                                   False
collection_recovery_fee                      False
last_pymnt_amnt                              False
collections_12_mths_ex_med                   False
policy_code                                  False
acc_now_delinq                               False
tot_coll_amt                                 False
tot_cur_bal                                  False
                                             ...  
last_credit_pull_d_Apr-2016                  False
last_credit_pull_d_Aug-2015                  False
last_credit_pull_d_Aug-2016                  False
last_credit_pull_d_Dec-2014                  False
last_credit_pull_d_Dec-2015                  False
last_credit_pull_d_Dec-2016                  False
last_credit_pull_d_Feb-2015                  False
last_credit_pull_d_Feb-2016                  False
last_credit_pull_d_Jan-2015                  False
last_credit_pull_d_Jan-2016                  False
last_credit_pull_d_Jan-2017                  False
last_credit_pull_d_Jul-2015                  False
last_credit_pull_d_Jul-2016                  False
last_credit_pull_d_Jun-2015                  False
last_credit_pull_d_Jun-2016                  False
last_credit_pull_d_Mar-2015                  False
last_credit_pull_d_Mar-2016                  False
last_credit_pull_d_May-2015                  False
last_credit_pull_d_May-2016                  False
last_credit_pull_d_Nov-2015                  False
last_credit_pull_d_Nov-2016                  False
last_credit_pull_d_Oct-2015                  False
last_credit_pull_d_Oct-2016                  False
last_credit_pull_d_Sep-2015                  False
last_credit_pull_d_Sep-2016                  False
application_type_INDIVIDUAL                  False
application_type_JOINT                       False
verification_status_joint_Not Verified       False
verification_status_joint_Source Verified    False
verification_status_joint_Verified           False
Length: 202, dtype: bool

In [12]:
X1 = X.drop(X.columns[[26,88,95,98,131,139,197,198,199,200,201]],axis=1).head()
X1 = X.drop(X[['delinq_2yrs', 'inq_last_6mths','open_acc','pub_rec','revol_bal','total_acc','last_credit_pull_d_Apr-2016',
'last_credit_pull_d_Aug-2015',                  
'last_credit_pull_d_Aug-2016',                  
'last_credit_pull_d_Dec-2014',                 
'last_credit_pull_d_Dec-2015',                  
'last_credit_pull_d_Dec-2016',                  
'last_credit_pull_d_Feb-2015',                  
'last_credit_pull_d_Feb-2016',                  
'last_credit_pull_d_Jan-2015',                  
'last_credit_pull_d_Jan-2016',                  
'last_credit_pull_d_Jan-2017',                  
'last_credit_pull_d_Jul-2015',                 
'last_credit_pull_d_Jul-2016',                 
'last_credit_pull_d_Jun-2015',                
'last_credit_pull_d_Jun-2016',               
'last_credit_pull_d_Mar-2015',              
'last_credit_pull_d_Mar-2016',             
'last_credit_pull_d_May-2015',            
'last_credit_pull_d_May-2016',           
'last_credit_pull_d_Nov-2015',          
'last_credit_pull_d_Nov-2016',         
'last_credit_pull_d_Oct-2015',        
'last_credit_pull_d_Oct-2016',       
'last_credit_pull_d_Sep-2015',      
'last_credit_pull_d_Sep-2016',    
'application_type_INDIVIDUAL',     
'application_type_JOINT',                     
'verification_status_joint_Not Verified',   
'verification_status_joint_Source Verified',  
'verification_status_joint_Verified',
'last_pymnt_d_Apr-2015',
'last_pymnt_d_Apr-2016',            
'last_pymnt_d_Aug-2015',            
'last_pymnt_d_Aug-2016',
'last_pymnt_d_Dec-2015',           
'last_pymnt_d_Dec-2016',           
'last_pymnt_d_Feb-2015',           
'last_pymnt_d_Feb-2016',           
'last_pymnt_d_Jan-2015',           
'last_pymnt_d_Jan-2016',           
'last_pymnt_d_Jan-2017',           
'last_pymnt_d_Jul-2015',           
'last_pymnt_d_Jul-2016',            
'last_pymnt_d_Jun-2015',            
'last_pymnt_d_Jun-2016',            
'last_pymnt_d_Mar-2015',            
'last_pymnt_d_Mar-2016',            
'last_pymnt_d_May-2015',            
'last_pymnt_d_May-2016',            
'last_pymnt_d_Nov-2015',            
'last_pymnt_d_Nov-2016',            
'last_pymnt_d_Oct-2015',            
'last_pymnt_d_Oct-2016',            
'last_pymnt_d_Sep-2015',            
'last_pymnt_d_Sep-2016',           
'next_pymnt_d_Feb-2017',            
'next_pymnt_d_Jan-2017',           
'next_pymnt_d_Jul-2016',            
'next_pymnt_d_Mar-2017',
'last_credit_pull_d_Apr-2015',
'purpose_wedding',                                    
'title_Business',                                     
'title_Car financing',                                
'title_Credit Card/Auto Repair',                      
'title_Credit card refinancing',                      
'title_Debt consolidation',                           
'title_DebtC',                                        
'title_Green loan',                                   
'title_Home buying',                                  
'title_Home improvement',                             
'title_Learning and training',                        
'title_Major purchase',                               
'title_Medical expenses',                             
'title_Moving and relocation',                        
'title_New Baby and New House (CC Consolidate)',      
'title_Other',                                        
'title_Pay off Lowes Card',                           
'title_Paying off higher interest cards & auto',      
'title_Prescription Drug and Medical Costs',          
'title_SAVE',                                         
'title_Simple Loan Until Contract Is Completed',      
'title_Student Loan',                                 
'title_Trying to come back to reality!',              
'title_Vacation',                                     
'title_considerate',                                  
'title_new day',                                      
'title_new kitchen for momma!',                       
'title_odymeds',                                      
'initial_list_status_f',                              
'initial_list_status_w',
'home_ownership_RENT',
'verification_status_Not Verified',       
'verification_status_Source Verified',    
'verification_status_Verified',           
'issue_d_Apr-2015',                       
'issue_d_Aug-2015',                       
'issue_d_Dec-2015',                       
'issue_d_Feb-2015',                       
'issue_d_Jan-2015',                       
'issue_d_Jul-2015',                       
'issue_d_Jun-2015',                       
'issue_d_Mar-2015',                       
'issue_d_May-2015',                       
'issue_d_Nov-2015',                       
'issue_d_Oct-2015',                       
'issue_d_Sep-2015',                       
'pymnt_plan_n',                           
'purpose_car',                            
'purpose_credit_card',                    
'purpose_debt_consolidation',             
'purpose_educational',                    
'purpose_home_improvement',               
'purpose_house',                          
'purpose_major_purchase',                 
'purpose_medical',                        
'purpose_moving',                         
'purpose_other',                          
'purpose_renewable_energy',              
               'purpose_small_business', 
'purpose_vacation', 
'term_ 36 months',                
'term_ 60 months',                
'grade_A',                        
'grade_B',                        
'grade_C',                        
'grade_D',                        
'grade_E',                        
'grade_F',                        
'grade_G',                        
'emp_length_1 year',              
'emp_length_10+ years',           
'emp_length_2 years',             
'emp_length_3 years',             
'emp_length_4 years',             
'emp_length_5 years',             
'emp_length_6 years',             
'emp_length_7 years',             
'emp_length_8 years',             
'emp_length_9 years',             
'emp_length_< 1 year',            
'emp_length_n/a',                 
'home_ownership_ANY',             
'home_ownership_MORTGAGE',        
'home_ownership_OWN',
'num_accts_ever_120_pd',        
'num_actv_bc_tl',                
'num_actv_rev_tl',               
'num_bc_sats',                   
'num_bc_tl',                     
'num_il_tl',                     'num_op_rev_tl',   
'num_rev_tl_bal_gt_0',           
'num_sats',                      
'num_tl_30dpd',                  
'num_tl_90g_dpd_24m',            
'num_tl_op_past_12m',            
'pct_tl_nvr_dlq',                
'pub_rec_bankruptcies',          
'tax_liens',                     
'tot_hi_cred_lim', 'out_prncp'            
              ]],axis=1)

In [13]:
# Make the correlation matrix.
corrmat = X1.corr()
print(corrmat)


                                  id  member_id  loan_amnt  funded_amnt  \
id                          1.000000   0.997540  -0.008288    -0.008288   
member_id                   0.997540   1.000000  -0.008620    -0.008620   
loan_amnt                  -0.008288  -0.008620   1.000000     1.000000   
funded_amnt                -0.008288  -0.008620   1.000000     1.000000   
funded_amnt_inv            -0.008554  -0.008885   0.999994     0.999994   
int_rate                   -0.053402  -0.053447   0.140572     0.140572   
installment                -0.012919  -0.013274   0.941205     0.941205   
annual_inc                  0.015055   0.014958   0.305734     0.305734   
dti                         0.005692   0.005806   0.006103     0.006103   
out_prncp_inv               0.192058   0.191755   0.623961     0.623961   
total_pymnt                -0.219883  -0.219945   0.706184     0.706184   
total_pymnt_inv            -0.220078  -0.220139   0.706157     0.706157   
total_rec_prncp            -0.188353  -0.188443   0.556266     0.556266   
total_rec_int              -0.165885  -0.165861   0.736585     0.736585   
total_rec_late_fee         -0.015779  -0.015748   0.051888     0.051888   
recoveries                 -0.070027  -0.069799   0.077870     0.077870   
collection_recovery_fee    -0.070594  -0.070372   0.079190     0.079190   
last_pymnt_amnt            -0.068844  -0.068974   0.259864     0.259864   
collections_12_mths_ex_med  0.011953   0.011891  -0.027454    -0.027454   
policy_code                      NaN        NaN        NaN          NaN   
acc_now_delinq             -0.004834  -0.004732   0.000645     0.000645   
tot_coll_amt                0.002312   0.002329  -0.021437    -0.021437   
tot_cur_bal                 0.011241   0.010903   0.325352     0.325352   
total_rev_hi_lim            0.019249   0.019148   0.330877     0.330877   
acc_open_past_24mths        0.015502   0.015281  -0.010114    -0.010114   
avg_cur_bal                 0.007556   0.007317   0.238156     0.238156   
chargeoff_within_12_mths    0.001075   0.000655  -0.005382    -0.005382   
delinq_amnt                 0.001961   0.001977  -0.002096    -0.002096   
mo_sin_old_rev_tl_op        0.004593   0.004731   0.183754     0.183754   
mo_sin_rcnt_rev_tl_op       0.009478   0.009476   0.066548     0.066548   
mo_sin_rcnt_tl             -0.005305  -0.005469   0.041686     0.041686   
mort_acc                   -0.013036  -0.013261   0.231574     0.231574   
total_bal_ex_mort           0.012864   0.012688   0.289565     0.289565   
total_bc_limit              0.017944   0.017822   0.395843     0.395843   
total_il_high_credit_limit  0.020870   0.020684   0.203093     0.203093   

                            funded_amnt_inv  int_rate  installment  \
id                                -0.008554 -0.053402    -0.012919   
member_id                         -0.008885 -0.053447    -0.013274   
loan_amnt                          0.999994  0.140572     0.941205   
funded_amnt                        0.999994  0.140572     0.941205   
funded_amnt_inv                    1.000000  0.140209     0.941187   
int_rate                           0.140209  1.000000     0.124426   
installment                        0.941187  0.124426     1.000000   
annual_inc                         0.305803 -0.090399     0.297393   
dti                                0.006044  0.077932     0.001893   
out_prncp_inv                      0.623899  0.106081     0.506745   
total_pymnt                        0.706259  0.087256     0.732579   
total_pymnt_inv                    0.706239  0.087028     0.732545   
total_rec_prncp                    0.556377 -0.057379     0.608280   
total_rec_int                      0.736483  0.504617     0.654990   
total_rec_late_fee                 0.051861  0.063685     0.063739   
recoveries                         0.077869  0.119825     0.082061   
collection_recovery_fee            0.079186  0.123180     0.084164   
last_pymnt_amnt                    0.259878  0.069755     0.264130   
collections_12_mths_ex_med        -0.027480  0.019234    -0.022146   
policy_code                             NaN       NaN          NaN   
acc_now_delinq                     0.000630  0.027495     0.004108   
tot_coll_amt                      -0.021435  0.009697    -0.016802   
tot_cur_bal                        0.325401 -0.087011     0.295468   
total_rev_hi_lim                   0.330958 -0.174886     0.303476   
acc_open_past_24mths              -0.010221  0.228782     0.010295   
avg_cur_bal                        0.238198 -0.081874     0.215902   
chargeoff_within_12_mths          -0.005397  0.011913    -0.002996   
delinq_amnt                       -0.002097  0.009647    -0.000760   
mo_sin_old_rev_tl_op               0.183840 -0.160134     0.158800   
mo_sin_rcnt_rev_tl_op              0.066593 -0.122028     0.047059   
mo_sin_rcnt_tl                     0.041750 -0.150179     0.027120   
mort_acc                           0.231630 -0.076131     0.202128   
total_bal_ex_mort                  0.289554 -0.000676     0.265224   
total_bc_limit                     0.395973 -0.257222     0.358718   
total_il_high_credit_limit         0.203085  0.004055     0.181215   

                            annual_inc       dti  out_prncp_inv  \
id                            0.015055  0.005692       0.192058   
member_id                     0.014958  0.005806       0.191755   
loan_amnt                     0.305734  0.006103       0.623961   
funded_amnt                   0.305734  0.006103       0.623961   
funded_amnt_inv               0.305803  0.006044       0.623899   
int_rate                     -0.090399  0.077932       0.106081   
installment                   0.297393  0.001893       0.506745   
annual_inc                    1.000000 -0.068237       0.171836   
dti                          -0.068237  1.000000       0.022736   
out_prncp_inv                 0.171836  0.022736       1.000000   
total_pymnt                   0.233817 -0.015898       0.048092   
total_pymnt_inv               0.233854 -0.015935       0.048055   
total_rec_prncp               0.209988 -0.029406      -0.135268   
total_rec_int                 0.157346  0.039614       0.684557   
total_rec_late_fee            0.025224 -0.001150       0.004228   
recoveries                    0.011110  0.009553      -0.127730   
collection_recovery_fee       0.010418  0.010185      -0.128250   
last_pymnt_amnt               0.098264 -0.018194      -0.371058   
collections_12_mths_ex_med   -0.007785 -0.003588      -0.014463   
policy_code                        NaN       NaN            NaN   
acc_now_delinq                0.012628  0.000583      -0.001037   
tot_coll_amt                 -0.000509 -0.007619      -0.013646   
tot_cur_bal                   0.379008 -0.005142       0.185201   
total_rev_hi_lim              0.256553  0.026437       0.195038   
acc_open_past_24mths          0.045446  0.055647      -0.063092   
avg_cur_bal                   0.293642 -0.050366       0.128512   
chargeoff_within_12_mths      0.007422 -0.002340      -0.003882   
delinq_amnt                   0.005693 -0.002667      -0.001822   
mo_sin_old_rev_tl_op          0.130148  0.018276       0.136148   
mo_sin_rcnt_rev_tl_op         0.031403 -0.012088       0.075998   
mo_sin_rcnt_tl               -0.021290 -0.037806       0.060175   
mort_acc                      0.209046 -0.020044       0.123226   
total_bal_ex_mort             0.297563  0.100831       0.187550   
total_bc_limit                0.257184  0.012969       0.231732   
total_il_high_credit_limit    0.242637  0.113685       0.134440   

                                       ...              avg_cur_bal  \
id                                     ...                 0.007556   
member_id                              ...                 0.007317   
loan_amnt                              ...                 0.238156   
funded_amnt                            ...                 0.238156   
funded_amnt_inv                        ...                 0.238198   
int_rate                               ...                -0.081874   
installment                            ...                 0.215902   
annual_inc                             ...                 0.293642   
dti                                    ...                -0.050366   
out_prncp_inv                          ...                 0.128512   
total_pymnt                            ...                 0.198755   
total_pymnt_inv                        ...                 0.198775   
total_rec_prncp                        ...                 0.183546   
total_rec_int                          ...                 0.119077   
total_rec_late_fee                     ...                 0.022916   
recoveries                             ...                -0.003055   
collection_recovery_fee                ...                -0.004201   
last_pymnt_amnt                        ...                 0.111025   
collections_12_mths_ex_med             ...                -0.016492   
policy_code                            ...                      NaN   
acc_now_delinq                         ...                 0.017039   
tot_coll_amt                           ...                -0.005693   
tot_cur_bal                            ...                 0.834840   
total_rev_hi_lim                       ...                 0.225868   
acc_open_past_24mths                   ...                -0.102708   
avg_cur_bal                            ...                 1.000000   
chargeoff_within_12_mths               ...                 0.007553   
delinq_amnt                            ...                 0.016753   
mo_sin_old_rev_tl_op                   ...                 0.127582   
mo_sin_rcnt_rev_tl_op                  ...                 0.163660   
mo_sin_rcnt_tl                         ...                 0.043938   
mort_acc                               ...                 0.452144   
total_bal_ex_mort                      ...                 0.297674   
total_bc_limit                         ...                 0.148730   
total_il_high_credit_limit             ...                 0.201870   

                            chargeoff_within_12_mths  delinq_amnt  \
id                                          0.001075     0.001961   
member_id                                   0.000655     0.001977   
loan_amnt                                  -0.005382    -0.002096   
funded_amnt                                -0.005382    -0.002096   
funded_amnt_inv                            -0.005397    -0.002097   
int_rate                                    0.011913     0.009647   
installment                                -0.002996    -0.000760   
annual_inc                                  0.007422     0.005693   
dti                                        -0.002340    -0.002667   
out_prncp_inv                              -0.003882    -0.001822   
total_pymnt                                -0.004289    -0.001490   
total_pymnt_inv                            -0.004301    -0.001490   
total_rec_prncp                            -0.005555    -0.002139   
total_rec_int                               0.002333     0.001520   
total_rec_late_fee                          0.004216     0.000787   
recoveries                                  0.002795     0.001075   
collection_recovery_fee                     0.003160     0.001137   
last_pymnt_amnt                            -0.003493    -0.000646   
collections_12_mths_ex_med                  0.042027     0.008177   
policy_code                                      NaN          NaN   
acc_now_delinq                              0.034094     0.127571   
tot_coll_amt                                0.013216     0.002142   
tot_cur_bal                                 0.006576     0.017337   
total_rev_hi_lim                           -0.013072    -0.000032   
acc_open_past_24mths                        0.007793    -0.000521   
avg_cur_bal                                 0.007553     0.016753   
chargeoff_within_12_mths                    1.000000     0.007182   
delinq_amnt                                 0.007182     1.000000   
mo_sin_old_rev_tl_op                        0.034235     0.006876   
mo_sin_rcnt_rev_tl_op                      -0.001974     0.001056   
mo_sin_rcnt_tl                             -0.005090     0.002235   
mort_acc                                    0.033692     0.015543   
total_bal_ex_mort                          -0.006502     0.000659   
total_bc_limit                             -0.020472    -0.001576   
total_il_high_credit_limit                  0.003073    -0.000250   

                            mo_sin_old_rev_tl_op  mo_sin_rcnt_rev_tl_op  \
id                                      0.004593               0.009478   
member_id                               0.004731               0.009476   
loan_amnt                               0.183754               0.066548   
funded_amnt                             0.183754               0.066548   
funded_amnt_inv                         0.183840               0.066593   
int_rate                               -0.160134              -0.122028   
installment                             0.158800               0.047059   
annual_inc                              0.130148               0.031403   
dti                                     0.018276              -0.012088   
out_prncp_inv                           0.136148               0.075998   
total_pymnt                             0.119081               0.024781   
total_pymnt_inv                         0.119130               0.024807   
total_rec_prncp                         0.109807               0.024743   
total_rec_int                           0.074804               0.012598   
total_rec_late_fee                     -0.007191               0.000203   
recoveries                             -0.014001              -0.018470   
collection_recovery_fee                -0.015111              -0.019114   
last_pymnt_amnt                         0.032103              -0.018430   
collections_12_mths_ex_med             -0.005951              -0.013079   
policy_code                                  NaN                    NaN   
acc_now_delinq                          0.024103               0.003627   
tot_coll_amt                            0.033676              -0.014387   
tot_cur_bal                             0.181803               0.039669   
total_rev_hi_lim                        0.261317              -0.036778   
acc_open_past_24mths                   -0.056390              -0.413834   
avg_cur_bal                             0.127582               0.163660   
chargeoff_within_12_mths                0.034235              -0.001974   
delinq_amnt                             0.006876               0.001056   
mo_sin_old_rev_tl_op                    1.000000               0.072080   
mo_sin_rcnt_rev_tl_op                   0.072080               1.000000   
mo_sin_rcnt_tl                          0.045998               0.618613   
mort_acc                                0.291468               0.020292   
total_bal_ex_mort                       0.104659               0.019521   
total_bc_limit                          0.279812              -0.004718   
total_il_high_credit_limit              0.013877               0.004062   

                            mo_sin_rcnt_tl  mort_acc  total_bal_ex_mort  \
id                               -0.005305 -0.013036           0.012864   
member_id                        -0.005469 -0.013261           0.012688   
loan_amnt                         0.041686  0.231574           0.289565   
funded_amnt                       0.041686  0.231574           0.289565   
funded_amnt_inv                   0.041750  0.231630           0.289554   
int_rate                         -0.150179 -0.076131          -0.000676   
installment                       0.027120  0.202128           0.265224   
annual_inc                       -0.021290  0.209046           0.297563   
dti                              -0.037806 -0.020044           0.100831   
out_prncp_inv                     0.060175  0.123226           0.187550   
total_pymnt                       0.008806  0.195664           0.198004   
total_pymnt_inv                   0.008841  0.195691           0.197993   
total_rec_prncp                   0.014739  0.178249           0.159386   
total_rec_int                    -0.012017  0.125695           0.195364   
total_rec_late_fee               -0.009785 -0.000210           0.020319   
recoveries                       -0.024589 -0.002563           0.018588   
collection_recovery_fee          -0.026041 -0.002358           0.019189   
last_pymnt_amnt                  -0.032823  0.113548           0.076230   
collections_12_mths_ex_med       -0.010853 -0.010133          -0.018917   
policy_code                            NaN       NaN                NaN   
acc_now_delinq                    0.000093  0.022914           0.008106   
tot_coll_amt                     -0.012672  0.016086          -0.017391   
tot_cur_bal                      -0.055598  0.524410           0.526932   
total_rev_hi_lim                 -0.018307  0.211589           0.437185   
acc_open_past_24mths             -0.446766  0.056884           0.137259   
avg_cur_bal                       0.043938  0.452144           0.297674   
chargeoff_within_12_mths         -0.005090  0.033692          -0.006502   
delinq_amnt                       0.002235  0.015543           0.000659   
mo_sin_old_rev_tl_op              0.045998  0.291468           0.104659   
mo_sin_rcnt_rev_tl_op             0.618613  0.020292           0.019521   
mo_sin_rcnt_tl                    1.000000 -0.048839          -0.076818   
mort_acc                         -0.048839  1.000000           0.142379   
total_bal_ex_mort                -0.076818  0.142379           1.000000   
total_bc_limit                    0.014742  0.202754           0.294523   
total_il_high_credit_limit       -0.113229  0.097536           0.864517   

                            total_bc_limit  total_il_high_credit_limit  
id                                0.017944                    0.020870  
member_id                         0.017822                    0.020684  
loan_amnt                         0.395843                    0.203093  
funded_amnt                       0.395843                    0.203093  
funded_amnt_inv                   0.395973                    0.203085  
int_rate                         -0.257222                    0.004055  
installment                       0.358718                    0.181215  
annual_inc                        0.257184                    0.242637  
dti                               0.012969                    0.113685  
out_prncp_inv                     0.231732                    0.134440  
total_pymnt                       0.287666                    0.133856  
total_pymnt_inv                   0.287740                    0.133848  
total_rec_prncp                   0.276029                    0.104393  
total_rec_int                     0.135311                    0.142474  
total_rec_late_fee               -0.011567                    0.020241  
recoveries                       -0.000129                    0.018024  
collection_recovery_fee          -0.002077                    0.018618  
last_pymnt_amnt                   0.112906                    0.055168  
collections_12_mths_ex_med       -0.032237                   -0.004587  
policy_code                            NaN                         NaN  
acc_now_delinq                    0.002061                    0.016714  
tot_coll_amt                     -0.038017                   -0.003967  
tot_cur_bal                       0.327199                    0.401982  
total_rev_hi_lim                  0.711588                    0.100907  
acc_open_past_24mths              0.020010                    0.174322  
avg_cur_bal                       0.148730                    0.201870  
chargeoff_within_12_mths         -0.020472                    0.003073  
delinq_amnt                      -0.001576                   -0.000250  
mo_sin_old_rev_tl_op              0.279812                    0.013877  
mo_sin_rcnt_rev_tl_op            -0.004718                    0.004062  
mo_sin_rcnt_tl                    0.014742                   -0.113229  
mort_acc                          0.202754                    0.097536  
total_bal_ex_mort                 0.294523                    0.864517  
total_bc_limit                    1.000000                    0.102987  
total_il_high_credit_limit        0.102987                    1.000000  

[35 rows x 35 columns]

In [14]:
cross_val_score(rfc, X1, Y, cv=10)


Out[14]:
array([ 0.9328679 ,  0.9677044 ,  0.96597089,  0.96274133,  0.96399905,
        0.96214676,  0.96349949,  0.96133843,  0.96098036,  0.96012445])

In [16]:
cross_val_score(rfc, X1, Y, cv=10).mean()


Out[16]:
0.96198005448780355