Lending Club Data



In [2]:

    
import pandas as pd
import numpy as np
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score

from sklearn.feature_selection import RFE

from sklearn.svm import SVR
from sklearn.svm import LinearSVC
from sklearn.svm import LinearSVR

import seaborn as sns
import matplotlib.pylab as pl
%matplotlib inline
#import matplotlib.pyplot as plt

Columns Interested

loan_status -- Current status of the loan

loan_amnt -- The listed amount of the loan applied for by the borrower. If at some point in time, the credit department reduces the loan amount, then it will be reflected in this value.
int_rate -- interest rate of the loan
grade -- LC assigned loan grade
sub_grade -- LC assigned sub loan grade

purpose -- A category provided by the borrower for the loan request.
-- dummy

annual_inc -- The self-reported annual income provided by the borrower during registration.
emp_length -- Employment length in years. Possible values are between 0 and 10 where 0 means less than one year and 10 means ten or more years.
-- dummie

fico_range_low fico_range_high

home_ownership -- The home ownership status provided by the borrower during registration or obtained from the credit report. Our values are: RENT, OWN, MORTGAGE, OTHER
tot_cur_bal -- Total current balance of all accounts
num_actv_bc_tl -- number of active bank accounts
(avg_cur_bal -- average current balance of all accounts )

mort_acc -- number of mortgage accounts

num_actv_rev_tl -- Number of currently active revolving trades
dti -- A ratio calculated using the borrower’s total monthly debt payments on the total debt obligations, excluding mortgage and the requested LC loan, divided by the borrower’s self-reported monthly income.
pub_rec_bankruptcies - Number of public record bankruptcies

delinq_amnt --

title -- mths_since_last_delinq -- The number of months since the borrower's last delinquency.
mths_since_recent_revol_delinq -- Months since most recent revolving delinquency.
total_cu_tl -- Number of finance trades
last_credit_pull_d -- The most recent month LC pulled credit for this loan



In [76]:

    
## 2015
df_app_2015 = pd.read_csv('data/LoanStats3d_securev1.csv.zip', compression='zip', low_memory=False,\
                         header=1)



In [77]:

    
df_app_2015.loan_status.unique()









    Out[77]:





array(['Current', 'Fully Paid', 'Default', 'Charged Off',
       'Late (16-30 days)', 'Late (31-120 days)', 'In Grace Period', nan], dtype=object)



In [78]:

    
df_app_2015.head(5)









    Out[78]:






  
    
      
      id
      member_id
      loan_amnt
      funded_amnt
      funded_amnt_inv
      term
      int_rate
      installment
      grade
      sub_grade
      ...
      num_tl_90g_dpd_24m
      num_tl_op_past_12m
      pct_tl_nvr_dlq
      percent_bc_gt_75
      pub_rec_bankruptcies
      tax_liens
      tot_hi_cred_lim
      total_bal_ex_mort
      total_bc_limit
      total_il_high_credit_limit
    
  
  
    
      0
      68615169
      NaN
      16000.0
      16000.0
      16000.0
      60 months
      8.49%
      328.19
      B
      B1
      ...
      0.0
      3.0
      87.5
      50.0
      0.0
      0.0
      307343.0
      64371.0
      23500.0
      59526.0
    
    
      1
      68537564
      NaN
      8000.0
      8000.0
      8000.0
      36 months
      10.78%
      261.08
      B
      B4
      ...
      0.0
      4.0
      97.1
      100.0
      0.0
      0.0
      168819.0
      148154.0
      5800.0
      136996.0
    
    
      2
      68536799
      NaN
      10000.0
      10000.0
      10000.0
      36 months
      10.78%
      326.35
      B
      B4
      ...
      0.0
      2.0
      100.0
      33.3
      0.0
      0.0
      190694.0
      40681.0
      13900.0
      29512.0
    
    
      3
      68355089
      NaN
      24700.0
      24700.0
      24700.0
      36 months
      11.99%
      820.28
      C
      C1
      ...
      0.0
      2.0
      97.4
      7.7
      0.0
      0.0
      314017.0
      39475.0
      79300.0
      24667.0
    
    
      4
      68506885
      NaN
      10000.0
      10000.0
      10000.0
      60 months
      11.99%
      222.40
      C
      C1
      ...
      0.0
      1.0
      100.0
      50.0
      0.0
      0.0
      61054.0
      41166.0
      13100.0
      46854.0
    
  

5 rows × 115 columns



In [92]:

    
df_app_2015['delinq_amnt'].unique()









    Out[92]:





array([  0.00000000e+00,   6.87000000e+02,   4.00000000e+01,
         3.60000000e+01,   6.00000000e+00,   6.20000000e+01,
         3.30000000e+01,   1.62000000e+02,   1.39400000e+03,
         7.84900000e+03,   8.96000000e+02,   3.76000000e+02,
         1.49200000e+03,   1.40000000e+01,   1.00000000e+00,
         3.03000000e+02,   1.96100000e+03,   6.02000000e+02,
         2.73000000e+03,   3.86650000e+04,   2.59000000e+02,
         1.65000000e+02,   3.39000000e+02,   2.49460000e+04,
         2.89000000e+02,   3.42000000e+02,   2.48000000e+02,
         3.50000000e+01,   5.30000000e+01,   6.50000000e+04,
         1.56000000e+02,   3.00000000e+01,   3.30000000e+02,
         3.43000000e+02,   1.35330000e+04,   7.87000000e+02,
         4.59000000e+03,   1.84000000e+02,   2.00000000e+00,
         1.80000000e+01,   3.34300000e+03,   9.41000000e+02,
         1.33400000e+03,   9.40000000e+01,   1.04300000e+03,
         1.89200000e+03,   4.90000000e+02,   1.26000000e+02,
         2.07080000e+04,   2.58000000e+02,   6.10800000e+03,
         1.14600000e+03,   7.10000000e+01,   2.53000000e+02,
         9.17000000e+02,   3.70000000e+02,   8.82160000e+04,
         1.93000000e+02,   1.64000000e+02,   9.60000000e+01,
         4.40000000e+01,   1.26600000e+03,   4.80000000e+01,
         5.40000000e+01,   1.01000000e+02,   8.90000000e+01,
         4.90000000e+01,   5.10000000e+01,   3.77000000e+02,
         2.20000000e+01,   8.15000000e+02,   1.39200000e+03,
         7.50000000e+01,   4.06200000e+03,   6.27100000e+03,
         5.60000000e+01,   2.52000000e+02,   7.62000000e+02,
         2.49000000e+02,   7.65000000e+02,   1.83000000e+02,
         2.33100000e+03,   5.71000000e+02,   7.38000000e+02,
         1.77520000e+04,   1.85100000e+03,   2.99100000e+03,
         3.02000000e+02,   2.72000000e+02,   7.05000000e+02,
         4.57000000e+02,   6.67000000e+02,   6.81000000e+02,
         1.20000000e+01,   1.00000000e+01,   4.80000000e+02,
         2.67000000e+02,   3.40000000e+01,   1.54000000e+02,
         2.29000000e+02,   4.15000000e+02,   2.90000000e+01,
         1.24000000e+02,   1.26100000e+03,   1.73000000e+02,
         3.84000000e+02,   7.60000000e+01,   5.78000000e+02,
         3.56000000e+02,   8.20000000e+01,   2.46600000e+03,
         2.01000000e+02,   8.14000000e+02,   1.48000000e+02,
         4.29100000e+03,   5.20000000e+01,   1.19000000e+02,
         1.59000000e+02,   7.49000000e+02,   3.16000000e+02,
         5.85000000e+02,   4.91850000e+04,   1.79000000e+02,
         5.70000000e+01,   1.69000000e+02,   1.32000000e+02,
         2.70000000e+01,   1.24800000e+03,   5.42000000e+02,
         2.60000000e+02,   9.66000000e+02,   2.47000000e+02,
         1.90000000e+01,   8.56000000e+02,   8.60000000e+01,
         6.76700000e+03,   7.00000000e+01,   6.00000000e+01,
         1.10000000e+01,   2.00000000e+01,   6.42600000e+03,
         4.79000000e+02,   8.40000000e+01,   1.08000000e+02,
         2.70000000e+02,   7.20000000e+01,   3.33000000e+02,
         5.27100000e+03,   1.71000000e+03,   4.37000000e+02,
         6.65000000e+02,   2.50000000e+01,   1.99000000e+02,
         2.59170000e+04,   1.27000000e+02,   8.42800000e+03,
         4.20000000e+01,   2.93000000e+02,   5.50100000e+03,
         6.36000000e+02,   1.00870000e+04,   1.03800000e+03,
         4.65000000e+02,   9.70000000e+01,   1.74000000e+02,
         4.23660000e+04,   1.22100000e+03,   7.43800000e+03,
         1.04900000e+03,   6.10000000e+01,   7.01200000e+03,
         9.00000000e+00,   9.90000000e+01,   4.84000000e+02,
         2.95000000e+02,   1.09000000e+02,   2.94600000e+03,
         2.32000000e+02,   8.36000000e+02,   3.88100000e+03,
         1.99100000e+03,   1.32200000e+03,   5.61000000e+02,
         1.30000000e+01,   2.34000000e+02,   7.40000000e+01,
         6.38000000e+02,   1.16000000e+02,   4.34000000e+02,
         7.59000000e+02,   1.17000000e+02,   4.89000000e+02,
         2.52400000e+03,   3.50600000e+03,   1.31000000e+02,
         3.15500000e+03,   1.39000000e+02,   2.40000000e+02,
         1.77000000e+02,   1.28710000e+04,   1.43000000e+02,
         5.00000000e+01,   6.59000000e+02,   3.23400000e+03,
         1.31100000e+03,   2.46200000e+03,   2.28000000e+02,
         3.71000000e+02,   3.80000000e+01,   1.38000000e+02,
         3.62400000e+03,   2.40000000e+01,   3.67500000e+03,
         3.24100000e+03,   1.57000000e+02,   1.30000000e+02,
         5.90000000e+01,   2.03000000e+02,   4.56800000e+03,
         8.34000000e+03,   9.56000000e+02,   6.82000000e+02,
         8.00000000e+01,   3.91600000e+03,   1.66000000e+02,
         3.59600000e+03,   8.50000000e+01,   4.97000000e+02,
         5.00000000e+00,   2.15000000e+02,   1.02000000e+02,
         1.29700000e+03,   7.70000000e+01,   5.51000000e+02,
         1.49000000e+02,   1.01900000e+03,   5.27000000e+02,
         1.45000000e+02,   1.28400000e+03,   1.85000000e+02,
         1.06000000e+02,   1.96000000e+02,   2.31000000e+02,
         2.75400000e+03,   2.56900000e+04,   8.10000000e+01,
         5.93000000e+02,   2.07000000e+02,   2.69800000e+03,
         2.11000000e+02,   2.75000000e+02,   3.96000000e+02,
         3.10500000e+03,   1.28000000e+02,   5.80000000e+01,
         6.58400000e+03,   5.28000000e+02,   4.63000000e+02,
         4.10000000e+02,   2.12000000e+02,   1.37700000e+03,
         4.25300000e+03,   2.75700000e+03,   4.18000000e+02,
         5.97000000e+02,   7.80000000e+01,   2.91700000e+03,
         2.48190000e+04,   1.47000000e+02,   1.55600000e+03,
         1.50800000e+03,   1.33180000e+04,   2.00900000e+03,
         2.62000000e+02,   8.00000000e+00,   2.64000000e+02,
         2.08000000e+02,   6.18900000e+03,   1.70100000e+03,
         4.50000000e+01,   3.58500000e+03,   9.93400000e+03,
         1.83400000e+03,   1.73300000e+03,   6.40000000e+01,
         1.44000000e+02,   6.15600000e+03,   4.87000000e+02,
         7.02000000e+02,   4.61000000e+02,   5.25000000e+02,
         4.13000000e+02,   2.08600000e+03,   1.63500000e+03,
         6.05400000e+03,   9.36300000e+03,   1.53000000e+02,
         7.00000000e+02,   9.62000000e+02,   7.41000000e+02,
         4.02000000e+02,   5.29000000e+02,   1.01950000e+04,
         2.10000000e+01,   7.94000000e+02,   3.59000000e+02,
         1.13880000e+04,   1.53140000e+04,   2.16000000e+02,
         2.24500000e+03,   8.11000000e+02,   7.47000000e+02,
         3.43000000e+03,   1.05000000e+02,   1.71000000e+02,
         2.33000000e+02,   1.31500000e+03,   7.69000000e+02,
         1.95500000e+03,   3.20000000e+01,   5.00000000e+02,
         1.63000000e+02,   5.67080000e+04,   4.25000000e+02,
         1.78000000e+02,   1.35000000e+02,   7.98000000e+02,
         1.70000000e+01,   4.03000000e+02,   3.96600000e+03,
         6.33000000e+02,   4.37840000e+04,   1.92000000e+02,
         2.44330000e+04,   1.00000000e+02,   2.39000000e+02,
         7.50000000e+02,   7.57000000e+02,   2.41100000e+03,
         5.33000000e+02,   6.49000000e+02,   1.40400000e+03,
         1.05100000e+03,   1.13700000e+03,   5.48000000e+02,
         1.25200000e+03,   2.13000000e+02,   3.10000000e+01,
         1.22000000e+02,   2.17000000e+02,   3.68000000e+02,
         9.10000000e+01,   3.26200000e+03,   4.24000000e+02,
         1.59177000e+05,   2.96000000e+02,   1.10100000e+03,
         2.27000000e+02,   1.95000000e+02,   9.20000000e+01,
         1.81800000e+03,   1.87000000e+02,   1.55000000e+03,
         2.80000000e+01,   5.50000000e+01,   2.92400000e+03,
         1.50000000e+01,   2.50000000e+02,   3.87000000e+03,
         2.98920000e+04,   4.46000000e+03,   9.91000000e+02,
         8.16300000e+03,   6.91000000e+02,   3.54000000e+02,
         2.43000000e+02,   5.09000000e+02,   1.19590000e+04,
         6.30000000e+01,   2.84790000e+04,   1.00100000e+03,
         2.42200000e+03,   6.45000000e+02,   8.12000000e+02,
         3.47000000e+02,   6.58000000e+02,   1.13660000e+04,
         6.70000000e+01,   7.67350000e+04,   2.81200000e+03,
         2.48410000e+04,   2.27700000e+03,   1.15400000e+03,
         1.13000000e+02,   1.62100000e+03,   1.55000000e+02,
         2.30000000e+01,   3.92000000e+02,   4.61700000e+03,
         8.30000000e+01,   3.24000000e+02,   1.41300000e+03,
         5.34000000e+02,   4.81000000e+02,   8.64000000e+02,
         9.71000000e+02,   7.45000000e+02,   3.10000000e+02,
         1.03000000e+02,   3.62320000e+04,   1.42700000e+03,
         5.06000000e+02,   3.03900000e+03,   8.80000000e+01,
         4.78000000e+02,   5.36190000e+04,   4.70000000e+01,
         5.88450000e+04,   1.08500000e+03,   2.35000000e+02,
         1.33000000e+02,   1.44000000e+03,   3.12500000e+03,
         1.57700000e+03,   1.37100000e+03,   5.35800000e+03,
         5.68000000e+02,   1.30990000e+04,   4.22000000e+02,
         3.22900000e+03,   1.36000000e+02,   3.25180000e+04,
         4.02610000e+04,   1.17000000e+03,   2.97000000e+02,
         9.82000000e+02,   1.87000000e+03,   3.00000000e+00,
         1.72900000e+03,   7.30000000e+01,   1.72100000e+03,
         9.92000000e+02,   9.28000000e+02,   7.80000000e+02,
         9.88000000e+02,   3.90000000e+01,   1.68000000e+02,
         9.02000000e+02,   2.73390000e+04,   1.37600000e+03,
         3.44900000e+03,   1.02300000e+03,   1.85800000e+03,
         1.90440000e+04,   3.03500000e+03,   2.93500000e+03,
         9.57000000e+02,   4.29040000e+04,   2.56000000e+02,
         2.66900000e+03,   1.51800000e+03,   1.91000000e+02,
         3.20000000e+02,   3.18000000e+02,   1.86000000e+02,
         4.35800000e+03,   3.88000000e+02,   3.04000000e+02,
         2.20000000e+02,   2.14000000e+02,   2.50000000e+03,
         4.60000000e+01,   1.22700000e+03,   1.11800000e+03,
         1.09360000e+04,   3.73100000e+03,   4.26000000e+02,
         1.61000000e+02,   5.32020000e+04,   6.50000000e+01,
         1.69690000e+04,   9.44000000e+02,   1.60000000e+01,
         4.42000000e+02,   4.38000000e+02,   6.24000000e+02,
         5.38000000e+02,   6.68000000e+02,   5.22100000e+03,
         9.79000000e+02,   1.97000000e+02,   6.04000000e+02,
         7.00000000e+00,   1.10000000e+02,   1.69000000e+04,
         1.20000000e+02,   3.80000000e+02,   6.80000000e+02,
         1.49700000e+03,   3.28600000e+03,   4.81900000e+03,
         6.18000000e+02,   4.10000000e+01,   1.87300000e+04,
         1.31400000e+03,   5.26500000e+03,   2.38200000e+03,
         3.13100000e+03,   2.95500000e+03,   1.44700000e+03,
         5.46000000e+02,   3.05000000e+02,   1.18000000e+02,
         9.55000000e+02,   8.20000000e+02,   3.17000000e+02,
         2.47900000e+04,   4.59000000e+02,   4.14000000e+02,
         8.35000000e+02,   1.04000000e+02,   1.07000000e+02,
         2.41000000e+02,   5.59000000e+02,   5.39000000e+02,
         9.12000000e+02,   3.48000000e+02,   2.06000000e+02,
         2.95200000e+03,   2.30000000e+02,   2.47400000e+03,
         3.69000000e+02,   1.10300000e+03,   5.16900000e+03,
         2.61000000e+02,   2.66000000e+02,   2.63300000e+03,
         4.08200000e+03,   2.51000000e+02,   1.51000000e+02,
         1.85000000e+03,   3.68600000e+03,   4.23000000e+02,
         1.32600000e+03,   1.42000000e+03,   9.70000000e+02,
         4.69000000e+02,   1.93300000e+03,   1.00800000e+03,
         7.17000000e+02,   9.30000000e+01,   4.74000000e+02,
         2.88600000e+03,   2.21400000e+03,   1.67800000e+03,
         4.20000000e+02,   3.25300000e+03,   3.50000000e+02,
         3.60400000e+03,   1.46000000e+02,   2.30720000e+04,
         5.69000000e+02,   5.65000000e+02,   1.75000000e+03,
         1.09000000e+03,   1.67900000e+03,   3.66900000e+03,
         3.51000000e+02,   1.60000000e+03,   2.55200000e+03,
         8.03000000e+02,   6.72000000e+02,   1.40600000e+03,
         6.41000000e+02,   7.22420000e+04,   4.36000000e+02,
         1.10950000e+04,   1.02130000e+04,   5.87930000e+04,
         3.78000000e+02,   2.91000000e+02,   1.23000000e+02,
         8.90000000e+02,   1.04800000e+04,   2.73000000e+02,
         6.02100000e+03,   8.70000000e+01,   6.97700000e+03,
         3.61000000e+02,   4.55400000e+03,   9.45210000e+04,
         6.90000000e+01,   3.98000000e+02,   6.77000000e+02,
         5.88800000e+03,   1.13750000e+04,   4.12000000e+02,
         6.66600000e+03,   2.26000000e+02,   6.95000000e+02,
         4.45000000e+02,   2.71630000e+04,   4.85000000e+02,
         2.17460000e+04,   5.92000000e+02,   3.00700000e+03,
         3.97000000e+02,   5.12000000e+02,   8.53000000e+03,
         6.93000000e+02,   1.37000000e+02,   9.46000000e+02,
         2.94000000e+02,   6.17000000e+02,   2.42000000e+02,
         1.98000000e+02,   1.36470000e+04,   4.29300000e+03,
         3.73000000e+02,   2.00500000e+03,   2.68000000e+02,
         5.03000000e+02,   1.75300000e+03,   1.86000000e+03,
         8.87800000e+03,   4.48000000e+02,   2.76600000e+03,
         1.02470000e+04,   4.40130000e+04,   2.74000000e+02,
         8.01000000e+02,   4.40000000e+02,   8.77000000e+02,
         5.24200000e+03,   1.42000000e+02,   1.91600000e+03,
         7.92000000e+02,   2.54000000e+02,   4.49000000e+02,
         5.99000000e+02,   5.15940000e+04,   4.72510000e+04,
         1.58480000e+04,   6.27000000e+02,   1.88000000e+02,
         1.83900000e+04,   2.00000000e+02,   4.30000000e+01,
         6.73200000e+03,   1.52000000e+02,   2.00000000e+03,
         2.55000000e+02,   4.93950000e+04,   1.20830000e+04,
         4.19000000e+02,   3.76200000e+03,   1.60000000e+02,
         2.67100000e+03,   1.25400000e+03,   2.10000000e+02,
         3.67000000e+02,   3.93000000e+02,   3.40000000e+03,
         4.60000000e+02,   8.80000000e+02,   1.46800000e+03,
         2.79000000e+02,   9.73000000e+02,   1.50700000e+03,
         9.00000000e+01,   1.14000000e+02,   3.76000000e+03,
         5.63380000e+04,   1.58300000e+03,   4.88000000e+02,
         2.00440000e+04,   2.00300000e+03,   4.29260000e+04,
         2.14500000e+03,   4.94000000e+02,   2.19000000e+02,
         3.70000000e+01,   2.12400000e+03,   3.79800000e+03,
         6.30400000e+03,   6.53000000e+02,   6.61000000e+02,
         1.21000000e+02,   1.95300000e+03,   6.60000000e+01,
         2.92000000e+02,   4.67930000e+04,   1.77100000e+03,
         4.30000000e+02,   3.90600000e+03,   5.14200000e+03,
         2.88000000e+02,   3.30700000e+03,   7.51400000e+03,
         1.85780000e+04,   6.97000000e+02,   1.97850000e+04,
         6.64970000e+04,   4.53000000e+02,   5.19000000e+02,
         3.12000000e+02,   1.05900000e+03,   2.45000000e+02,
         1.56100000e+03,   1.94000000e+02,   8.90000000e+03,
         2.09000000e+02,   4.59450000e+04,   1.64700000e+03,
         3.63900000e+03,   2.69200000e+03,   9.31000000e+02,
         4.51000000e+02,   3.36000000e+02,   2.65480000e+04,
         1.16300000e+03,   3.93200000e+03,   1.22900000e+03,
         1.91000000e+03,   8.57000000e+02,   1.00000000e+03,
         6.38670000e+04,   4.47000000e+02,   2.40500000e+04,
         3.60780000e+04,   5.79000000e+02,   6.51000000e+02,
         3.53000000e+02,   5.81000000e+02,   1.00400000e+03,
         2.44000000e+02,   7.68000000e+02,   3.75000000e+02,
         4.09000000e+02,   2.83000000e+02,   1.34000000e+02,
         3.65000000e+02,   1.76000000e+02,   2.99000000e+02,
         1.40180000e+04,   5.56000000e+02,   4.81940000e+04,
         1.94500000e+03,   2.41700000e+04,   1.57400000e+03,
         2.59200000e+03,   3.32000000e+02,   6.47000000e+02,
         1.25000000e+02,   1.72000000e+03,   3.56700000e+03,
         2.13000000e+03,   1.86900000e+03,   3.38000000e+02,
         1.85900000e+03,   4.16000000e+02,   3.21000000e+02,
         2.01900000e+03,   3.08000000e+02,   5.52000000e+02,
         3.86000000e+02,   1.33600000e+03,   4.00000000e+00,
         3.60000000e+02,   2.21000000e+02,   1.90800000e+03,
         2.75000000e+03,   3.94000000e+02,   6.98000000e+02,
         2.89200000e+03,   3.82200000e+03,   1.76100000e+03,
         3.63000000e+02,   9.14000000e+02,   7.36000000e+02,
         3.15000000e+02,   4.96000000e+02,   1.80000000e+02,
         8.26000000e+02,   8.63990000e+04,   4.62000000e+02,
         8.19000000e+02,   3.24400000e+03,   6.80000000e+01,
         6.30800000e+03,   4.82000000e+02,   3.79000000e+03,
         1.81700000e+03,   1.50000000e+02,   4.11500000e+03,
         3.34000000e+02,   2.87000000e+02,   8.38000000e+02,
         2.91990000e+04,   1.75230000e+04,   2.22000000e+02,
         9.90300000e+03,   5.84000000e+02,   1.80700000e+03,
         4.27100000e+03,   1.39600000e+03,   5.66000000e+02,
         2.25700000e+03,   7.32000000e+02,   2.90400000e+03,
         2.84300000e+03,   1.06200000e+03,   2.59550000e+04,
         2.53200000e+03,   5.26000000e+02,   1.67100000e+03,
         3.49300000e+03,   1.97300000e+03,   1.81000000e+02,
         5.44000000e+02,   2.77000000e+02,   3.00000000e+02,
         1.25800000e+03,   1.03700000e+03,   1.25000000e+03,
         3.28000000e+02,   1.47400000e+03,   1.44300000e+03,
         9.24000000e+02,   1.34800000e+03,   1.71400000e+03,
         3.79000000e+02,   9.84900000e+03,   9.64000000e+02,
         4.94300000e+03,   3.81000000e+02,   4.17000000e+02,
         2.98000000e+02,   7.54800000e+03,   7.67000000e+02,
         1.45000000e+03,   6.06480000e+04,   1.51040000e+04,
         3.97800000e+03,   7.88000000e+02,   3.01000000e+02,
         1.47460000e+04,   1.34000000e+03,   2.46000000e+02,
         4.05000000e+02,   8.60000000e+02,   3.37000000e+02,
         3.44000000e+02,   2.34000000e+03,   1.93700000e+03,
         7.10000000e+02,   7.90000000e+02,   5.55800000e+03,
         2.71000000e+02,   1.06700000e+04,   3.84300000e+03,
         7.31000000e+03,   5.32700000e+03,   7.73000000e+02,
         1.79000000e+03,   9.50000000e+01,   4.88600000e+03,
         2.27300000e+03,   3.95000000e+02,   3.86500000e+03,
         2.00570000e+04,   1.87200000e+03,   6.64000000e+02,
         1.07100000e+03,   7.35000000e+02,   3.22200000e+03,
         1.43300000e+03,   4.10200000e+03,   3.27000000e+02,
         5.58000000e+03,   2.63000000e+02,   4.41000000e+02,
         4.99000000e+02,   4.27260000e+04,   1.82000000e+02,
         8.49000000e+02,   1.00500000e+03,   1.75000000e+02,
         1.19000000e+03,   3.06000000e+02,   3.55000000e+02,
         8.95000000e+02,   5.98000000e+02,   1.37900000e+03,
         4.66000000e+02,   3.72000000e+02,   1.39300000e+03,
         5.28000000e+03,   2.74800000e+03,   2.18230000e+04,
         2.84000000e+02,   4.32800000e+03,   1.74600000e+03,
         1.00300000e+03,   2.85700000e+03,   5.96000000e+02,
         2.57000000e+02,   4.48900000e+03,   1.38430000e+04,
         2.65000000e+02,   7.19000000e+02,   2.95000000e+03,
         8.25000000e+02,   6.23590000e+04,   6.16000000e+02,
         1.12350000e+04,   5.08000000e+02,   6.39000000e+02,
         5.74000000e+02,   4.83000000e+02,   2.77720000e+04,
         1.15500000e+03,   1.81900000e+03,   2.36200000e+03,
         2.13500000e+03,   1.12720000e+04,   2.51230000e+04,
                    nan])



In [32]:

    
df_app_2015.info(max_cols=111)









    



<class 'pandas.core.frame.DataFrame'>
RangeIndex: 421097 entries, 0 to 421096
Data columns (total 111 columns):
id                                2 non-null object
member_id                         0 non-null float64
loan_amnt                         421095 non-null float64
funded_amnt                       421095 non-null float64
funded_amnt_inv                   421095 non-null float64
term                              421095 non-null object
int_rate                          421095 non-null object
installment                       421095 non-null float64
grade                             421095 non-null object
sub_grade                         421095 non-null object
emp_title                         397221 non-null object
emp_length                        421095 non-null object
home_ownership                    421095 non-null object
annual_inc                        421095 non-null float64
verification_status               421095 non-null object
issue_d                           421095 non-null object
loan_status                       421095 non-null object
pymnt_plan                        421095 non-null object
url                               0 non-null float64
desc                              45 non-null object
purpose                           421095 non-null object
title                             420963 non-null object
zip_code                          421095 non-null object
addr_state                        421095 non-null object
dti                               421095 non-null float64
delinq_2yrs                       421095 non-null float64
earliest_cr_line                  421095 non-null object
inq_last_6mths                    421095 non-null float64
mths_since_last_delinq            217133 non-null float64
mths_since_last_record            74415 non-null float64
open_acc                          421095 non-null float64
pub_rec                           421095 non-null float64
revol_bal                         421095 non-null float64
revol_util                        420933 non-null object
total_acc                         421095 non-null float64
initial_list_status               421095 non-null object
out_prncp                         421095 non-null float64
out_prncp_inv                     421095 non-null float64
total_pymnt                       421095 non-null float64
total_pymnt_inv                   421095 non-null float64
total_rec_prncp                   421095 non-null float64
total_rec_int                     421095 non-null float64
total_rec_late_fee                421095 non-null float64
recoveries                        421095 non-null float64
collection_recovery_fee           421095 non-null float64
last_pymnt_d                      420801 non-null object
last_pymnt_amnt                   421095 non-null float64
next_pymnt_d                      292711 non-null object
last_credit_pull_d                421087 non-null object
collections_12_mths_ex_med        421095 non-null float64
mths_since_last_major_derog       122729 non-null float64
policy_code                       421095 non-null float64
application_type                  421095 non-null object
annual_inc_joint                  511 non-null float64
dti_joint                         509 non-null float64
verification_status_joint         511 non-null object
acc_now_delinq                    421095 non-null float64
tot_coll_amt                      421095 non-null float64
tot_cur_bal                       421095 non-null float64
open_acc_6m                       21372 non-null float64
open_il_6m                        21372 non-null float64
open_il_12m                       21372 non-null float64
open_il_24m                       21372 non-null float64
mths_since_rcnt_il                20810 non-null float64
total_bal_il                      21372 non-null float64
il_util                           18617 non-null float64
open_rv_12m                       21372 non-null float64
open_rv_24m                       21372 non-null float64
max_bal_bc                        21372 non-null float64
all_util                          21372 non-null float64
total_rev_hi_lim                  421095 non-null float64
inq_fi                            21372 non-null float64
total_cu_tl                       21372 non-null float64
inq_last_12m                      21372 non-null float64
acc_open_past_24mths              421095 non-null float64
avg_cur_bal                       421095 non-null float64
bc_open_to_buy                    417132 non-null float64
bc_util                           416868 non-null float64
chargeoff_within_12_mths          421095 non-null float64
delinq_amnt                       421095 non-null float64
mo_sin_old_il_acct                408841 non-null float64
mo_sin_old_rev_tl_op              421095 non-null float64
mo_sin_rcnt_rev_tl_op             421095 non-null float64
mo_sin_rcnt_tl                    421095 non-null float64
mort_acc                          421095 non-null float64
mths_since_recent_bc              417297 non-null float64
mths_since_recent_bc_dlq          108600 non-null float64
mths_since_recent_inq             376496 non-null float64
mths_since_recent_revol_delinq    151737 non-null float64
num_accts_ever_120_pd             421095 non-null float64
num_actv_bc_tl                    421095 non-null float64
num_actv_rev_tl                   421095 non-null float64
num_bc_sats                       421095 non-null float64
num_bc_tl                         421095 non-null float64
num_il_tl                         421095 non-null float64
num_op_rev_tl                     421095 non-null float64
num_rev_accts                     421094 non-null float64
num_rev_tl_bal_gt_0               421095 non-null float64
num_sats                          421095 non-null float64
num_tl_120dpd_2m                  401865 non-null float64
num_tl_30dpd                      421095 non-null float64
num_tl_90g_dpd_24m                421095 non-null float64
num_tl_op_past_12m                421095 non-null float64
pct_tl_nvr_dlq                    421095 non-null float64
percent_bc_gt_75                  416856 non-null float64
pub_rec_bankruptcies              421095 non-null float64
tax_liens                         421095 non-null float64
tot_hi_cred_lim                   421095 non-null float64
total_bal_ex_mort                 421095 non-null float64
total_bc_limit                    421095 non-null float64
total_il_high_credit_limit        421095 non-null float64
dtypes: float64(86), object(25)
memory usage: 356.6+ MB



In [38]:

    
df_app_2015.groupby('title').loan_amnt.mean()









    Out[38]:





title
Business                                   15607.365207
Car financing                              10082.201439
Credit Card/Auto Repair                    10950.000000
Credit card refinancing                    15953.246747
Debt consolidation                         15755.709990
DebtC                                      20000.000000
Green loan                                  9670.964126
Home buying                                14547.170139
Home improvement                           14748.540413
Learning and training                       2200.000000
Major purchase                             13055.713613
Medical expenses                            9200.547631
Moving and relocation                       8315.978530
New Baby and New House (CC Consolidate)     8000.000000
Other                                      10340.992752
Pay off Lowes Card                         10000.000000
Paying off higher interest cards & auto    21000.000000
Prescription Drug and Medical Costs         8000.000000
SAVE                                       12000.000000
Simple Loan Until Contract Is Completed    25000.000000
Student Loan                               10800.000000
Trying to come back to reality!             6000.000000
Vacation                                    6526.302315
considerate                                25000.000000
new day                                    23200.000000
new kitchen for momma!                      8400.000000
odymeds                                     6400.000000
Name: loan_amnt, dtype: float64



In [39]:

    
df_app_2015.groupby('purpose').loan_amnt.mean()









    Out[39]:





purpose
car                   10066.423832
credit_card           15951.617741
debt_consolidation    15756.119210
educational            2200.000000
home_improvement      14751.434191
house                 14542.715577
major_purchase        13058.913948
medical                9215.217115
moving                 8314.121901
other                 10345.197615
renewable_energy       9824.441964
small_business        15642.798751
vacation               6524.455313
wedding                8825.000000
Name: loan_amnt, dtype: float64



In [7]:

    
df_app_2015['emp_length'].unique()









    Out[7]:





array(['5 years', '< 1 year', '3 years', '10+ years', '2 years', 'n/a',
       '4 years', '8 years', '6 years', '9 years', '1 year', '7 years', nan], dtype=object)

Decriptive Analyss

Annual income distribution
Total loan amount groupby interest rate chunks
Average loan amount groupby grade
Average loan amount groupby



In [82]:

    
## selected columns
df = df_app_2015.ix[:, ['loan_status','loan_amnt', 'int_rate', 'grade', 'sub_grade',\
                        'purpose',\
                        'annual_inc', 'emp_length', 'home_ownership',\
                        'fico_range_low','fico_range_high',\
                        'num_actv_bc_tl', 'tot_cur_bal', 'mort_acc','num_actv_rev_tl',\
                        'pub_rec_bankruptcies','dti' ]]



In [83]:

    
df.head(3)









    Out[83]:






  
    
      
      loan_status
      loan_amnt
      int_rate
      grade
      sub_grade
      purpose
      annual_inc
      emp_length
      home_ownership
      fico_range_low
      fico_range_high
      num_actv_bc_tl
      tot_cur_bal
      mort_acc
      num_actv_rev_tl
      pub_rec_bankruptcies
      dti
    
  
  
    
      0
      Current
      16000.0
      8.49%
      B
      B1
      credit_card
      62000.0
      5 years
      MORTGAGE
      720.0
      724.0
      4.0
      227708.0
      3.0
      5.0
      0.0
      28.92
    
    
      1
      Current
      8000.0
      10.78%
      B
      B4
      debt_consolidation
      45000.0
      < 1 year
      MORTGAGE
      670.0
      674.0
      3.0
      148154.0
      2.0
      11.0
      0.0
      21.23
    
    
      2
      Current
      10000.0
      10.78%
      B
      B4
      debt_consolidation
      41600.0
      3 years
      MORTGAGE
      695.0
      699.0
      2.0
      168304.0
      2.0
      5.0
      0.0
      15.78



In [91]:

    
len(df.dropna())









    Out[91]:





421095



In [85]:

    
df.shape









    Out[85]:





(421097, 17)



In [86]:

    
df.loan_status.unique()









    Out[86]:





array(['Current', 'Fully Paid', 'Default', 'Charged Off',
       'Late (16-30 days)', 'Late (31-120 days)', 'In Grace Period', nan], dtype=object)



In [87]:

    
len(df[df['loan_status']=='Fully Paid'])









    Out[87]:





94968



In [88]:

    
len(df[df['loan_status']=='Default'])









    Out[88]:





260



In [89]:

    
len(df[df['loan_status']=='Charged Off'])









    Out[89]:





33416



In [90]:

    
len(df[df['loan_status']=='Late (31-120 days)'])









    Out[90]:





8626



In [65]:

    
df.info()









    



<class 'pandas.core.frame.DataFrame'>
RangeIndex: 421097 entries, 0 to 421096
Data columns (total 15 columns):
loan_status             421095 non-null object
loan_amnt               421095 non-null float64
int_rate                421095 non-null object
grade                   421095 non-null object
sub_grade               421095 non-null object
purpose                 421095 non-null object
title                   420963 non-null object
annual_inc              421095 non-null float64
emp_length              421095 non-null object
home_ownership          421095 non-null object
num_actv_bc_tl          421095 non-null float64
tot_cur_bal             421095 non-null float64
mort_acc                421095 non-null float64
num_actv_rev_tl         421095 non-null float64
pub_rec_bankruptcies    421095 non-null float64
dtypes: float64(7), object(8)
memory usage: 48.2+ MB



In [29]:

    
df.loan_status.unique()









    Out[29]:





array(['Current', 'Fully Paid', 'Default', 'Charged Off',
       'Late (16-30 days)', 'Late (31-120 days)', 'In Grace Period', nan], dtype=object)



In [30]:

    
## Convert applicable fields to numeric (I only select "Interest Rate" to use for this analysis)
df.ix[:,'int_rate'] = df.ix[:,['int_rate']]\
                        .applymap(lambda e: pd.to_numeric(str(e).rstrip()[:-1], errors='coerce'))
df.info()









    



<class 'pandas.core.frame.DataFrame'>
RangeIndex: 421097 entries, 0 to 421096
Data columns (total 18 columns):
loan_status                       421095 non-null object
loan_amnt                         421095 non-null float64
int_rate                          421095 non-null float64
grade                             421095 non-null object
sub_grade                         421095 non-null object
purpose                           421095 non-null object
annual_inc                        421095 non-null float64
emp_length                        421095 non-null object
home_ownership                    421095 non-null object
num_actv_bc_tl                    421095 non-null float64
tot_cur_bal                       421095 non-null float64
mort_acc                          421095 non-null float64
num_actv_rev_tl                   421095 non-null float64
total_cu_tl                       21372 non-null float64
pub_rec_bankruptcies              421095 non-null float64
mths_since_last_delinq            217133 non-null float64
mths_since_recent_bc_dlq          108600 non-null float64
mths_since_recent_revol_delinq    151737 non-null float64
dtypes: float64(12), object(6)
memory usage: 57.8+ MB



In [57]:

    
df = df.rename(columns={"int_rate": "int_rate(%)"})



In [58]:

    
df.head(3)









    Out[58]:






  
    
      
      loan_status
      loan_amnt
      int_rate(%)
      grade
      sub_grade
      annual_inc
      num_actv_bc_tl
      avg_cur_bal
      mort_acc
    
  
  
    
      0
      Current
      16000.0
      8.49
      B
      B1
      62000.0
      4.0
      22771.0
      3.0
    
    
      1
      Current
      8000.0
      10.78
      B
      B4
      45000.0
      3.0
      5926.0
      2.0
    
    
      2
      Current
      10000.0
      10.78
      B
      B4
      41600.0
      2.0
      9350.0
      2.0



In [7]:

    
#len(df.dropna(thresh=  , axis=1).columns)









    Out[7]:





52



In [59]:

    
df.describe()









    Out[59]:






  
    
      
      loan_amnt
      int_rate(%)
      annual_inc
      num_actv_bc_tl
      avg_cur_bal
      mort_acc
    
  
  
    
      count
      421095.000000
      421095.000000
      4.210950e+05
      421095.000000
      421095.000000
      421095.000000
    
    
      mean
      15240.285862
      12.600374
      7.696561e+04
      3.740966
      13104.784721
      1.685261
    
    
      std
      8571.325901
      4.318474
      7.394996e+04
      2.304542
      15671.803897
      1.990160
    
    
      min
      1000.000000
      5.320000
      0.000000e+00
      0.000000
      0.000000
      0.000000
    
    
      25%
      8500.000000
      9.170000
      4.600000e+04
      2.000000
      3199.000000
      0.000000
    
    
      50%
      14000.000000
      12.290000
      6.500000e+04
      3.000000
      7262.000000
      1.000000
    
    
      75%
      20000.000000
      15.590000
      9.169050e+04
      5.000000
      17974.500000
      3.000000
    
    
      max
      35000.000000
      28.990000
      9.500000e+06
      32.000000
      555925.000000
      52.000000



In [168]:

    
# 1. Loan Amount distribution
# # create plots and histogram to visualize total loan amounts 
fig = pl.figure(figsize=(8,10))
ax1 = fig.add_subplot(211)
ax1.plot(range(len(df)), sorted(df.loan_amnt), '.', color='purple')
ax1.set_xlabel('Loan Applicant Count')
ax1.set_ylabel('Loan Amount ($)')
ax1.set_title('Fig 1a - Sorted Issued Loan Amount (2015)', size=15)

# all_ histogram
# pick upper bound 900 to exclude too large numbers
ax2 = fig.add_subplot(212)
ax2.hist(df.loan_amnt, range=(df.loan_amnt.min(), 36000), color='purple')
ax2.set_xlabel('Loan Amount -$', size=12)
ax2.set_ylabel('Counts',size=12)
ax2.set_title('Fig 1b - Sorted Issued Loan Amount (2015)', size=15)









    Out[168]:





<matplotlib.text.Text at 0x1b84f3750>

Fig 1a shows the sorted issued loan amounts from low to high.
Fig 2c is a histogram showing the distribution of the issued loan amounts.

Obeservation
The Loan amounts vary from $1000 to $35,000, and the most frequent loan amounts issued are around $10,000.



In [81]:

    
inc_75 = df.describe().loc['75%', 'annual_inc']
count_75 = int(len(df)*0.75)









    Out[81]:





315822



In [165]:

    
# 2. Applicant Anual Income Distribution

fig = pl.figure(figsize=(8,16))

ax0 = fig.add_subplot(311)
ax0.plot(range(len(df.annual_inc)), sorted(df.annual_inc), '.', color='blue')
ax0.set_xlabel('Loan Applicant Count')
ax0.set_ylabel('Applicant Annual Income ($)')
ax0.set_title('Fig 2a - Sorted Applicant Annual Income-all ($) (2015)', size=15)

# use 75% quantile to plot the graph and histograms -- excluding extreme values
inc_75 = df.describe().loc['75%', 'annual_inc']
inc_below75 = df.annual_inc[df.annual_inc <= inc_75]
count_75 = int(len(df)*0.75)

ax1 = fig.add_subplot(312)
ax1.plot(range(count_75), sorted(df.annual_inc)[:count_75], '.', color='blue')
ax1.set_xlabel('Loan Applicant Count')
ax1.set_ylabel('Applicant Annual Income ($)')
ax1.set_title('Fig 2b - Sorted Applicant Annual Income-75% ($) (2015)',size=15)

# all_ histogram
# pick upper bound 900 to exclude too large numbers
ax2 = fig.add_subplot(313)
ax2.hist(df.annual_inc, range=(df.annual_inc.min(), inc_75), color='blue')
ax2.set_xlabel('Applicant Annual Income -$', size=12)
ax2.set_ylabel('Counts',size=12)
ax2.set_title('Fig 2c - Sorted Applicant Income-75% ($) (2015)',size=15)









    Out[165]:





<matplotlib.text.Text at 0x1afc72fd0>

Fig 2a and Fig 2b both show the sorted applicant annual income from low to high. The former indicates extreme values, and the latter plots only those values below the 75% quantile, which looks more sensible.
Fig 2c is a histogram showing the distribution of the applicants' income (below 75% quantile).

Obeservation The most frequent annual income amounts of ths applicants are between $40,000 and below $60,000.



In [69]:

    
4.600000e+04









    Out[69]:





46000.0



In [94]:

    
# 3. Loan amount and Applicant Annual Income
# View all
pl.figure(figsize=(6,4))
pl.plot(df.annual_inc, df.loan_amnt, '.')
pl.ylim(0, 40000)
pl.xlim(0, 0.2e7)   # df.annual_inc.max()
pl.title('Fig 3a - Loan Amount VS Applicant Annual Income_all', size=15)
pl.ylabel('Loan Amount ($)', size=15)
pl.xlabel('Applicant Annual Income ($)', size=15)









    Out[94]:





<matplotlib.text.Text at 0x128701910>

Fig 3a shows the approved loan amount against the applicants' annual income.

Oberservation:
We can see that there are a few people with self-reported income that is very high, while majority of the applicants are with income less than $100,000. These extreme values indicate a possibility of outliers.

Method to deal with Outliers
Locate Outliers using Median-Absolute-Deviation (MAD) test and remove them for further analysis

Pick samples to set outlier range using the mean of the outlier boundries-- the method could be improved by using ramdom sampling



In [102]:

    
# 3b
pl.figure(figsize=(6,4))
pl.plot(df.annual_inc, df.loan_amnt, '.')
pl.ylim(0, 40000)
pl.xlim(0, inc_75)
pl.title('Fig 3b - Loan Amount VS Applicant Annual Income_75%', size=15)
pl.ylabel('Loan Amount ($)', size=15)
pl.xlabel('Applicant Annual Income ($)', size=15)









    Out[102]:





<matplotlib.text.Text at 0x14d90a750>

Fig 3b is plot of the loan amount VS applicant annual income with all extreme income amounts being excluded.

Observation:
Now it is clearer to see that there is quite "rigid" standard to determine loan amounts based on income, however, there are still exceptions (sparse points above the "division line".



In [91]:

    
pl.plot(np.log(df.annual_inc), np.log(df.loan_amnt), '.')









    



/Users/fatkay22/anaconda/lib/python2.7/site-packages/ipykernel/__main__.py:1: RuntimeWarning: divide by zero encountered in log
  if __name__ == '__main__':






    Out[91]:





[<matplotlib.lines.Line2D at 0x139cd3c90>]



In [ ]:



In [116]:

    
# 4. Average loan amount groupby grade
mean_loan_grade = df.groupby('grade')['loan_amnt'].mean()
mean_loan_grade









    Out[116]:





grade
A    14691.902340
B    14251.806455
C    14745.592285
D    15947.183739
E    18472.726622
F    20090.274524
G    20427.838025
Name: loan_amnt, dtype: float64



In [117]:

    
sum_loan_grade = df.groupby('grade')['loan_amnt'].sum()
sum_loan_grade









    Out[117]:





grade
A    1.077445e+09
B    1.676098e+09
C    1.777832e+09
D    9.991548e+08
E    6.455848e+08
F    1.972262e+08
G    4.426712e+07
Name: loan_amnt, dtype: float64



In [169]:

    
fig = pl.figure(figsize=(8,12))  #16,5

ax0 = fig.add_subplot(211)
ax0.plot(range(len(mean_loan_grade)), mean_loan_grade, 'o', color='blue')

ax0.set_ylim(0, 23000)
ax0.set_xlim(-0.5, len(mean_loan_grade))

ax0.set_xticks(range(len(mean_loan_grade)))
ax0.set_xticklabels(('A','B','C','D','E','F','G'))
ax0.set_xlabel('Grade')
ax0.set_ylabel('Average Loan Amount ($)')
ax0.set_title('Fig 4a - Average Loan Amount by Grade ($) (2015)', size=15)


ax1 = fig.add_subplot(212)
ax1.plot(range(len(sum_loan_grade)), sum_loan_grade, 'o', color='brown')

ax1.set_ylim(0, 2.3e9)
ax1.set_xlim(-0.5, len(sum_loan_grade))

ax1.set_xticks(range(len(sum_loan_grade)))
ax1.set_xticklabels(('A','B','C','D','E','F','G'))
ax1.set_xlabel('Grade')
ax1.set_ylabel('Total Loan Amount ($)')
ax1.set_title('Fig 4b - Total Loan Amount by Grade ($) (2015)', size=15)









    Out[169]:





<matplotlib.text.Text at 0x15e56b510>

Fig 4a shows the avereage approved loan amounts corresponded to the grades determined by the Lending Club.
Fig 4b shows the total approved loan amounts corresponded to the grades determined by the Lending Club.

Oberservation:
It is interesting to see that the points in these two charts have different trends-- the total loan amount gets higher from grade A to C, and then fall to a very low level; the average loan amount falls a little from grade A to grade B, and then gradually increases as the grade goes from B to G (increased by more than $5,000 from B to G).



In [ ]:

	id	member_id	loan_amnt	funded_amnt	funded_amnt_inv	term	int_rate	installment	grade	sub_grade	...	num_tl_op_past_12m	pct_tl_nvr_dlq	percent_bc_gt_75	tot_hi_cred_lim	total_bal_ex_mort	total_bc_limit	total_il_high_credit_limit
0	68615169	NaN	16000.0	16000.0	16000.0	60 months	8.49%	328.19	B	B1	...	3.0	87.5	50.0	307343.0	64371.0	23500.0	59526.0
1	68537564	NaN	8000.0	8000.0	8000.0	36 months	10.78%	261.08	B	B4	...	4.0	97.1	100.0	168819.0	148154.0	5800.0	136996.0
2	68536799	NaN	10000.0	10000.0	10000.0	36 months	10.78%	326.35	B	B4	...	2.0	100.0	33.3	190694.0	40681.0	13900.0	29512.0
3	68355089	NaN	24700.0	24700.0	24700.0	36 months	11.99%	820.28	C	C1	...	2.0	97.4	7.7	314017.0	39475.0	79300.0	24667.0
4	68506885	NaN	10000.0	10000.0	10000.0	60 months	11.99%	222.40	C	C1	...	1.0	100.0	50.0	61054.0	41166.0	13100.0	46854.0

	loan_status	loan_amnt	int_rate	grade	sub_grade	purpose	annual_inc	emp_length	home_ownership	fico_range_low	fico_range_high	num_actv_bc_tl	tot_cur_bal	mort_acc	num_actv_rev_tl	dti
0	Current	16000.0	8.49%	B	B1	credit_card	62000.0	5 years	MORTGAGE	720.0	724.0	4.0	227708.0	3.0	5.0	28.92
1	Current	8000.0	10.78%	B	B4	debt_consolidation	45000.0	< 1 year	MORTGAGE	670.0	674.0	3.0	148154.0	2.0	11.0	21.23
2	Current	10000.0	10.78%	B	B4	debt_consolidation	41600.0	3 years	MORTGAGE	695.0	699.0	2.0	168304.0	2.0	5.0	15.78

	loan_status	loan_amnt	int_rate(%)	grade	sub_grade	annual_inc	num_actv_bc_tl	avg_cur_bal	mort_acc
0	Current	16000.0	8.49	B	B1	62000.0	4.0	22771.0	3.0
1	Current	8000.0	10.78	B	B4	45000.0	3.0	5926.0	2.0
2	Current	10000.0	10.78	B	B4	41600.0	2.0	9350.0	2.0

	loan_amnt	int_rate(%)	annual_inc	num_actv_bc_tl	avg_cur_bal	mort_acc
count	421095.000000	421095.000000	4.210950e+05	421095.000000	421095.000000	421095.000000
mean	15240.285862	12.600374	7.696561e+04	3.740966	13104.784721	1.685261
std	8571.325901	4.318474	7.394996e+04	2.304542	15671.803897	1.990160
min	1000.000000	5.320000	0.000000e+00	0.000000	0.000000	0.000000
25%	8500.000000	9.170000	4.600000e+04	2.000000	3199.000000	0.000000
50%	14000.000000	12.290000	6.500000e+04	3.000000	7262.000000	1.000000
75%	20000.000000	15.590000	9.169050e+04	5.000000	17974.500000	3.000000
max	35000.000000	28.990000	9.500000e+06	32.000000	555925.000000	52.000000