notebook.community

Edit and run



In [2]:

    
import pandas as pd
import numpy as np

from sklearn import preprocessing
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.svm import LinearSVC
from sklearn.calibration import CalibratedClassifierCV

from sklearn.feature_selection import RFE
from sklearn.model_selection import GridSearchCV
from sklearn import metrics
from sklearn.model_selection import cross_val_score



In [3]:

    
df = pd.read_csv('LoanStats3d_securev1.csv.zip', compression='zip',header=1)









    



/Users/Tinoargentino/anaconda2/lib/python2.7/site-packages/IPython/core/interactiveshell.py:2717: DtypeWarning: Columns (0,19,59) have mixed types. Specify dtype option on import or set low_memory=False.
  interactivity=interactivity, compiler=compiler, result=result)



In [4]:

    
df.head()









    Out[4]:






  
    
      
      id
      member_id
      loan_amnt
      funded_amnt
      funded_amnt_inv
      term
      int_rate
      installment
      grade
      sub_grade
      ...
      num_tl_90g_dpd_24m
      num_tl_op_past_12m
      pct_tl_nvr_dlq
      percent_bc_gt_75
      pub_rec_bankruptcies
      tax_liens
      tot_hi_cred_lim
      total_bal_ex_mort
      total_bc_limit
      total_il_high_credit_limit
    
  
  
    
      0
      68615169
      NaN
      16000.0
      16000.0
      16000.0
      60 months
      8.49%
      328.19
      B
      B1
      ...
      0.0
      3.0
      87.5
      50.0
      0.0
      0.0
      307343.0
      64371.0
      23500.0
      59526.0
    
    
      1
      68537564
      NaN
      8000.0
      8000.0
      8000.0
      36 months
      10.78%
      261.08
      B
      B4
      ...
      0.0
      4.0
      97.1
      100.0
      0.0
      0.0
      168819.0
      148154.0
      5800.0
      136996.0
    
    
      2
      68536799
      NaN
      10000.0
      10000.0
      10000.0
      36 months
      10.78%
      326.35
      B
      B4
      ...
      0.0
      2.0
      100.0
      33.3
      0.0
      0.0
      190694.0
      40681.0
      13900.0
      29512.0
    
    
      3
      68355089
      NaN
      24700.0
      24700.0
      24700.0
      36 months
      11.99%
      820.28
      C
      C1
      ...
      0.0
      2.0
      97.4
      7.7
      0.0
      0.0
      314017.0
      39475.0
      79300.0
      24667.0
    
    
      4
      68506885
      NaN
      10000.0
      10000.0
      10000.0
      60 months
      11.99%
      222.40
      C
      C1
      ...
      0.0
      1.0
      100.0
      50.0
      0.0
      0.0
      61054.0
      41166.0
      13100.0
      46854.0
    
  

5 rows × 115 columns



In [5]:

    
defaulters=['Default','Charged Off', 'Late (31-120 days)']
non_defaulters=['Fully Paid']
uncertain = ['Current','Late (16-30 days)','In Grace Period', 'none']



In [6]:

    
df.loan_status.unique()









    Out[6]:





array(['Current', 'Fully Paid', 'Default', 'Charged Off',
       'Late (16-30 days)', 'Late (31-120 days)', 'In Grace Period', nan], dtype=object)



In [7]:

    
df['Target']= 2 ## uncertain
df.loc[df.loan_status.isin(defaulters),'Target'] = 0  ## defaulters
df.loc[df.loan_status.isin(non_defaulters),'Target'] = 1  ## paid -- (and to whom to issue the loan)



In [8]:

    
df.head()









    Out[8]:






  
    
      
      id
      member_id
      loan_amnt
      funded_amnt
      funded_amnt_inv
      term
      int_rate
      installment
      grade
      sub_grade
      ...
      num_tl_op_past_12m
      pct_tl_nvr_dlq
      percent_bc_gt_75
      pub_rec_bankruptcies
      tax_liens
      tot_hi_cred_lim
      total_bal_ex_mort
      total_bc_limit
      total_il_high_credit_limit
      Target
    
  
  
    
      0
      68615169
      NaN
      16000.0
      16000.0
      16000.0
      60 months
      8.49%
      328.19
      B
      B1
      ...
      3.0
      87.5
      50.0
      0.0
      0.0
      307343.0
      64371.0
      23500.0
      59526.0
      2
    
    
      1
      68537564
      NaN
      8000.0
      8000.0
      8000.0
      36 months
      10.78%
      261.08
      B
      B4
      ...
      4.0
      97.1
      100.0
      0.0
      0.0
      168819.0
      148154.0
      5800.0
      136996.0
      2
    
    
      2
      68536799
      NaN
      10000.0
      10000.0
      10000.0
      36 months
      10.78%
      326.35
      B
      B4
      ...
      2.0
      100.0
      33.3
      0.0
      0.0
      190694.0
      40681.0
      13900.0
      29512.0
      2
    
    
      3
      68355089
      NaN
      24700.0
      24700.0
      24700.0
      36 months
      11.99%
      820.28
      C
      C1
      ...
      2.0
      97.4
      7.7
      0.0
      0.0
      314017.0
      39475.0
      79300.0
      24667.0
      1
    
    
      4
      68506885
      NaN
      10000.0
      10000.0
      10000.0
      60 months
      11.99%
      222.40
      C
      C1
      ...
      1.0
      100.0
      50.0
      0.0
      0.0
      61054.0
      41166.0
      13100.0
      46854.0
      2
    
  

5 rows × 116 columns



In [9]:

    
df.loc[df['Target'] == 0].describe()









    Out[9]:






  
    
      
      member_id
      loan_amnt
      funded_amnt
      funded_amnt_inv
      installment
      annual_inc
      dti
      delinq_2yrs
      fico_range_low
      fico_range_high
      ...
      num_tl_op_past_12m
      pct_tl_nvr_dlq
      percent_bc_gt_75
      pub_rec_bankruptcies
      tax_liens
      tot_hi_cred_lim
      total_bal_ex_mort
      total_bc_limit
      total_il_high_credit_limit
      Target
    
  
  
    
      count
      0.0
      42302.000000
      42302.000000
      42302.000000
      42302.000000
      4.230200e+04
      42302.000000
      42302.000000
      42302.000000
      42302.000000
      ...
      42302.000000
      42302.000000
      41790.000000
      42302.000000
      42302.000000
      4.230200e+04
      42302.000000
      42302.000000
      42302.000000
      42302.0
    
    
      mean
      NaN
      15658.597702
      15658.597702
      15650.406461
      461.664684
      7.023571e+04
      21.319409
      0.376294
      685.131436
      689.131483
      ...
      2.714080
      93.982022
      52.155906
      0.157321
      0.066545
      1.429021e+05
      51433.688738
      17917.678242
      43445.640135
      0.0
    
    
      std
      NaN
      8555.295964
      8555.295964
      8550.764991
      250.791773
      8.017445e+04
      49.315800
      0.982413
      23.979229
      23.979545
      ...
      2.067767
      8.685771
      35.785969
      0.417973
      0.576476
      1.443945e+05
      44800.858670
      18031.898258
      41308.078088
      0.0
    
    
      min
      NaN
      1000.000000
      1000.000000
      900.000000
      30.540000
      0.000000e+00
      0.000000
      0.000000
      660.000000
      664.000000
      ...
      0.000000
      12.500000
      0.000000
      0.000000
      0.000000
      2.584000e+03
      0.000000
      0.000000
      0.000000
      0.0
    
    
      25%
      NaN
      9500.000000
      9500.000000
      9500.000000
      279.720000
      4.300000e+04
      14.470000
      0.000000
      665.000000
      669.000000
      ...
      1.000000
      90.900000
      22.200000
      0.000000
      0.000000
      4.567700e+04
      23353.250000
      6500.000000
      16575.500000
      0.0
    
    
      50%
      NaN
      14475.000000
      14475.000000
      14450.000000
      403.035000
      6.000000e+04
      20.940000
      0.000000
      680.000000
      684.000000
      ...
      2.000000
      97.300000
      50.000000
      0.000000
      0.000000
      8.670500e+04
      40000.000000
      12500.000000
      33714.000000
      0.0
    
    
      75%
      NaN
      20400.000000
      20400.000000
      20400.000000
      596.340000
      8.400000e+04
      27.740000
      0.000000
      695.000000
      699.000000
      ...
      4.000000
      100.000000
      83.300000
      0.000000
      0.000000
      1.991648e+05
      65288.250000
      23000.000000
      58323.750000
      0.0
    
    
      max
      NaN
      35000.000000
      35000.000000
      35000.000000
      1445.460000
      8.900060e+06
      9999.000000
      27.000000
      845.000000
      850.000000
      ...
      22.000000
      100.000000
      100.000000
      8.000000
      85.000000
      2.319146e+06
      893784.000000
      326300.000000
      757807.000000
      0.0
    
  

8 rows × 90 columns



In [10]:

    
df.loc[df['Target'] == 1].describe()









    Out[10]:






  
    
      
      member_id
      loan_amnt
      funded_amnt
      funded_amnt_inv
      installment
      annual_inc
      dti
      delinq_2yrs
      fico_range_low
      fico_range_high
      ...
      num_tl_op_past_12m
      pct_tl_nvr_dlq
      percent_bc_gt_75
      pub_rec_bankruptcies
      tax_liens
      tot_hi_cred_lim
      total_bal_ex_mort
      total_bc_limit
      total_il_high_credit_limit
      Target
    
  
  
    
      count
      0.0
      94968.000000
      94968.000000
      94968.000000
      94968.000000
      9.496800e+04
      94968.000000
      94968.000000
      94968.000000
      94968.000000
      ...
      94968.000000
      94968.000000
      93915.000000
      94968.000000
      94968.000000
      9.496800e+04
      9.496800e+04
      94968.000000
      9.496800e+04
      94968.0
    
    
      mean
      NaN
      14678.662813
      14678.662813
      14673.312853
      437.941518
      7.944094e+04
      17.906022
      0.322066
      696.294278
      700.294457
      ...
      2.412507
      94.198152
      42.624899
      0.155926
      0.050175
      1.864274e+05
      5.150747e+04
      22252.687547
      4.336753e+04
      1.0
    
    
      std
      NaN
      8680.966801
      8680.966801
      8677.636591
      254.684005
      6.976620e+04
      8.553780
      0.903981
      32.486469
      32.487292
      ...
      1.949052
      8.496625
      35.786696
      0.409449
      0.342618
      1.824717e+05
      4.883338e+04
      22438.665164
      4.387513e+04
      0.0
    
    
      min
      NaN
      1000.000000
      1000.000000
      975.000000
      30.540000
      5.000000e+03
      0.000000
      0.000000
      660.000000
      664.000000
      ...
      0.000000
      16.700000
      0.000000
      0.000000
      0.000000
      2.500000e+03
      0.000000e+00
      0.000000
      0.000000e+00
      1.0
    
    
      25%
      NaN
      8000.000000
      8000.000000
      8000.000000
      251.360000
      4.880000e+04
      11.550000
      0.000000
      670.000000
      674.000000
      ...
      1.000000
      91.300000
      0.000000
      0.000000
      0.000000
      5.399600e+04
      2.213300e+04
      7800.000000
      1.579275e+04
      1.0
    
    
      50%
      NaN
      12525.000000
      12525.000000
      12525.000000
      377.040000
      6.800000e+04
      17.300000
      0.000000
      690.000000
      694.000000
      ...
      2.000000
      97.600000
      40.000000
      0.000000
      0.000000
      1.268690e+05
      3.898000e+04
      15300.000000
      3.312550e+04
      1.0
    
    
      75%
      NaN
      20000.000000
      20000.000000
      20000.000000
      583.500000
      9.500000e+04
      23.700000
      0.000000
      710.000000
      714.000000
      ...
      3.000000
      100.000000
      66.700000
      0.000000
      0.000000
      2.693190e+05
      6.492800e+04
      28900.000000
      5.819925e+04
      1.0
    
    
      max
      NaN
      35000.000000
      35000.000000
      35000.000000
      1409.990000
      7.600000e+06
      380.530000
      39.000000
      845.000000
      850.000000
      ...
      25.000000
      100.000000
      100.000000
      7.000000
      15.000000
      4.214831e+06
      1.684313e+06
      684000.000000
      2.101913e+06
      1.0
    
  

8 rows × 90 columns



In [11]:

    
df.loc[df['Target'] == 0][['funded_amnt','total_pymnt']].describe()









    Out[11]:






  
    
      
      funded_amnt
      total_pymnt
    
  
  
    
      count
      42302.000000
      42302.000000
    
    
      mean
      15658.597702
      5534.268757
    
    
      std
      8555.295964
      3859.052983
    
    
      min
      1000.000000
      0.000000
    
    
      25%
      9500.000000
      2781.667500
    
    
      50%
      14475.000000
      4660.085000
    
    
      75%
      20400.000000
      7337.890000
    
    
      max
      35000.000000
      48307.240000



In [12]:

    
df.loc[df['Target'] == 1][['funded_amnt','total_pymnt']].describe()









    Out[12]:






  
    
      
      funded_amnt
      total_pymnt
    
  
  
    
      count
      94968.000000
      94968.000000
    
    
      mean
      14678.662813
      16161.004698
    
    
      std
      8680.966801
      9740.942874
    
    
      min
      1000.000000
      1000.010000
    
    
      25%
      8000.000000
      8569.170000
    
    
      50%
      12525.000000
      13994.355000
    
    
      75%
      20000.000000
      22034.527500
    
    
      max
      35000.000000
      50312.207800



In [13]:

    
df.loc[df['Target'] == 0][['funded_amnt','total_pymnt']].head(10)









    Out[13]:






  
    
      
      funded_amnt
      total_pymnt
    
  
  
    
      14
      27300.0
      8727.52
    
    
      21
      23975.0
      3928.86
    
    
      28
      23100.0
      5305.12
    
    
      50
      16000.0
      4382.17
    
    
      101
      24250.0
      4124.42
    
    
      113
      7200.0
      3043.61
    
    
      136
      16000.0
      3109.00
    
    
      139
      5000.0
      1320.07
    
    
      143
      8000.0
      3449.23
    
    
      179
      25825.0
      3028.09



In [14]:

    
C=0.0820849986238988



In [15]:

    
A=df.loc[df['Target'] == 0][['funded_amnt','total_pymnt']].describe()



In [24]:

    
B=df.loc[df['Target'] == 1][['funded_amnt','total_pymnt']].describe()



In [25]:

    
Funded=A['funded_amnt']['count']*A['funded_amnt']['mean']+B['funded_amnt']['count']*B['funded_amnt']['mean']



In [26]:

    
Funded









    Out[26]:





2056393250.0



In [27]:

    
Collected=A['total_pymnt']['count']*A['total_pymnt']['mean']+B['total_pymnt']['count']*B['total_pymnt']['mean']



In [30]:

    
Collected









    Out[30]:





1768888931.1485128



In [31]:

    
Collected-Funded









    Out[31]:





-287504318.85148716



In [ ]:

	id	member_id	loan_amnt	funded_amnt	funded_amnt_inv	term	int_rate	installment	grade	sub_grade	...	num_tl_op_past_12m	pct_tl_nvr_dlq	percent_bc_gt_75	tot_hi_cred_lim	total_bal_ex_mort	total_bc_limit	total_il_high_credit_limit
0	68615169	NaN	16000.0	16000.0	16000.0	60 months	8.49%	328.19	B	B1	...	3.0	87.5	50.0	307343.0	64371.0	23500.0	59526.0
1	68537564	NaN	8000.0	8000.0	8000.0	36 months	10.78%	261.08	B	B4	...	4.0	97.1	100.0	168819.0	148154.0	5800.0	136996.0
2	68536799	NaN	10000.0	10000.0	10000.0	36 months	10.78%	326.35	B	B4	...	2.0	100.0	33.3	190694.0	40681.0	13900.0	29512.0
3	68355089	NaN	24700.0	24700.0	24700.0	36 months	11.99%	820.28	C	C1	...	2.0	97.4	7.7	314017.0	39475.0	79300.0	24667.0
4	68506885	NaN	10000.0	10000.0	10000.0	60 months	11.99%	222.40	C	C1	...	1.0	100.0	50.0	61054.0	41166.0	13100.0	46854.0

	member_id	loan_amnt	funded_amnt	funded_amnt_inv	installment	annual_inc	dti	delinq_2yrs	fico_range_low	fico_range_high	...	num_tl_op_past_12m	pct_tl_nvr_dlq	percent_bc_gt_75	pub_rec_bankruptcies	tax_liens	tot_hi_cred_lim	total_bal_ex_mort	total_bc_limit	total_il_high_credit_limit	Target
count	0.0	42302.000000	42302.000000	42302.000000	42302.000000	4.230200e+04	42302.000000	42302.000000	42302.000000	42302.000000	...	42302.000000	42302.000000	41790.000000	42302.000000	42302.000000	4.230200e+04	42302.000000	42302.000000	42302.000000	42302.0
mean	NaN	15658.597702	15658.597702	15650.406461	461.664684	7.023571e+04	21.319409	0.376294	685.131436	689.131483	...	2.714080	93.982022	52.155906	0.157321	0.066545	1.429021e+05	51433.688738	17917.678242	43445.640135	0.0
std	NaN	8555.295964	8555.295964	8550.764991	250.791773	8.017445e+04	49.315800	0.982413	23.979229	23.979545	...	2.067767	8.685771	35.785969	0.417973	0.576476	1.443945e+05	44800.858670	18031.898258	41308.078088	0.0
min	NaN	1000.000000	1000.000000	900.000000	30.540000	0.000000e+00	0.000000	0.000000	660.000000	664.000000	...	0.000000	12.500000	0.000000	0.000000	0.000000	2.584000e+03	0.000000	0.000000	0.000000	0.0
25%	NaN	9500.000000	9500.000000	9500.000000	279.720000	4.300000e+04	14.470000	0.000000	665.000000	669.000000	...	1.000000	90.900000	22.200000	0.000000	0.000000	4.567700e+04	23353.250000	6500.000000	16575.500000	0.0
50%	NaN	14475.000000	14475.000000	14450.000000	403.035000	6.000000e+04	20.940000	0.000000	680.000000	684.000000	...	2.000000	97.300000	50.000000	0.000000	0.000000	8.670500e+04	40000.000000	12500.000000	33714.000000	0.0
75%	NaN	20400.000000	20400.000000	20400.000000	596.340000	8.400000e+04	27.740000	0.000000	695.000000	699.000000	...	4.000000	100.000000	83.300000	0.000000	0.000000	1.991648e+05	65288.250000	23000.000000	58323.750000	0.0
max	NaN	35000.000000	35000.000000	35000.000000	1445.460000	8.900060e+06	9999.000000	27.000000	845.000000	850.000000	...	22.000000	100.000000	100.000000	8.000000	85.000000	2.319146e+06	893784.000000	326300.000000	757807.000000	0.0

	member_id	loan_amnt	funded_amnt	funded_amnt_inv	installment	annual_inc	dti	delinq_2yrs	fico_range_low	fico_range_high	...	num_tl_op_past_12m	pct_tl_nvr_dlq	percent_bc_gt_75	pub_rec_bankruptcies	tax_liens	tot_hi_cred_lim	total_bal_ex_mort	total_bc_limit	total_il_high_credit_limit	Target
count	0.0	94968.000000	94968.000000	94968.000000	94968.000000	9.496800e+04	94968.000000	94968.000000	94968.000000	94968.000000	...	94968.000000	94968.000000	93915.000000	94968.000000	94968.000000	9.496800e+04	9.496800e+04	94968.000000	9.496800e+04	94968.0
mean	NaN	14678.662813	14678.662813	14673.312853	437.941518	7.944094e+04	17.906022	0.322066	696.294278	700.294457	...	2.412507	94.198152	42.624899	0.155926	0.050175	1.864274e+05	5.150747e+04	22252.687547	4.336753e+04	1.0
std	NaN	8680.966801	8680.966801	8677.636591	254.684005	6.976620e+04	8.553780	0.903981	32.486469	32.487292	...	1.949052	8.496625	35.786696	0.409449	0.342618	1.824717e+05	4.883338e+04	22438.665164	4.387513e+04	0.0
min	NaN	1000.000000	1000.000000	975.000000	30.540000	5.000000e+03	0.000000	0.000000	660.000000	664.000000	...	0.000000	16.700000	0.000000	0.000000	0.000000	2.500000e+03	0.000000e+00	0.000000	0.000000e+00	1.0
25%	NaN	8000.000000	8000.000000	8000.000000	251.360000	4.880000e+04	11.550000	0.000000	670.000000	674.000000	...	1.000000	91.300000	0.000000	0.000000	0.000000	5.399600e+04	2.213300e+04	7800.000000	1.579275e+04	1.0
50%	NaN	12525.000000	12525.000000	12525.000000	377.040000	6.800000e+04	17.300000	0.000000	690.000000	694.000000	...	2.000000	97.600000	40.000000	0.000000	0.000000	1.268690e+05	3.898000e+04	15300.000000	3.312550e+04	1.0
75%	NaN	20000.000000	20000.000000	20000.000000	583.500000	9.500000e+04	23.700000	0.000000	710.000000	714.000000	...	3.000000	100.000000	66.700000	0.000000	0.000000	2.693190e+05	6.492800e+04	28900.000000	5.819925e+04	1.0
max	NaN	35000.000000	35000.000000	35000.000000	1409.990000	7.600000e+06	380.530000	39.000000	845.000000	850.000000	...	25.000000	100.000000	100.000000	7.000000	15.000000	4.214831e+06	1.684313e+06	684000.000000	2.101913e+06	1.0

	funded_amnt	total_pymnt
14	27300.0	8727.52
21	23975.0	3928.86
28	23100.0	5305.12
50	16000.0	4382.17
101	24250.0	4124.42
113	7200.0	3043.61
136	16000.0	3109.00
139	5000.0	1320.07
143	8000.0	3449.23
179	25825.0	3028.09