notebook.community

Edit and run



In [1]:

    
# initialize
import numpy as np
import scipy as sp
import pandas as pd
import statsmodels.api as sm
import matplotlib as mpl
import matplotlib.tri as mtri
import matplotlib.pylab as plt
import seaborn as sns
from scipy import stats
from sklearn.datasets import make_regression

# Seaborn setting
sns.set(palette="muted", font_scale=2)

# data loading
data = pd.read_csv('d:/works/project/lc_project/lc_dataframe.csv')
data.tail()









    



D:\Anaconda2\lib\site-packages\IPython\core\interactiveshell.py:2717: DtypeWarning: Columns (6) have mixed types. Specify dtype option on import or set low_memory=False.
  interactivity=interactivity, compiler=compiler, result=result)






    Out[1]:






  
    
      
      loan_amnt
      term
      int_rate
      installment
      grade
      sub_grade
      emp_title
      emp_length
      home_ownership
      annual_inc
      ...
      delinq_2yrs
      inq_last_6mths
      open_acc
      pub_rec
      revol_bal
      revol_util
      total_acc
      initial_list_status
      application_type
      acc_now_delinq
    
  
  
    
      268132
      31050
      60
      21.99
      857.40
      6
      61
      1
      10
      2
      875000.0
      ...
      1
      0
      10
      0
      25770
      79.3
      13
      0
      1
      0
    
    
      268133
      10800
      36
      7.89
      337.89
      1
      15
      1
      8
      2
      92400.0
      ...
      1
      0
      11
      0
      9760
      68.7
      36
      1
      1
      0
    
    
      268134
      9000
      36
      9.17
      286.92
      2
      22
      1
      1
      2
      80000.0
      ...
      1
      0
      8
      0
      6320
      51.8
      17
      0
      1
      0
    
    
      268135
      14400
      60
      25.99
      431.06
      6
      65
      0
      11
      6
      62000.0
      ...
      0
      1
      9
      1
      5677
      45.1
      30
      0
      1
      0
    
    
      268136
      8000
      36
      12.59
      267.98
      3
      32
      1
      4
      5
      45000.0
      ...
      0
      0
      12
      0
      9097
      50.8
      47
      1
      1
      0
    
  

5 rows × 27 columns



In [ ]:

    
# preprocessing:



In [3]:

    
# status/grade group dataframe(series)
data_grade = data.groupby("grade").loan_status.value_counts()

# status / grade ratio plot
n = 7
grade_mat = np.zeros(n)
for i in range(n):
    grade_mat[i] = data_grade[i+1, 1] / float(np.sum(data_grade[i+1]))

# Grade/Paid Ratio Plot
plt.figure(figsize=(15, 10))
x = np.arange(len(grade_mat))
plt.bar(x, grade_mat, align='center')
plt.xticks(x, ('A', 'B', 'C', 'D', 'E', 'F', 'G'))
plt.title("Fully Paid Ratio by LC Credit Rank")
plt.xlabel("LC Credit Rank")
plt.ylabel("Grade/Paid Ratio")
plt.show()

	loan_amnt	term	int_rate	installment	grade	sub_grade	emp_title	emp_length	home_ownership	annual_inc	...	delinq_2yrs	inq_last_6mths	open_acc	pub_rec	revol_bal	revol_util	total_acc	initial_list_status	application_type
268132	31050	60	21.99	857.40	6	61	1	10	2	875000.0	...	1	0	10	0	25770	79.3	13	0	1
268133	10800	36	7.89	337.89	1	15	1	8	2	92400.0	...	1	0	11	0	9760	68.7	36	1	1
268134	9000	36	9.17	286.92	2	22	1	1	2	80000.0	...	1	0	8	0	6320	51.8	17	0	1
268135	14400	60	25.99	431.06	6	65	0	11	6	62000.0	...	0	1	9	1	5677	45.1	30	0	1
268136	8000	36	12.59	267.98	3	32	1	4	5	45000.0	...	0	0	12	0	9097	50.8	47	1	1