In [1]:
# initialize
import numpy as np
import scipy as sp
import pandas as pd
import statsmodels.api as sm
import matplotlib as mpl
import matplotlib.tri as mtri
import matplotlib.pylab as plt
import seaborn as sns
from scipy import stats
from sklearn.datasets import make_regression

# Seaborn setting
sns.set(palette="muted", font_scale=2)

# data loading
data = pd.read_csv('d:/works/project/lc_project/lc_dataframe.csv')
data.tail()


D:\Anaconda2\lib\site-packages\IPython\core\interactiveshell.py:2717: DtypeWarning: Columns (6) have mixed types. Specify dtype option on import or set low_memory=False.
  interactivity=interactivity, compiler=compiler, result=result)
Out[1]:
loan_amnt term int_rate installment grade sub_grade emp_title emp_length home_ownership annual_inc ... delinq_2yrs inq_last_6mths open_acc pub_rec revol_bal revol_util total_acc initial_list_status application_type acc_now_delinq
268132 31050 60 21.99 857.40 6 61 1 10 2 875000.0 ... 1 0 10 0 25770 79.3 13 0 1 0
268133 10800 36 7.89 337.89 1 15 1 8 2 92400.0 ... 1 0 11 0 9760 68.7 36 1 1 0
268134 9000 36 9.17 286.92 2 22 1 1 2 80000.0 ... 1 0 8 0 6320 51.8 17 0 1 0
268135 14400 60 25.99 431.06 6 65 0 11 6 62000.0 ... 0 1 9 1 5677 45.1 30 0 1 0
268136 8000 36 12.59 267.98 3 32 1 4 5 45000.0 ... 0 0 12 0 9097 50.8 47 1 1 0

5 rows × 27 columns


In [ ]:
# preprocessing:

In [3]:
# status/grade group dataframe(series)
data_grade = data.groupby("grade").loan_status.value_counts()

# status / grade ratio plot
n = 7
grade_mat = np.zeros(n)
for i in range(n):
    grade_mat[i] = data_grade[i+1, 1] / float(np.sum(data_grade[i+1]))

# Grade/Paid Ratio Plot
plt.figure(figsize=(15, 10))
x = np.arange(len(grade_mat))
plt.bar(x, grade_mat, align='center')
plt.xticks(x, ('A', 'B', 'C', 'D', 'E', 'F', 'G'))
plt.title("Fully Paid Ratio by LC Credit Rank")
plt.xlabel("LC Credit Rank")
plt.ylabel("Grade/Paid Ratio")
plt.show()