In [1]:
# initialize
import numpy as np
import scipy as sp
import pandas as pd
import statsmodels.api as sm
import matplotlib as mpl
import matplotlib.tri as mtri
import matplotlib.pylab as plt
import seaborn as sns
from scipy import stats
from sklearn.datasets import make_regression
# Plot styling: seaborn theme with the "muted" palette and doubled font scale.
sns.set(font_scale=2, palette="muted")

# Load the loan dataframe from CSV.
# NOTE(review): absolute, machine-specific path — the notebook only runs where
# this file exists; consider a path relative to a configurable data directory.
DATA_PATH = 'd:/works/project/lc_project/lc_dataframe.csv'
data = pd.read_csv(DATA_PATH)

# Show the last rows as the cell output.
data.tail()
Out[1]:
In [ ]:
# preprocessing:
In [3]:
# status/grade group dataframe(series): loan counts per (grade, loan_status) pair
data_grade = data.groupby("grade").loan_status.value_counts()

# The tick labels determine how many grades are plotted, so derive n from them
# rather than keeping a separate magic number in sync.
grade_labels = ('A', 'B', 'C', 'D', 'E', 'F', 'G')
n = len(grade_labels)

# Pivot the counts into a grade x loan_status table.
# fill_value=0 turns a missing (grade, status) pair into a zero count instead
# of a KeyError at lookup time; the reindex pins the rows to grade codes 1..n.
# NOTE(review): assumes `grade` is integer-coded 1..7 in the order A..G — confirm.
status_counts = (
    data_grade
    .unstack(fill_value=0)
    .reindex(index=np.arange(1, n + 1), fill_value=0)
)

# status / grade ratio: share of loans with loan_status == 1 within each grade.
# NOTE(review): status code 1 is taken to mean "fully paid" (per the plot title) — confirm.
total_per_grade = status_counts.sum(axis=1)
paid_per_grade = status_counts.get(1, 0)  # 0 if no loan has status 1 at all
# A grade with no loans yields NaN (0/0) and is simply drawn as an empty bar.
grade_mat = np.asarray(paid_per_grade / total_per_grade, dtype=float)

# Grade/Paid Ratio Plot
fig, ax = plt.subplots(figsize=(15, 10))
x = np.arange(len(grade_mat))
ax.bar(x, grade_mat, align='center')
ax.set_xticks(x)
ax.set_xticklabels(grade_labels)
ax.set_title("Fully Paid Ratio by LC Credit Rank")
ax.set_xlabel("LC Credit Rank")
ax.set_ylabel("Grade/Paid Ratio")
plt.show()