In [3]:
# Load the cleaned loan dataset produced by the cleaning step.
# `pd.DataFrame.from_csv` was deprecated and later removed from pandas;
# `read_csv(index_col=0, parse_dates=True)` reproduces its defaults
# (first column as index, index parsed as dates when possible).
lc_data = pd.read_csv(
    './lc_dataframe(cleaning).csv',
    index_col=0,
    parse_dates=True,
)
# Move the index read from the first CSV column back into a regular column
# and replace it with a fresh 0..n-1 RangeIndex.
lc_data = lc_data.reset_index()
# Show the last rows as a quick sanity check of the load.
lc_data.tail()
Out[3]:
In [25]:
# V1: distribution of the `term` column (x-axis labelled in months), 100 bins.
x = lc_data['term']
ax = plt.gca()  # same axes pyplot's implicit interface would draw on
n, bins, patches = ax.hist(x, bins=100, color='r')
ax.set(xlabel='month', ylabel='counts', title='V1 Term Histogram')
plt.show()
In [31]:
# V2: distribution of the `int_rate` column with matplotlib's default binning.
x = lc_data['int_rate']
ax = plt.gca()  # same axes pyplot's implicit interface would draw on
n, bins, patches = ax.hist(x, color='g')
ax.set(
    xlabel='interest rate %',
    ylabel='counts',
    title='V2 int_rate Histogram (Interest rate on loan)',
)
plt.show()
In [33]:
# V3: distribution of the `installment` column, default colour and binning.
x = lc_data['installment']
ax = plt.gca()  # same axes pyplot's implicit interface would draw on
n, bins, patches = ax.hist(x)
ax.set(xlabel='installment', ylabel='counts', title='V3 installment Histogram')
plt.show()
In [47]:
# V9: distribution of the `annual_inc` column.
# NOTE: `range=(0, 100000)` restricts the bins to that interval, so incomes
# above 100000 are left out of this plot entirely.
x = lc_data['annual_inc']
ax = plt.gca()  # same axes pyplot's implicit interface would draw on
n, bins, patches = ax.hist(x, color='r', range=(0, 100000))
ax.set(
    xlabel='annual income for users',
    ylabel='counts',
    title='V9 annual_inc Histogram',
)
plt.show()
In [49]:
# V12: distribution of the `loan_amnt` column with default binning.
x = lc_data['loan_amnt']
ax = plt.gca()  # same axes pyplot's implicit interface would draw on
n, bins, patches = ax.hist(x, color='g')
ax.set(xlabel='loan amount', ylabel='counts', title='V12 loan_amnt Histogram')
plt.show()
In [75]:
# V13: density plot (seaborn distplot) of the `desc` column.
# NOTE(review): the axis label suggests `desc` was converted to a text length
# during cleaning -- confirm against the cleaning notebook.
x = lc_data['desc']
ax = plt.gca()  # the axes seaborn would pick when no `ax` is given
sns.distplot(x, ax=ax)
ax.set(
    xlabel='length of letter on doc',
    ylabel='density',
    title='V13 desc Histogram',
)
plt.show()
In [66]:
# V15: distribution of the `dti` column with default binning.
x = lc_data['dti']
ax = plt.gca()  # same axes pyplot's implicit interface would draw on
n, bins, patches = ax.hist(x, color='r')
ax.set(
    xlabel='A ratio total monthly debt payments on the total debt obligations',
    ylabel='counts',
    title='V15 dti Histogram',
)
plt.show()
In [67]:
# V16: distribution of the `delinq_2yrs` column with default binning.
# The x-label previously had missing spaces ("ofincidences", "borrowerin"),
# which rendered as garbled text on the figure.
x = lc_data['delinq_2yrs']
ax = plt.gca()  # same axes pyplot's implicit interface would draw on
n, bins, patches = ax.hist(x, color='g')
ax.set(
    xlabel='The number of incidences of delinquency for the borrower in the past 2 yrs',
    ylabel='counts',
    title='V16 delinq_2yrs Histogram',
)
plt.show()
In [68]:
# V17: distribution of the `inq_last_6mths` column with default binning.
# The title used to read 'V16 delinq_2yrs Histogram', copied from the
# previous cell; it now names the column actually plotted here.
x = lc_data['inq_last_6mths']
ax = plt.gca()  # same axes pyplot's implicit interface would draw on
n, bins, patches = ax.hist(x)
ax.set(
    xlabel='The number of inquiries in past 6 months',
    ylabel='counts',
    title='V17 inq_last_6mths Histogram',
)
plt.show()
In [69]:
# V18: distribution of the `open_acc` column with default binning.
x = lc_data['open_acc']
ax = plt.gca()  # same axes pyplot's implicit interface would draw on
n, bins, patches = ax.hist(x, color='r')
ax.set(
    xlabel="The number of open credit lines in the borrower's credit file",
    ylabel='counts',
    title='V18 open_acc Histogram',
)
plt.show()
In [74]:
# V19: density plot (seaborn distplot) of the `pub_rec` column.
x = lc_data['pub_rec']
ax = plt.gca()  # the axes seaborn would pick when no `ax` is given
sns.distplot(x, color='g', ax=ax)
ax.set(
    xlabel='Number of derogatory public records',
    ylabel='density',
    title='V19 pub_rec Histogram',
)
plt.show()
In [73]:
# V20: density plot (seaborn distplot) of the `revol_bal` column.
x = lc_data['revol_bal']
ax = plt.gca()  # the axes seaborn would pick when no `ax` is given
sns.distplot(x, ax=ax)
ax.set(
    xlabel='Total credit revolving balance',
    ylabel='density',
    title='V20 revol_bal Histogram',
)
plt.show()
In [80]:
# V21: density plot (seaborn distplot) of the `revol_util` column.
# The x-label used to stop mid-sentence ("...is using relative"); it is
# completed here so the axis is self-explanatory.
# NOTE(review): wording follows the usual LendingClub definition of revolving
# utilization -- confirm it matches this dataset's data dictionary.
x = lc_data['revol_util']
ax = plt.gca()  # the axes seaborn would pick when no `ax` is given
sns.distplot(x, ax=ax)
ax.set(
    xlabel='the amount of credit the borrower is using relative to all available revolving credit',
    ylabel='density',
    title='V21 revol_util Histogram',
)
plt.show()
In [78]:
# V22: distribution of the `total_acc` column with default binning.
# The title previously began with a stray markdown heading marker ('## '),
# which matplotlib rendered literally; it is removed to match the other titles.
x = lc_data['total_acc']
ax = plt.gca()  # same axes pyplot's implicit interface would draw on
ax.hist(x)
ax.set(
    xlabel="The total number of credit lines currently in the borrower's credit file",
    ylabel='counts',
    title='V22 total_acc Histogram',
)
plt.show()
In [81]:
# V24: distribution of the `acc_now_delinq` column with default binning.
x = lc_data['acc_now_delinq']
ax = plt.gca()  # same axes pyplot's implicit interface would draw on
ax.hist(x)
ax.set(
    xlabel='The number of accounts on which the borrower is now delinquent.',
    ylabel='counts',
    title='V24 acc_now_delinq Histogram',
)
plt.show()