In [53]:
# Load the cleaned Lending Club frame.
# DataFrame.from_csv was removed from pandas; read_csv with index_col=0 and
# parse_dates=True reproduces from_csv's defaults, so the parsed frame is the
# same as before.
lc_data = pd.read_csv('./lc_dataframe(cleaning).csv', index_col=0, parse_dates=True)
# Move the CSV's first column back out of the index so rows get a plain
# 0..n-1 index.
lc_data = lc_data.reset_index()
lc_data.tail()
Out[53]:
In [54]:
from sklearn.preprocessing import scale


def _standardize_column(frame, column):
    """Standardize one numeric column of ``frame`` with sklearn's ``scale``.

    Parameters
    ----------
    frame : pandas.DataFrame
        Frame holding the raw column.
    column : str
        Name of the column to standardize.

    Returns
    -------
    tuple
        ``(raw, scaled)`` where ``raw`` is the column as an ``(n, 1)`` ndarray
        and ``scaled`` is a one-column DataFrame named ``"scale(x_<column>)"``
        that shares ``frame``'s index.
    """
    # Series.reshape no longer exists in pandas; reshape the underlying
    # ndarray instead to get the 2-D input that scale() expects.
    raw = frame[column].values.reshape(-1, 1)
    scaled = pd.DataFrame(
        scale(raw),
        columns=["scale(x_%s)" % column],
        # Reuse the source index so a later axis=1 concat aligns row-for-row.
        index=frame.index,
    )
    return raw, scaled


# NOTE(review): scale() is applied to the full data set here, i.e. before any
# train/test split, so the scaling statistics include every row.
x_loan_amnt, scaled_loan_amnt = _standardize_column(lc_data, 'loan_amnt')
x_annual_inc, scaled_annual_inc = _standardize_column(lc_data, 'annual_inc')
x_revol_bal, scaled_revol_bal = _standardize_column(lc_data, 'revol_bal')
Out[54]:
In [55]:
# Swap the three raw numeric columns for their standardized versions and move
# the target to the end of the frame.
# Resulting column order: untouched columns, scale(x_loan_amnt),
# scale(x_annual_inc), scale(x_revol_bal), loan_status.
y = lc_data['loan_status']
lc_data = pd.concat(
    [
        # axis is passed by keyword: the positional form drop(label, 1) is no
        # longer accepted by current pandas.
        lc_data.drop(['loan_amnt', 'annual_inc', 'revol_bal', 'loan_status'], axis=1),
        scaled_loan_amnt,
        scaled_annual_inc,
        scaled_revol_bal,
        y,
    ],
    axis=1,
)
lc_data.tail()
Out[55]:
In [56]:
# Columns used as model inputs, in the order they are handed to the classifier.
feature_names = [
    "int_rate",
    "emp_title",
    "emp_length",
    "home_ownership",
    "verification_status",
    "issue_d",
    "desc",
    "purpose",
    "dti",
    "delinq_2yrs",
    "inq_last_6mths",
    "pub_rec",
    "revol_util",
    "total_acc",
    "initial_list_status",
    "scale(x_loan_amnt)",
    "scale(x_annual_inc)",
    "scale(x_revol_bal)",
]

# Feature matrix and target vector.
dfX = lc_data[feature_names]
dfy = lc_data['loan_status']

dfX.tail()
Out[56]:
In [71]:
from sklearn.model_selection import train_test_split

# Hold out a quarter of the rows for evaluation; the fixed seed keeps the
# partition the same on every run.
X_train, X_test, y_train, y_test = train_test_split(
    dfX,
    dfy,
    test_size=0.25,
    random_state=1,
)
In [80]:
from sklearn.tree import DecisionTreeClassifier

# Entropy-based tree, capped at depth 8 with at least 2 samples per leaf.
# random_state is fixed because scikit-learn permutes the candidate features at
# every split; without a seed, ties between equally good splits can be broken
# differently from run to run and the fitted tree is not reproducible.
model = DecisionTreeClassifier(
    criterion='entropy',
    max_depth=8,
    min_samples_leaf=2,
    random_state=1,
).fit(X_train, y_train)
In [75]:
import os
from sklearn import tree

# Attempt at creating a storage file for the tree: write the fitted model out
# as Graphviz DOT text, then remove the file again.
with open('lc.dot', 'w') as dot_handle:
    # f keeps whatever export_graphviz returns, exactly as the original cell
    # left it. Per the scikit-learn docs the DOT string is only returned when
    # out_file is None, so f presumably ends up as None here.
    f = tree.export_graphviz(model, out_file=dot_handle)

os.unlink('lc.dot')
In [78]:
import pydotplus

# Attempt at exporting the tree to a PDF file.
# With out_file=None the DOT source comes back as a string instead of being
# written to disk; pydotplus parses that string into a graph object.
dot_data = tree.export_graphviz(decision_tree=model, out_file=None)
graph = pydotplus.graph_from_dot_data(dot_data)

# Render the graph to lc.pdf; the call's return value is the cell output.
graph.write_pdf('lc.pdf')
Out[78]:
In [90]:
from IPython.display import Image

# Render the fitted tree inline as a JPEG.
dot_data = tree.export_graphviz(
    model,
    # Ask for the DOT source as a string explicitly rather than passing a
    # variable left over from another cell.
    out_file=None,
    # Take the names from the frame the model was fitted on so node labels
    # always line up with the model's feature indices.
    feature_names=list(X_train.columns),
    # class_names must be a sequence with one label per class (ascending class
    # order). A bare string is indexed character by character, which labelled
    # the classes 'l', 'o', ... instead of the real target values.
    class_names=[str(label) for label in model.classes_],
    filled=True,
    rounded=True,
    special_characters=True,
)
graph = pydotplus.graph_from_dot_data(dot_data)
Image(graph.create_jpeg())
Out[90]:
In [93]:
from sklearn.metrics import confusion_matrix

# Confusion matrix on the training split (rows: true labels, columns: predicted).
confusion_matrix(
    y_true=y_train,
    y_pred=model.predict(X_train),
)
Out[93]:
In [94]:
# Confusion matrix on the held-out test split.
confusion_matrix(
    y_true=y_test,
    y_pred=model.predict(X_test),
)
Out[94]:
In [95]:
from sklearn.metrics import classification_report

# Per-class precision / recall / F1 on the training split.
# classification_report returns a preformatted string, so it is printed to
# keep the table layout.
train_report = classification_report(y_true=y_train, y_pred=model.predict(X_train))
print(train_report)
In [96]:
# Per-class precision / recall / F1 on the held-out test split.
test_report = classification_report(y_true=y_test, y_pred=model.predict(X_test))
print(test_report)