You are provided with the following data: historical_loan.csv
This is the historical data that the bank has provided. It has the following columns
Application Attributes:
years
: Number of years the applicant has been employed ownership
: Whether the applicant owns a house or not income
: Annual income of the applicant age
: Age of the applicant amount
: Amount requested by the applicantBehavioural Attributes:
grade
: Credit grade of the applicantOutcome Variable:
default
: Whether the applicant has defaulted or not The intention is to build a predictive model for loan default
In [70]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
plt.style.use('fivethirtyeight')
In [71]:
df = pd.read_csv("data/historical_loan.csv")
In [72]:
df.head()
Out[72]:
In [73]:
df.isnull().sum()
Out[73]:
In [74]:
df.years = df.years.fillna(np.mean(data.years))
In [75]:
data = df.loc[:,('age', 'years', 'amount')]
target = df.loc[:,'default']
In [76]:
from sklearn import tree
In [77]:
clf = tree.DecisionTreeClassifier(max_depth=3)
In [78]:
clf = clf.fit(data, target)
In [79]:
import pydotplus
from IPython.display import Image
In [80]:
dot_data = tree.export_graphviz(clf, out_file='tree_3.dot', feature_names=data.columns,
class_names=['no', 'yes'], filled=True,
rounded=True, special_characters=True)
In [81]:
graph = pydotplus.graph_from_dot_file('tree_3.dot')
In [82]:
Image(graph.create_png())
Out[82]:
In [83]:
plt.figure(figsize=(16, 6))
for idx, pair in enumerate([[0, 1], [0, 2], [1, 2]]):
X = data.iloc[:, pair]
y = target
plt.subplot(1, 3, idx + 1)
plt.scatter(X.iloc[:, 0], X.iloc[:, 1], c=y, cmap = plt.cm.viridis)
plt.xlabel(X.columns[0])
plt.ylabel(X.columns[1])
plt.axis("tight")
In [85]:
plt.figure(figsize=(16, 6))
for idx, pair in enumerate([[0, 1], [0, 2], [1, 2]]):
X = data.iloc[:, pair]
y = target
plt.subplot(1, 3, idx + 1)
plt.scatter(X.iloc[:, 0], X.iloc[:, 1], c=y, cmap = plt.cm.viridis)
# Classify the points
clf2 = tree.DecisionTreeClassifier(max_depth = 10).fit(X, y)
x_min, x_max = X.iloc[:, 0].min(), X.iloc[:, 0].max()
y_min, y_max = X.iloc[:, 1].min(), X.iloc[:, 1].max()
xx, yy = np.meshgrid(np.arange(x_min, x_max, (x_max - x_min)/100),
np.arange(y_min, y_max, (y_max - y_min)/100))
Z = clf2.predict(np.c_[xx.ravel(), yy.ravel()])
Z = Z.reshape(xx.shape)
# plot the mesh
cs = plt.contourf(xx, yy, Z, cmap=plt.cm.viridis, alpha = 0.5)
plt.xlabel(X.columns[0])
plt.ylabel(X.columns[1])
plt.axis("tight")
In [86]:
from mpl_toolkits.mplot3d import Axes3D
In [87]:
plt.style.use('seaborn-notebook')
In [88]:
X = data
y = target
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
ax.scatter(X.iloc[:,0], X.iloc[:,1], X.iloc[:,2], c=y, cmap = plt.cm.viridis, alpha = 0.3)
ax.set_xlabel(X.columns[0])
ax.set_ylabel(X.columns[1])
ax.set_zlabel(X.columns[2])
plt.tight_layout()
In [89]:
# Classify the points
clf3 = tree.DecisionTreeClassifier(max_depth = 10).fit(X, y)
x_min, x_max = X.iloc[:, 0].min(), X.iloc[:, 0].max()
y_min, y_max = X.iloc[:, 1].min(), X.iloc[:, 1].max()
z_min, z_max = X.iloc[:, 2].min(), X.iloc[:, 2].max()
xx, yy, zz = np.meshgrid(np.arange(x_min, x_max, (x_max - x_min)/25),
np.arange(y_min, y_max, (y_max - y_min)/25),
np.arange(z_min, z_max, (z_max - z_min)/25))
C = clf3.predict(np.c_[xx.ravel(), yy.ravel(), yy.ravel()])
C = C.reshape(xx.shape)
In [90]:
# plot the mesh and points
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
ax.scatter(X.iloc[:,0], X.iloc[:,1], X.iloc[:,2], c=y, cmap = plt.cm.viridis, alpha = 0.5)
ax.set_xlabel(X.columns[0])
ax.set_ylabel(X.columns[1])
ax.set_zlabel(X.columns[2])
ax.scatter(xx, yy, zz, c=C, cmap = plt.cm.viridis, alpha = 0.2)
plt.tight_layout()
In [91]:
import matplotlib.animation as animation
from IPython.display import HTML
In [92]:
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
ax.scatter(X.iloc[:,0], X.iloc[:,1], X.iloc[:,2], c=y, cmap = plt.cm.viridis, alpha = 0.5)
ax.set_xlabel(X.columns[0])
ax.set_ylabel(X.columns[1])
ax.set_zlabel(X.columns[2])
ax.scatter(xx, yy, zz, c=C, cmap = plt.cm.viridis, alpha = 0.2)
plt.tight_layout()
# Animate by changing azimuth and elevation
azim = [i for i in range(0,360,36)]
elev = [i for i in range(0,360,36)]
def rotate(num, elev, azim):
ax.view_init(elev=elev[num], azim=azim[num])
# Creating the Animation object
anim = animation.FuncAnimation(fig, rotate, 10, fargs=(elev, azim),
interval=2000, blit=False)
plt.close(fig)
HTML(anim.to_html5_video())
Out[92]:
In [ ]: