In [1]:
%pylab inline
import pandas as pd
dfr = pd.read_csv('../datasets/loanf.csv')
# inspect, sanity check
dfr.head()
Out[1]:
In [2]:
# Add a boolean column 'TF': True where the interest rate is <= 12, False otherwise.
dfr['TF'] = dfr['Interest.Rate'].le(12)

# Look at the first rows again.
dfr.head()
# The TF values shown are False because Interest.Rate is higher than 12 in all of these rows.
Out[2]:
In [3]:
# Sanity check: select the rows whose interest rate is exactly 10 (just some number < 12)
# to confirm that TF is True where we expect it to be.
d = dfr.loc[dfr['Interest.Rate'].eq(10)]
d.head()
# all is well
Out[3]:
In [4]:
import statsmodels.api as sm

# statsmodels does not add an intercept by itself, so supply it as a constant column of ones.
dfr['intercept'] = 1.0

# Independent variables; the intercept column is deliberately listed last.
ind_cols = ['FICO.Score', 'Loan.Amount', 'intercept']

# Dependent variable is the boolean TF flag; build the model, then fit it.
logit = sm.Logit(endog=dfr['TF'], exog=dfr[ind_cols])
result = logit.fit()
In [5]:
# get the fitted coefficients from the results
# NOTE(review): pz below reads these by position (FICO.Score, Loan.Amount, then intercept);
# presumably that matches the order of ind_cols -- verify against the printed output.
coeff = result.params
print(coeff)
In [6]:
def pz(fico,amt,coeff):
    """Evaluate the fitted logistic model at the given FICO score and loan amount.

    Computes z = coeff[0]*fico + coeff[1]*amt + coeff[2] and returns the
    logistic function 1 / (1 + exp(-z)).

    Parameters
    ----------
    fico : number or array-like
        FICO score(s).
    amt : number or array-like
        Loan amount(s). If both fico and amt are array-like they are combined
        element-wise.
    coeff : sequence of three numbers
        Coefficients ordered as (fico coefficient, amount coefficient,
        intercept) -- the intercept comes last. May be a list, a NumPy array
        or a pandas Series.

    Returns
    -------
    A value (or array/Series of values) between 0 and 1.
    """
    # Imported here so the function does not depend on the bare `exp` name
    # that `%pylab inline` injects into the notebook namespace.
    import numpy as np

    # Read the coefficients strictly by position. Indexing a string-labelled
    # pandas Series with coeff[0] relies on a deprecated positional fallback,
    # so convert to a plain array first.
    b = np.asarray(coeff, dtype=float)

    # Linear expression: each input multiplied by its coefficient, plus the intercept.
    z = b[0]*fico + b[1]*amt + b[2]
    return 1/(1+np.exp(-1*z))
In [7]:
# model output for FICO 720 and a loan amount of 10,000:
# the fitted probability that TF is True (interest rate <= 12)
pz(720,10000,coeff)
Out[7]:
In [8]:
print("Trying multiple FICO Loan Amount combinations: ")
print('----')
# Evaluate the fitted model on a small grid: two FICO scores, three loan amounts each.
for fico in (720, 820):
    for amt in (10000, 20000, 30000):
        # {:,} renders the amount with a thousands separator, e.g. 10,000
        print("fico={}, amt={:,}".format(fico, amt))
        print(pz(fico, amt, coeff))
In [9]:
# model output for FICO 820 and a loan amount of 63,000:
# the fitted probability that TF is True (interest rate <= 12)
pz(820,63000,coeff)
Out[9]:
In [10]:
print("Trying multiple FICO Loan Amount combinations: ")
print('----')
# Hold FICO at 820 and step through larger loan amounts, in the order listed.
for amt in (50000, 60000, 70000, 63000, 65000, 67000):
    # {:,} renders the amount with a thousands separator, e.g. 50,000
    print("fico=820, amt={:,}".format(amt))
    print(pz(820, amt, coeff))
In [11]:
import matplotlib.pyplot as plt

# Keep only the loans whose FICO score is exactly 720.
loans = dfr.loc[dfr['FICO.Score'].eq(720)]

# Model output for each of those loan amounts, with FICO held at 720.
x = loans['Loan.Amount']
y = pz(720, x, coeff)

plt.plot(x, y)
Out[11]:
In [12]:
# Model output for every loan, using each row's own FICO score and loan amount.
z = dfr['FICO.Score']
x = dfr['Loan.Amount']
y = pz(z,x,coeff)

# plt.plot(x, y) joins consecutive rows with line segments in file order. Because y
# depends on the FICO score as well as the loan amount, that line jumps back and
# forth and the figure does not look fine. Draw one marker per loan instead and
# colour it by FICO score so both inputs are visible.
plt.scatter(x, y, c=z, s=10)
cbar = plt.colorbar()
cbar.set_label('FICO.Score')
plt.xlabel('Loan.Amount')
plt.ylabel('P(Interest.Rate <= 12)')
Out[12]:
In [ ]: