In [2]:
import pandas as pd
import numpy as np
from sklearn import preprocessing
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.svm import LinearSVC
from sklearn.calibration import CalibratedClassifierCV
from sklearn.feature_selection import RFE
from sklearn.model_selection import GridSearchCV
from sklearn import metrics
from sklearn.model_selection import cross_val_score
In [3]:
# Load the loan-statistics CSV straight out of its zip archive.
# header=1 takes the column names from the file's second line, so the
# line above it is skipped.
df = pd.read_csv(
    'LoanStats3d_securev1.csv.zip',
    compression='zip',
    header=1,
)
In [4]:
# Preview the first five rows of the loaded frame.
df.head(n=5)
Out[4]:
In [5]:
# Groups of `loan_status` values used to label each loan.

# Statuses treated as a default.
defaulters = [
    'Default',
    'Charged Off',
    'Late (31-120 days)',
]

# Statuses treated as successfully repaid.
non_defaulters = [
    'Fully Paid',
]

# Statuses whose final outcome is not settled yet.
# NOTE(review): 'none' is presumably a placeholder for a missing status -- confirm.
uncertain = [
    'Current',
    'Late (16-30 days)',
    'In Grace Period',
    'none',
]
In [6]:
# List the distinct loan_status values present in the data.
df['loan_status'].unique()
Out[6]:
In [7]:
# Build the class label `Target` from loan_status:
#   0 -> defaulters
#   1 -> paid (the borrowers a loan should be issued to)
#   2 -> uncertain (every status not in either list)
defaulter_mask = df['loan_status'].isin(defaulters)
paid_mask = df['loan_status'].isin(non_defaulters)

df['Target'] = 2  # start everyone as uncertain, then overwrite the known outcomes
df.loc[defaulter_mask, 'Target'] = 0
df.loc[paid_mask, 'Target'] = 1
In [8]:
# Preview the first five rows of the frame again.
df.head(n=5)
Out[8]:
In [9]:
# Summary statistics for the rows labelled Target == 0 (defaulters).
df[df['Target'].eq(0)].describe()
Out[9]:
In [10]:
# Summary statistics for the rows labelled Target == 1 (paid).
df[df['Target'].eq(1)].describe()
Out[10]:
In [11]:
# Funded amount vs. total payment, summarised for Target == 0 (defaulters).
df.loc[df['Target'].eq(0), ['funded_amnt', 'total_pymnt']].describe()
Out[11]:
In [12]:
# Funded amount vs. total payment, summarised for Target == 1 (paid).
df.loc[df['Target'].eq(1), ['funded_amnt', 'total_pymnt']].describe()
Out[12]:
In [13]:
# First ten Target == 0 (defaulter) rows, showing funded amount next to total payment.
df.loc[df['Target'].eq(0), ['funded_amnt', 'total_pymnt']].head(n=10)
Out[13]:
In [14]:
# NOTE(review): unexplained constant, not used by any cell visible here.
# Presumably a regularisation strength `C` for one of the imported sklearn
# estimators -- confirm and record where the value came from.
C = 0.0820849986238988
In [15]:
# describe() table of funded amount and total payment for Target == 0 (defaulters).
A = df.loc[df['Target'].eq(0), ['funded_amnt', 'total_pymnt']].describe()
In [24]:
# describe() table of funded amount and total payment for Target == 1 (paid).
B = df.loc[df['Target'].eq(1), ['funded_amnt', 'total_pymnt']].describe()
In [25]:
# Total `funded_amnt` over all loans labelled Target 0 (defaulters) or 1 (paid).
#
# Summed directly from the rows rather than rebuilt as count * mean from the
# describe() tables A and B: the reconstruction turns into NaN when either
# class is empty (0 * NaN), adds avoidable rounding, and silently depends on
# A and B having been computed from the current `df`.
# Cast to float64 so the result stays a float, as count * mean always was.
Funded = df.loc[df['Target'].isin([0, 1]), 'funded_amnt'].astype('float64').sum()
In [26]:
# Show the total funded amount.
Funded
Out[26]:
In [27]:
# Total `total_pymnt` over all loans labelled Target 0 (defaulters) or 1 (paid).
#
# Summed directly from the rows rather than rebuilt as count * mean from the
# describe() tables A and B: the reconstruction turns into NaN when either
# class is empty (0 * NaN), adds avoidable rounding, and silently depends on
# A and B having been computed from the current `df`.
# Cast to float64 so the result stays a float, as count * mean always was.
Collected = df.loc[df['Target'].isin([0, 1]), 'total_pymnt'].astype('float64').sum()
In [30]:
# Show the total payments collected.
Collected
Out[30]:
In [31]:
# Difference between total payments collected and total amount funded
# (positive means more was collected than was funded).
Collected - Funded
Out[31]:
In [ ]: