Concise ML



In [1]:

    
# Frame - Predict Default
import numpy as np
import pandas as pd



In [2]:

    
# Acquire Data
# Read the historical loan records into a DataFrame (path is relative to the
# notebook's working directory).
df = pd.read_csv(filepath_or_buffer='credit-risk/data/historical_loan.csv')



In [3]:

    
# Refine Data
# Impute missing values in `years` with the column median.
# The result is assigned back to the column instead of calling
# `fillna(..., inplace=True)` on `df.years`: an in-place call on a column
# accessor is a chained operation that warns in pandas 2.x and does not
# modify `df` under Copy-on-Write. Assignment is also safe to re-run.
df['years'] = df['years'].fillna(df['years'].median())



In [4]:

    
# Transform Data
from sklearn.preprocessing import LabelEncoder
# Replace each categorical column with integer codes. A fresh encoder is
# fitted per column, so the codes of one column are unrelated to the other's.
for categorical in ('ownership', 'grade'):
    df[categorical] = LabelEncoder().fit_transform(df[categorical])
# Natural log of income (assumes income is strictly positive -- TODO confirm).
df['income'] = np.log(df['income'])



In [5]:

    
# Model - Data & Target
# Target: the column at position 0 (presumably the default label -- verify
# against the CSV header).
y = df.iloc[:, 0]
# Features: the columns at positions 1 through 7 (the slice end is exclusive).
X = df.iloc[:, 1:8]



In [6]:

    
# Model Creation
from sklearn.ensemble import RandomForestClassifier
# Random forests are stochastic (bootstrap samples and random feature
# subsets). Fixing `random_state` makes the cross-validation score and the
# tuning result below reproducible across "Restart & Run All".
clf = RandomForestClassifier(random_state=42)



In [7]:

    
# Model Selection
# Estimate out-of-sample performance: ROC AUC on 5 cross-validation folds,
# with n_jobs=-1 to use all available cores, then averaged over the folds.
from sklearn.model_selection import cross_val_score
score = cross_val_score(
    estimator=clf,
    X=X,
    y=y,
    scoring="roc_auc",
    cv=5,
    n_jobs=-1,
)
score.mean()









    Out[7]:





0.70162528816671299



In [8]:

    
# Model Tuning
# Exhaustive grid search over forest size and tree depth.
# The search is scored with ROC AUC on 5 folds so that its ranking of
# candidates is comparable with the cross_val_score run in the Model
# Selection cell; without an explicit `scoring`, GridSearchCV falls back to
# the classifier's default score (accuracy).
from sklearn.model_selection import GridSearchCV
parameters = {'n_estimators':[10, 50, 100], 'max_depth':[5, 10]}
clf2 = GridSearchCV(clf, parameters, scoring="roc_auc", cv=5, n_jobs=-1,
                    return_train_score=True)
clf2.fit(X, y)
# Per-candidate mean validation scores are in
# clf2.cv_results_['mean_test_score']; the best one is clf2.best_score_.
clf2.best_params_









    Out[8]:





{'max_depth': 10, 'n_estimators': 100}