In [2]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LinearRegression, LogisticRegression
from sklearn.metrics import roc_auc_score
In [5]:
# Notebook display configuration: remove the column cap so wide frames
# are rendered with every column instead of being truncated.
pd.options.display.max_columns = None
In [3]:
# Read the training and test CSV files (paths are relative to the
# notebook's working directory) into DataFrames.
TRAIN_CSV = "default_risk_train_data.csv"
TEST_CSV = "default_risk_test_data.csv"

train_data = pd.read_csv(TRAIN_CSV)
test_data = pd.read_csv(TEST_CSV)
In [6]:
# Preview the first five rows of the training frame.
train_data.head(n=5)
Out[6]:
SK_ID_CURR
TARGET
NAME_CONTRACT_TYPE
CODE_GENDER
FLAG_OWN_CAR
FLAG_OWN_REALTY
CNT_CHILDREN
AMT_INCOME_TOTAL
AMT_CREDIT
AMT_ANNUITY
AMT_GOODS_PRICE
NAME_TYPE_SUITE
NAME_INCOME_TYPE
NAME_EDUCATION_TYPE
NAME_FAMILY_STATUS
NAME_HOUSING_TYPE
REGION_POPULATION_RELATIVE
DAYS_BIRTH
DAYS_EMPLOYED
DAYS_REGISTRATION
DAYS_ID_PUBLISH
OWN_CAR_AGE
FLAG_MOBIL
FLAG_EMP_PHONE
FLAG_WORK_PHONE
FLAG_CONT_MOBILE
FLAG_PHONE
FLAG_EMAIL
OCCUPATION_TYPE
CNT_FAM_MEMBERS
REGION_RATING_CLIENT
REGION_RATING_CLIENT_W_CITY
WEEKDAY_APPR_PROCESS_START
HOUR_APPR_PROCESS_START
REG_REGION_NOT_LIVE_REGION
REG_REGION_NOT_WORK_REGION
LIVE_REGION_NOT_WORK_REGION
REG_CITY_NOT_LIVE_CITY
REG_CITY_NOT_WORK_CITY
LIVE_CITY_NOT_WORK_CITY
ORGANIZATION_TYPE
EXT_SOURCE_1
EXT_SOURCE_2
EXT_SOURCE_3
APARTMENTS_AVG
BASEMENTAREA_AVG
YEARS_BEGINEXPLUATATION_AVG
YEARS_BUILD_AVG
COMMONAREA_AVG
ELEVATORS_AVG
ENTRANCES_AVG
FLOORSMAX_AVG
FLOORSMIN_AVG
LANDAREA_AVG
LIVINGAPARTMENTS_AVG
LIVINGAREA_AVG
NONLIVINGAPARTMENTS_AVG
NONLIVINGAREA_AVG
APARTMENTS_MODE
BASEMENTAREA_MODE
YEARS_BEGINEXPLUATATION_MODE
YEARS_BUILD_MODE
COMMONAREA_MODE
ELEVATORS_MODE
ENTRANCES_MODE
FLOORSMAX_MODE
FLOORSMIN_MODE
LANDAREA_MODE
LIVINGAPARTMENTS_MODE
LIVINGAREA_MODE
NONLIVINGAPARTMENTS_MODE
NONLIVINGAREA_MODE
APARTMENTS_MEDI
BASEMENTAREA_MEDI
YEARS_BEGINEXPLUATATION_MEDI
YEARS_BUILD_MEDI
COMMONAREA_MEDI
ELEVATORS_MEDI
ENTRANCES_MEDI
FLOORSMAX_MEDI
FLOORSMIN_MEDI
LANDAREA_MEDI
LIVINGAPARTMENTS_MEDI
LIVINGAREA_MEDI
NONLIVINGAPARTMENTS_MEDI
NONLIVINGAREA_MEDI
FONDKAPREMONT_MODE
HOUSETYPE_MODE
TOTALAREA_MODE
WALLSMATERIAL_MODE
EMERGENCYSTATE_MODE
OBS_30_CNT_SOCIAL_CIRCLE
DEF_30_CNT_SOCIAL_CIRCLE
OBS_60_CNT_SOCIAL_CIRCLE
DEF_60_CNT_SOCIAL_CIRCLE
DAYS_LAST_PHONE_CHANGE
FLAG_DOCUMENT_2
FLAG_DOCUMENT_3
FLAG_DOCUMENT_4
FLAG_DOCUMENT_5
FLAG_DOCUMENT_6
FLAG_DOCUMENT_7
FLAG_DOCUMENT_8
FLAG_DOCUMENT_9
FLAG_DOCUMENT_10
FLAG_DOCUMENT_11
FLAG_DOCUMENT_12
FLAG_DOCUMENT_13
FLAG_DOCUMENT_14
FLAG_DOCUMENT_15
FLAG_DOCUMENT_16
FLAG_DOCUMENT_17
FLAG_DOCUMENT_18
FLAG_DOCUMENT_19
FLAG_DOCUMENT_20
FLAG_DOCUMENT_21
AMT_REQ_CREDIT_BUREAU_HOUR
AMT_REQ_CREDIT_BUREAU_DAY
AMT_REQ_CREDIT_BUREAU_WEEK
AMT_REQ_CREDIT_BUREAU_MON
AMT_REQ_CREDIT_BUREAU_QRT
AMT_REQ_CREDIT_BUREAU_YEAR
0
406644
0
Cash loans
F
N
Y
1
202500.0
976711.5
49869.0
873000.0
Unaccompanied
Commercial associate
Secondary / secondary special
Married
House / apartment
0.046220
-15743
-4482
-1797.0
-2455
NaN
1
1
0
1
0
1
Laborers
3.0
1
1
TUESDAY
14
0
1
1
0
0
0
Other
0.655600
0.684298
NaN
0.4000
0.1494
0.9970
0.9592
0.2377
0.48
0.2069
0.6250
0.5833
0.0368
0.3261
0.4088
0.0386
0.0744
0.4076
0.1550
0.9970
0.9608
0.2399
0.4834
0.2069
0.6250
0.5833
0.0377
0.3563
0.4259
0.0389
0.0787
0.4039
0.1494
0.9970
0.9597
0.2392
0.48
0.2069
0.6250
0.5833
0.0375
0.3318
0.4161
0.0388
0.0759
reg oper account
block of flats
0.4835
Stone, brick
No
0.0
0.0
0.0
0.0
-1626.0
0
1
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
NaN
NaN
NaN
NaN
NaN
NaN
1
411997
0
Cash loans
M
Y
Y
0
225000.0
808650.0
26086.5
675000.0
Unaccompanied
State servant
Higher education
Married
House / apartment
0.018850
-20659
-10455
-4998.0
-4010
1.0
1
1
0
1
0
1
NaN
2.0
2
2
WEDNESDAY
10
0
0
0
0
0
0
Culture
NaN
0.623740
0.710674
0.1701
0.0545
0.9781
NaN
NaN
0.32
0.2759
0.2083
NaN
NaN
NaN
0.1022
NaN
0.1441
0.1733
0.0566
0.9782
NaN
NaN
0.3222
0.2759
0.2083
NaN
NaN
NaN
0.1065
NaN
0.1525
0.1718
0.0545
0.9781
NaN
NaN
0.32
0.2759
0.2083
NaN
NaN
NaN
0.1040
NaN
0.1471
NaN
block of flats
0.1512
Stone, brick
No
2.0
1.0
2.0
1.0
-1704.0
0
1
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0.0
0.0
0.0
0.0
0.0
3.0
2
241559
0
Revolving loans
M
N
Y
0
135000.0
180000.0
9000.0
180000.0
Unaccompanied
Commercial associate
Secondary / secondary special
Single / not married
House / apartment
0.007305
-9013
-1190
-3524.0
-1644
NaN
1
1
0
1
0
0
Laborers
1.0
3
3
SUNDAY
11
0
0
0
0
0
0
Construction
0.175511
0.492994
0.085595
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NaN
NaN
0.0
0.0
0.0
0.0
-661.0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0.0
0.0
0.0
0.0
0.0
4.0
3
296530
0
Cash loans
F
Y
Y
2
135000.0
592560.0
32274.0
450000.0
Unaccompanied
Working
Higher education
Married
House / apartment
0.018801
-10495
-744
-160.0
-152
15.0
1
1
0
1
0
0
Laborers
4.0
2
2
WEDNESDAY
9
0
0
0
0
0
0
Business Entity Type 3
0.117463
0.621860
0.579727
0.0619
0.0487
0.9816
0.7484
0.0258
0.00
0.1379
0.1667
0.2083
0.0130
0.0504
0.0509
0.0000
0.0000
0.0630
0.0506
0.9816
0.7583
0.0260
0.0000
0.1379
0.1667
0.2083
0.0133
0.0551
0.0530
0.0000
0.0000
0.0625
0.0487
0.9816
0.7518
0.0260
0.00
0.1379
0.1667
0.2083
0.0133
0.0513
0.0518
0.0000
0.0000
reg oper account
block of flats
0.0400
Panel
No
0.0
0.0
0.0
0.0
-508.0
0
1
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0.0
0.0
0.0
0.0
0.0
1.0
4
231371
0
Cash loans
F
N
N
1
112500.0
512446.5
34375.5
463500.0
Unaccompanied
Working
Secondary / secondary special
Civil marriage
House / apartment
0.022625
-10270
-451
-1602.0
-2797
NaN
1
1
0
1
0
1
Accountants
3.0
2
2
TUESDAY
16
0
0
0
0
0
0
Business Entity Type 3
0.310480
0.642651
0.466864
0.0722
0.0000
0.9771
0.6804
0.0256
0.00
0.1379
0.1667
0.0417
0.0115
0.0588
0.0443
0.0000
0.0807
0.0735
0.0000
0.9772
0.6929
0.0258
0.0000
0.1379
0.1667
0.0417
0.0117
0.0643
0.0461
0.0000
0.0855
0.0729
0.0000
0.9771
0.6847
0.0257
0.00
0.1379
0.1667
0.0417
0.0117
0.0599
0.0451
0.0000
0.0824
not specified
block of flats
0.0524
Panel
No
1.0
0.0
0.0
0.0
-12.0
0
1
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0.0
0.0
0.0
0.0
0.0
0.0
Content source: chicagopython/CodingWorkshops
Similar notebooks: