In [72]:
# Part4. Hoax Prediction using Logistic Regression and Random Forest
In [56]:
df = pd.read_csv("ufo_reports.csv", sep='\t')
df=df[(~df.Long.isnull()) & (~df.Lat.isnull())]
In [57]:
shapes = pd.get_dummies(df.Shape,prefix='Shape').iloc[:,0:]
In [58]:
# df=pd.concat([df,shapes],axis=1)
cols_to_keep=['Date', 'Year', 'Month', 'Day', 'Time', 'City', 'State', 'Lat', 'Long', 'Shape',
'Duration_Sec', 'Summary', 'WeekDay', 'Week', 'Quarter', 'TimePer', 'state', 'state abbr', 'Region',
'Hoax', 'ASTR','HOL', 'Pop', 'Milit_Share']
df = df[cols_to_keep].join(shapes.ix[:, 'Shape_Chevron':])
In [59]:
regions = pd.get_dummies(df.Region,prefix='Region').iloc[:,0:]
In [60]:
cols_to_keep=['Date', 'Year', 'Month', 'Day', 'Time', 'City', 'State', 'Lat', 'Long','Shape',
'Duration_Sec', 'Summary', 'WeekDay', 'Week', 'Quarter', 'TimePer',
'state', 'state abbr', 'Region', 'Hoax', 'ASTR','HOL', 'Pop', 'Milit_Share',
'Shape_Chevron','Shape_Cigar', 'Shape_Circle', 'Shape_Cone', 'Shape_Cross',
'Shape_Cylinder', 'Shape_Diamond', 'Shape_Disk', 'Shape_Egg',
'Shape_Fireball', 'Shape_Flash', 'Shape_Formation', 'Shape_Light',
'Shape_Other', 'Shape_Oval', 'Shape_Rectangle', 'Shape_Sphere',
'Shape_Teardrop', 'Shape_Triangle']
df = df[cols_to_keep].join(regions.ix[:, 'Region_Midwest':])
In [61]:
df.columns
df = df.reset_index()
In [62]:
df_copy = pd.DataFrame(df)
df_copy.set_index('Date',inplace=True)
df_copy.index = pd.to_datetime(df_copy.index, unit='s')
df_copy.head()
Out[62]:
index
Year
Month
Day
Time
City
State
Lat
Long
Shape
...
Shape_Other
Shape_Oval
Shape_Rectangle
Shape_Sphere
Shape_Teardrop
Shape_Triangle
Region_Midwest
Region_Northeast
Region_South
Region_West
Date
2015-12-31
0
2015
12
31
23:59
Eugene
OR
44.052069
-123.086746
Fireball
...
0
0
0
0
0
0
0
0
0
1
2015-12-31
1
2015
12
31
15:00
Monmouth
OR
44.848449
-123.233987
Egg
...
0
0
0
0
0
0
0
0
0
1
2015-12-30
2
2015
12
30
22:00
Portland
OR
45.523447
-122.676207
Light
...
0
0
0
0
0
0
0
0
0
1
2015-12-30
3
2015
12
30
19:30
Springfield
OR
44.046237
-123.022028
Changing
...
0
0
0
0
0
0
0
0
0
1
2015-12-30
4
2015
12
30
16:50
Orient
OR
45.467337
-122.352587
Cylinder
...
0
0
0
0
0
0
0
0
0
1
5 rows × 47 columns
In [63]:
#astronomical events
ASTR=[
'2014-05-06','2014-01-01','2014-01-02','2014-01-03','2014-01-05','2014-01-16','2014-01-30',
'2014-02-14',
'2014-03-01','2014-03-16','2014-03-20','2014-03-30',
'2014-04-08','2014-04-15','2014-04-23','2014-04-22','2014-04-29',
'2014-05-05','2014-05-10','2014-05-14','2014-05-24','2014-05-28',
'2014-06-07','2014-06-13','2014-06-21','2014-06-27',
'2014-07-12','2014-07-26','2014-07-29','2014-07-28',
'2014-08-10','2014-08-12','2014-08-18','2014-08-25','2014-08-29','2014-08-13',
'2014-09-09','2014-09-23','2014-09-24',
'2014-10-04','2014-10-07','2014-10-08','2014-10-09','2014-10-21','2014-10-20','2014-10-23',
'2014-11-05','2014-11-06','2014-11-17','2014-11-18','2014-11-22',
'2014-12-06','2014-12-13','2014-12-14','2014-12-21','2014-12-22','2014-12-23','2014-12-22',
'2015-01-03','2015-01-04','2015-01-05','2015-01-20',
'2015-02-03','2015-02-06','2015-02-18','2015-02-22','2015-2-024',
'2015-03-05','2015-03-06','2015-03-20',
'2015-04-04','2015-04-13','2015-04-18','2015-04-22','2015-04-23','2015-04-25',
'2015-05-04','2015-05-05','2015-05-06','2015-05-07','2015-05-18','2015-05-23',
'2015-06-02','2015-06-06','2015-06-16','2015-06-21','2015-06-24',
'2015-07-01','2015-07-02','2015-07-14','2015-07-16','2015-07-28','2015-07-29','2015-07-31',
'2015-08-12','2015-08-13','2015-08-14','2015-08-29',
'2015-09-01','2015-09-04','2015-09-13','2015-09-23','2015-09-28',
'2015-10-01','2015-10-08','2015-10-11','2015-10-16','2015-10-13','2015-10-22','2015-10-21','2015-10-26','2015-10-27','2015-10-28',
'2015-11-06','2015-11-05','2015-11-11','2015-11-18','2015-11-17','2015-11-25',
'2015-12-07','2015-12-11','2015-12-13','2015-12-14','2015-12-22','2015-12-21','2015-12-25','2015-12-29'
]
In [64]:
holiday=[
'2014-01-01','2014-01-20','2014-02-17','2014-03-04','2014-03-17','2014-03-09','2014-05-26',
'2014-07-04','2014-09-01','2014-10-13','2014-11-11','2014-11-27','2014-12-25'
'2015-01-01','2015-01-19','2015-02-16','2015-05-25','2015-07-04','2015-09-07',
'2015-10-12','2015-11-11','2015-11-26','2015-12-25'
]
In [65]:
def MATCH(a, LIST):
if a in LIST:
return 1
else:
return 0
In [66]:
df=df.reset_index()
df.head(1)
df.columns
Out[66]:
Index(['Date', 'index', 'Year', 'Month', 'Day', 'Time', 'City', 'State', 'Lat',
'Long', 'Shape', 'Duration_Sec', 'Summary', 'WeekDay', 'Week',
'Quarter', 'TimePer', 'state', 'state abbr', 'Region', 'Hoax', 'ASTR',
'HOL', 'Pop', 'Milit_Share', 'Shape_Chevron', 'Shape_Cigar',
'Shape_Circle', 'Shape_Cone', 'Shape_Cross', 'Shape_Cylinder',
'Shape_Diamond', 'Shape_Disk', 'Shape_Egg', 'Shape_Fireball',
'Shape_Flash', 'Shape_Formation', 'Shape_Light', 'Shape_Other',
'Shape_Oval', 'Shape_Rectangle', 'Shape_Sphere', 'Shape_Teardrop',
'Shape_Triangle', 'Region_Midwest', 'Region_Northeast', 'Region_South',
'Region_West'],
dtype='object')
In [26]:
# HOAX Prediction
# 1. Logistic Regression
In [27]:
# All features
import statsmodels.api as sm
from sklearn.linear_model import LogisticRegression
import statsmodels.formula.api as smf
sm.Logit(df['Hoax'],
sm.add_constant(df[['Year', 'Month', 'Day', 'Lat', 'Long',
'Duration_Sec', 'WeekDay', 'Week', 'Quarter', 'TimePer', 'ASTR', 'Pop','HOL', 'Milit_Share',
'Shape_Chevron','Shape_Cigar', 'Shape_Circle', 'Shape_Cone', 'Shape_Cross',
'Shape_Cylinder', 'Shape_Diamond', 'Shape_Disk', 'Shape_Egg',
'Shape_Fireball', 'Shape_Flash', 'Shape_Formation', 'Shape_Light',
'Shape_Other', 'Shape_Oval', 'Shape_Rectangle', 'Shape_Sphere',
'Shape_Teardrop', 'Shape_Triangle', 'Region_Midwest','Region_Northeast', 'Region_South']])).fit().summary()
Optimization terminated successfully.
Current function value: 0.227311
Iterations 12
Out[27]:
Logit Regression Results
Dep. Variable: Hoax No. Observations: 12172
Model: Logit Df Residuals: 12135
Method: MLE Df Model: 36
Date: Wed, 08 Jun 2016 Pseudo R-squ.: 0.1112
Time: 19:03:03 Log-Likelihood: -2766.8
converged: True LL-Null: -3113.0
LLR p-value: 1.993e-122
coef std err z P>|z| [95.0% Conf. Int.]
const -1988.9681 162.625 -12.230 0.000 -2307.707 -1670.229
Year 0.9865 0.081 12.220 0.000 0.828 1.145
Month 0.1725 0.078 2.198 0.028 0.019 0.326
Day -0.0146 0.005 -3.139 0.002 -0.024 -0.005
Lat -0.0174 0.008 -2.174 0.030 -0.033 -0.002
Long 0.0018 0.004 0.411 0.681 -0.007 0.010
Duration_Sec 1.565e-05 5.26e-06 2.973 0.003 5.33e-06 2.6e-05
WeekDay 0.0042 0.019 0.217 0.828 -0.034 0.042
Week 0.0116 0.015 0.781 0.435 -0.018 0.041
Quarter -0.3344 0.145 -2.302 0.021 -0.619 -0.050
TimePer -0.0185 0.010 -1.867 0.062 -0.038 0.001
ASTR -0.2170 0.105 -2.060 0.039 -0.423 -0.011
Pop 1.629e-08 3.23e-09 5.043 0.000 9.96e-09 2.26e-08
HOL -0.5963 0.222 -2.682 0.007 -1.032 -0.161
Milit_Share 11.3181 9.657 1.172 0.241 -7.609 30.245
Shape_Chevron -0.5858 0.366 -1.601 0.109 -1.303 0.131
Shape_Cigar -0.3963 0.301 -1.317 0.188 -0.986 0.193
Shape_Circle -0.4180 0.145 -2.891 0.004 -0.701 -0.135
Shape_Cone 1.0318 0.353 2.919 0.004 0.339 1.725
Shape_Cross 1.2484 0.418 2.983 0.003 0.428 2.069
Shape_Cylinder -0.4175 0.349 -1.195 0.232 -1.102 0.267
Shape_Diamond -0.2500 0.301 -0.830 0.406 -0.840 0.340
Shape_Disk -0.4771 0.243 -1.960 0.050 -0.954 7.77e-05
Shape_Egg 0.8685 0.351 2.471 0.013 0.180 1.557
Shape_Fireball -0.2888 0.151 -1.911 0.056 -0.585 0.007
Shape_Flash -0.3078 0.263 -1.168 0.243 -0.824 0.209
Shape_Formation -0.9248 0.278 -3.330 0.001 -1.469 -0.380
Shape_Light -0.1021 0.119 -0.855 0.393 -0.336 0.132
Shape_Other -0.1418 0.165 -0.857 0.392 -0.466 0.183
Shape_Oval -0.8490 0.253 -3.361 0.001 -1.344 -0.354
Shape_Rectangle -1.6501 0.518 -3.187 0.001 -2.665 -0.635
Shape_Sphere -0.6276 0.191 -3.281 0.001 -1.002 -0.253
Shape_Teardrop -0.2266 0.417 -0.544 0.587 -1.043 0.590
Shape_Triangle -0.7574 0.187 -4.041 0.000 -1.125 -0.390
Region_Midwest -0.6139 0.166 -3.701 0.000 -0.939 -0.289
Region_Northeast -0.8098 0.215 -3.775 0.000 -1.230 -0.389
Region_South -0.7986 0.167 -4.778 0.000 -1.126 -0.471
In [28]:
from sklearn.metrics import roc_auc_score
#ROC curve
log_reg = LogisticRegression()
log_reg.fit(df[['Year', 'Month', 'Day', 'Lat', 'Long',
'Duration_Sec', 'WeekDay', 'Week', 'Quarter', 'TimePer', 'ASTR', 'Pop','HOL', 'Milit_Share',
'Shape_Chevron','Shape_Cigar', 'Shape_Circle', 'Shape_Cone', 'Shape_Cross',
'Shape_Cylinder', 'Shape_Diamond', 'Shape_Disk', 'Shape_Egg',
'Shape_Fireball', 'Shape_Flash', 'Shape_Formation', 'Shape_Light',
'Shape_Other', 'Shape_Oval', 'Shape_Rectangle', 'Shape_Sphere',
'Shape_Teardrop', 'Shape_Triangle', 'Region_Midwest','Region_Northeast', 'Region_South']], df['Hoax'])
roc_auc_score(df['Hoax'], log_reg.predict(df[[ 'Year', 'Month', 'Day', 'Lat', 'Long',
'Duration_Sec', 'WeekDay', 'Week', 'Quarter', 'TimePer', 'ASTR', 'Pop','HOL', 'Milit_Share',
'Shape_Chevron','Shape_Cigar', 'Shape_Circle', 'Shape_Cone', 'Shape_Cross',
'Shape_Cylinder', 'Shape_Diamond', 'Shape_Disk', 'Shape_Egg',
'Shape_Fireball', 'Shape_Flash', 'Shape_Formation', 'Shape_Light',
'Shape_Other', 'Shape_Oval', 'Shape_Rectangle', 'Shape_Sphere',
'Shape_Teardrop', 'Shape_Triangle', 'Region_Midwest','Region_Northeast', 'Region_South']]))
Out[28]:
0.5
In [29]:
from sklearn.metrics import roc_curve, roc_auc_score
actuals = log_reg.predict(df[['Year', 'Month', 'Day', 'Lat', 'Long',
'Duration_Sec', 'WeekDay', 'Week', 'Quarter', 'TimePer', 'ASTR', 'Pop','HOL', 'Milit_Share',
'Shape_Chevron','Shape_Cigar', 'Shape_Circle', 'Shape_Cone', 'Shape_Cross',
'Shape_Cylinder', 'Shape_Diamond', 'Shape_Disk', 'Shape_Egg',
'Shape_Fireball', 'Shape_Flash', 'Shape_Formation', 'Shape_Light',
'Shape_Other', 'Shape_Oval', 'Shape_Rectangle', 'Shape_Sphere',
'Shape_Teardrop', 'Shape_Triangle', 'Region_Midwest','Region_Northeast', 'Region_South'
]])
probas = log_reg.predict_proba(df[['Year', 'Month', 'Day', 'Lat', 'Long',
'Duration_Sec', 'WeekDay', 'Week', 'Quarter', 'TimePer', 'ASTR', 'Pop','HOL', 'Milit_Share',
'Shape_Chevron','Shape_Cigar', 'Shape_Circle', 'Shape_Cone', 'Shape_Cross',
'Shape_Cylinder', 'Shape_Diamond', 'Shape_Disk', 'Shape_Egg',
'Shape_Fireball', 'Shape_Flash', 'Shape_Formation', 'Shape_Light',
'Shape_Other', 'Shape_Oval', 'Shape_Rectangle', 'Shape_Sphere',
'Shape_Teardrop', 'Shape_Triangle', 'Region_Midwest','Region_Northeast', 'Region_South' ]])
plt.plot(roc_curve(df[['Hoax']], probas[:,1])[0], roc_curve(df[['Hoax']], probas[:,1])[1])
Out[29]:
[<matplotlib.lines.Line2D at 0x12fd913c8>]
In [30]:
#reduced to more siginificant features
sm.Logit(df['Hoax'], sm.add_constant(df[['Month', 'Duration_Sec', 'ASTR','HOL']])).fit().summary()
Optimization terminated successfully.
Current function value: 0.248330
Iterations 7
Out[30]:
Logit Regression Results
Dep. Variable: Hoax No. Observations: 12172
Model: Logit Df Residuals: 12167
Method: MLE Df Model: 4
Date: Wed, 08 Jun 2016 Pseudo R-squ.: 0.02901
Time: 19:03:07 Log-Likelihood: -3022.7
converged: True LL-Null: -3113.0
LLR p-value: 5.436e-38
coef std err z P>|z| [95.0% Conf. Int.]
const -3.5861 0.105 -34.226 0.000 -3.792 -3.381
Month 0.1407 0.012 11.762 0.000 0.117 0.164
Duration_Sec 1.587e-05 5.74e-06 2.765 0.006 4.62e-06 2.71e-05
ASTR -0.2318 0.101 -2.292 0.022 -0.430 -0.034
HOL -0.7448 0.216 -3.450 0.001 -1.168 -0.322
In [31]:
from sklearn.metrics import roc_auc_score
#ROC curve
log_reg = LogisticRegression()
log_reg.fit(df[['Month', 'Duration_Sec', 'ASTR','HOL']], df['Hoax'])
roc_auc_score(df['Hoax'], log_reg.predict(df[[ 'Month', 'Duration_Sec', 'ASTR','HOL']]))
Out[31]:
0.49991158267020336
In [32]:
from sklearn.metrics import roc_curve, roc_auc_score
actuals = log_reg.predict(df[['Month', 'Duration_Sec', 'ASTR','HOL'
]])
probas = log_reg.predict_proba(df[['Month', 'Duration_Sec', 'ASTR','HOL' ]])
plt.plot(roc_curve(df[['Hoax']], probas[:,1])[0], roc_curve(df[['Hoax']], probas[:,1])[1])
Out[32]:
[<matplotlib.lines.Line2D at 0x12fe321d0>]
In [33]:
df = df[(~df.Shape.isnull()) & (~df.Summary.isnull()) ]
In [34]:
df.columns
Out[34]:
Index(['Date', 'index', 'Year', 'Month', 'Day', 'Time', 'City', 'State', 'Lat',
'Long', 'Shape', 'Duration_Sec', 'Summary', 'WeekDay', 'Week',
'Quarter', 'TimePer', 'state', 'state abbr', 'Region', 'Hoax', 'ASTR',
'HOL', 'Pop', 'Milit_Share', 'Shape_Chevron', 'Shape_Cigar',
'Shape_Circle', 'Shape_Cone', 'Shape_Cross', 'Shape_Cylinder',
'Shape_Diamond', 'Shape_Disk', 'Shape_Egg', 'Shape_Fireball',
'Shape_Flash', 'Shape_Formation', 'Shape_Light', 'Shape_Other',
'Shape_Oval', 'Shape_Rectangle', 'Shape_Sphere', 'Shape_Teardrop',
'Shape_Triangle', 'Region_Midwest', 'Region_Northeast', 'Region_South',
'Region_West'],
dtype='object')
In [35]:
# All Features
from sklearn.ensemble import RandomForestClassifier
from sklearn.cross_validation import cross_val_score
X = df[['Year', 'Month', 'Day', 'Lat',
'Long', 'Duration_Sec', 'WeekDay', 'Week',
'Quarter', 'TimePer', 'ASTR',
'HOL', 'Pop', 'Milit_Share', 'Shape_Chevron', 'Shape_Cigar',
'Shape_Circle', 'Shape_Cone', 'Shape_Cross', 'Shape_Cylinder',
'Shape_Diamond', 'Shape_Disk', 'Shape_Egg', 'Shape_Fireball',
'Shape_Flash', 'Shape_Formation', 'Shape_Light', 'Shape_Other',
'Shape_Oval', 'Shape_Rectangle', 'Shape_Sphere', 'Shape_Teardrop',
'Shape_Triangle', 'Region_Midwest', 'Region_Northeast','Region_West', 'Region_South']]
y = df['Hoax']
score_model={}
for n_depth in range(1, 5, 1):
for n_trees in range(1, 150, 20):
for n_min in range(6, 36, 6):
model = RandomForestClassifier(n_estimators=n_trees, max_depth=n_depth, min_samples_leaf=n_min)
scores = cross_val_score(model, X, y, scoring='roc_auc', cv=7, )
score_model[n_min,n_trees,n_depth]=scores.mean()
print('n_min sample leaf: {}, n_trees: {},n_depth: {}, CV AUC {}, Average AUC {}'.format(n_min,n_trees,n_depth,scores,scores.mean()))
best=max(score_model.items(), key=lambda x: x[1])
print (best)
n_min sample leaf: 6, n_trees: 1,n_depth: 1, CV AUC [ 0.57348485 0.83320707 0.52765152 0.65997475 0.5 0.57422879
0.5 ], Average AUC 0.5955067095465074
n_min sample leaf: 12, n_trees: 1,n_depth: 1, CV AUC [ 0.58541667 0.77954545 0.54419192 0.65056818 0.47487374 0.56488208
0.5353626 ], Average AUC 0.590691519625519
n_min sample leaf: 18, n_trees: 1,n_depth: 1, CV AUC [ 0.5885101 0.77954545 0.3709596 0.59930556 0.56546717 0.55670141
0.5353626 ], Average AUC 0.5708359838508423
n_min sample leaf: 24, n_trees: 1,n_depth: 1, CV AUC [ 0.55359848 0.83320707 0.74835859 0.67714646 0.5 0.56017583
0.53451058], Average AUC 0.6295710021503577
n_min sample leaf: 30, n_trees: 1,n_depth: 1, CV AUC [ 0.57348485 0.90833333 0.5 0.5157197 0.46767677 0.55721204
0.4530569 ], Average AUC 0.5679262275652427
n_min sample leaf: 6, n_trees: 21,n_depth: 1, CV AUC [ 0.63028988 0.96435448 0.89397885 0.7565604 0.56590383 0.62786113
0.64169723], Average AUC 0.7258065429039684
n_min sample leaf: 12, n_trees: 21,n_depth: 1, CV AUC [ 0.61851063 0.96704809 0.92865636 0.80903041 0.6142177 0.60186881
0.57806155], Average AUC 0.7310562192399628
n_min sample leaf: 18, n_trees: 21,n_depth: 1, CV AUC [ 0.6071391 0.96442024 0.80155198 0.81200284 0.58698443 0.59956307
0.61315341], Average AUC 0.7121164380605928
n_min sample leaf: 24, n_trees: 21,n_depth: 1, CV AUC [ 0.60240162 0.96889205 0.90779935 0.84701968 0.58261521 0.61873552
0.59120275], Average AUC 0.7312380249985674
n_min sample leaf: 30, n_trees: 21,n_depth: 1, CV AUC [ 0.60304082 0.97010732 0.87487637 0.79548611 0.5721512 0.64603601
0.60477925], Average AUC 0.7237824400076907
n_min sample leaf: 6, n_trees: 41,n_depth: 1, CV AUC [ 0.63609007 0.96897622 0.89418929 0.80729956 0.55639468 0.60137134
0.58650472], Average AUC 0.7215465534078732
n_min sample leaf: 12, n_trees: 41,n_depth: 1, CV AUC [ 0.59360006 0.96369423 0.89872422 0.77617056 0.6117056 0.62700569
0.58354789], Average AUC 0.7220640352415619
n_min sample leaf: 18, n_trees: 41,n_depth: 1, CV AUC [ 0.61241846 0.96590646 0.89944234 0.78039773 0.59651726 0.62568699
0.60670358], Average AUC 0.7267246863219468
n_min sample leaf: 24, n_trees: 41,n_depth: 1, CV AUC [ 0.62999527 0.97025726 0.88473801 0.79762731 0.59815867 0.61492683
0.55787331], Average AUC 0.7219395213003941
n_min sample leaf: 30, n_trees: 41,n_depth: 1, CV AUC [ 0.62090173 0.96312342 0.89920297 0.77712279 0.59321338 0.64862866
0.58905811], Average AUC 0.7273215799393482
n_min sample leaf: 6, n_trees: 61,n_depth: 1, CV AUC [ 0.62668087 0.97058607 0.91334175 0.80270676 0.55632365 0.62511845
0.593358 ], Average AUC 0.7268736493565312
n_min sample leaf: 12, n_trees: 61,n_depth: 1, CV AUC [ 0.61268413 0.96864741 0.85261469 0.80599747 0.58434606 0.62923773
0.60551447], Average AUC 0.7227202829460618
n_min sample leaf: 18, n_trees: 61,n_depth: 1, CV AUC [ 0.66043508 0.96897622 0.87697285 0.82811185 0.61100589 0.63252264
0.58341783], Average AUC 0.7373489081858552
n_min sample leaf: 24, n_trees: 61,n_depth: 1, CV AUC [ 0.64836122 0.96949442 0.89240583 0.8005866 0.58130261 0.61773005
0.5931085 ], Average AUC 0.7289984603341262
n_min sample leaf: 30, n_trees: 61,n_depth: 1, CV AUC [ 0.60298032 0.9706492 0.9038589 0.81220013 0.58041351 0.62181512
0.61244207], Average AUC 0.7291941788879196
n_min sample leaf: 6, n_trees: 81,n_depth: 1, CV AUC [ 0.62212226 0.96513047 0.84683291 0.7935685 0.59391572 0.64453569
0.58581196], Average AUC 0.7217025027466655
n_min sample leaf: 12, n_trees: 81,n_depth: 1, CV AUC [ 0.62050715 0.96484638 0.8965383 0.8004314 0.59913194 0.62523163
0.60225505], Average AUC 0.7298488366924518
n_min sample leaf: 18, n_trees: 81,n_depth: 1, CV AUC [ 0.63007944 0.97173822 0.88900989 0.79553083 0.57644676 0.65003948
0.61141488], Average AUC 0.7320370703093415
n_min sample leaf: 24, n_trees: 81,n_depth: 1, CV AUC [ 0.60429819 0.97297191 0.88524832 0.80073127 0.57659407 0.64351179
0.59369774], Average AUC 0.7252933266247118
n_min sample leaf: 30, n_trees: 81,n_depth: 1, CV AUC [ 0.59968697 0.97018624 0.8724274 0.81669297 0.59836911 0.64112971
0.59377472], Average AUC 0.7274667312332099
n_min sample leaf: 6, n_trees: 101,n_depth: 1, CV AUC [ 0.62320602 0.96714804 0.89085385 0.81401515 0.58354903 0.62239419
0.59808522], Average AUC 0.728464500989718
n_min sample leaf: 12, n_trees: 101,n_depth: 1, CV AUC [ 0.6121896 0.96728483 0.88290457 0.8256392 0.59805871 0.63317014
0.59115232], Average AUC 0.7300570532802764
n_min sample leaf: 18, n_trees: 101,n_depth: 1, CV AUC [ 0.63671086 0.96341803 0.88977536 0.81274989 0.59151673 0.62941935
0.60816607], Average AUC 0.733108042584684
n_min sample leaf: 24, n_trees: 101,n_depth: 1, CV AUC [ 0.63060553 0.9659275 0.84559659 0.80101273 0.58043718 0.62694778
0.60601347], Average AUC 0.7223629709880852
n_min sample leaf: 30, n_trees: 101,n_depth: 1, CV AUC [ 0.6243608 0.97047296 0.88619792 0.7963831 0.5743529 0.62034639
0.60289207], Average AUC 0.7250008768989324
n_min sample leaf: 6, n_trees: 121,n_depth: 1, CV AUC [ 0.6189052 0.97184606 0.89505471 0.79017519 0.60685238 0.61584544
0.6278792 ], Average AUC 0.73236545494379
n_min sample leaf: 12, n_trees: 121,n_depth: 1, CV AUC [ 0.62334017 0.96708228 0.89346328 0.82088857 0.59223485 0.63842914
0.61345069], Average AUC 0.7355555690961179
n_min sample leaf: 18, n_trees: 121,n_depth: 1, CV AUC [ 0.62652304 0.96772675 0.88076862 0.82694392 0.5960464 0.63440988
0.60033868], Average AUC 0.7332510416376936
n_min sample leaf: 24, n_trees: 121,n_depth: 1, CV AUC [ 0.62765678 0.97069392 0.87968487 0.82473169 0.60850431 0.62468414
0.61142549], Average AUC 0.7353401723417247
n_min sample leaf: 30, n_trees: 121,n_depth: 1, CV AUC [ 0.61910248 0.96833439 0.89169034 0.82341909 0.59207439 0.63150927
0.6108681 ], Average AUC 0.7338568643920204
n_min sample leaf: 6, n_trees: 141,n_depth: 1, CV AUC [ 0.62262995 0.97069129 0.89883207 0.79267677 0.59339752 0.63292535
0.58666663], Average AUC 0.7282599389094733
n_min sample leaf: 12, n_trees: 141,n_depth: 1, CV AUC [ 0.62554714 0.97085175 0.88693182 0.79422875 0.59076705 0.63104864
0.60968961], Average AUC 0.7298663925308856
n_min sample leaf: 18, n_trees: 141,n_depth: 1, CV AUC [ 0.61981534 0.9688105 0.88633207 0.81704809 0.59979219 0.61450569
0.58469983], Average AUC 0.727286243757411
n_min sample leaf: 24, n_trees: 141,n_depth: 1, CV AUC [ 0.61771886 0.97092803 0.88878367 0.82199863 0.5946391 0.62810328
0.60904728], Average AUC 0.7330312649687867
n_min sample leaf: 30, n_trees: 141,n_depth: 1, CV AUC [ 0.60251999 0.97284827 0.87755419 0.81374421 0.59388941 0.6473784
0.61134852], Average AUC 0.7313261423277716
n_min sample leaf: 6, n_trees: 1,n_depth: 2, CV AUC [ 0.54009891 0.79165088 0.61919981 0.53062395 0.49251894 0.4928985
0.58172176], Average AUC 0.5783875360585188
n_min sample leaf: 12, n_trees: 1,n_depth: 2, CV AUC [ 0.5875 0.59850589 0.74452336 0.6701573 0.57473169 0.5787271
0.52927109], Average AUC 0.6119166335356766
n_min sample leaf: 18, n_trees: 1,n_depth: 2, CV AUC [ 0.62788563 0.90236742 0.75104167 0.50675505 0.53962542 0.52281007
0.69109552], Average AUC 0.6487972529396816
n_min sample leaf: 24, n_trees: 1,n_depth: 2, CV AUC [ 0.54663826 0.896754 0.70006839 0.65460859 0.56899463 0.60953885
0.55673994], Average AUC 0.647620379977431
n_min sample leaf: 30, n_trees: 1,n_depth: 2, CV AUC [ 0.54880051 0.91106902 0.75967224 0.72825126 0.60242529 0.60324805
0.56754275], Average AUC 0.6744298754388686
n_min sample leaf: 6, n_trees: 21,n_depth: 2, CV AUC [ 0.64482586 0.97267729 0.82685185 0.74975274 0.60001841 0.66515582
0.61433986], Average AUC 0.724803120101308
n_min sample leaf: 12, n_trees: 21,n_depth: 2, CV AUC [ 0.63358323 0.96882365 0.71971275 0.78791561 0.60856218 0.64664666
0.6300663 ], Average AUC 0.7136157711894714
n_min sample leaf: 18, n_trees: 21,n_depth: 2, CV AUC [ 0.62216435 0.95352483 0.86424137 0.71327336 0.59740372 0.66181565
0.64199982], Average AUC 0.7220604433998278
n_min sample leaf: 24, n_trees: 21,n_depth: 2, CV AUC [ 0.63530093 0.9705282 0.82642835 0.69346328 0.60801768 0.62334702
0.60716542], Average AUC 0.7091786949478325
n_min sample leaf: 30, n_trees: 21,n_depth: 2, CV AUC [ 0.60167824 0.96404146 0.80283302 0.78322285 0.59487847 0.6486892
0.61645795], Average AUC 0.7159715988751979
n_min sample leaf: 6, n_trees: 41,n_depth: 2, CV AUC [ 0.62376631 0.97342172 0.80575021 0.77088068 0.57096749 0.65124237
0.63624009], Average AUC 0.7188955522204225
n_min sample leaf: 12, n_trees: 41,n_depth: 2, CV AUC [ 0.63151042 0.96699021 0.79587016 0.75394571 0.61017203 0.66364498
0.63090505], Average AUC 0.7218626508327605
n_min sample leaf: 18, n_trees: 41,n_depth: 2, CV AUC [ 0.64996054 0.9665746 0.80081019 0.74131944 0.6010101 0.66615603
0.65162945], Average AUC 0.7253514788278107
n_min sample leaf: 24, n_trees: 41,n_depth: 2, CV AUC [ 0.6560948 0.96864741 0.82857481 0.79339489 0.60565288 0.67747684
0.64974758], Average AUC 0.739941315949911
n_min sample leaf: 30, n_trees: 41,n_depth: 2, CV AUC [ 0.63662142 0.9690262 0.7534354 0.74745107 0.57365846 0.65454833
0.63377695], Average AUC 0.7097882605371826
n_min sample leaf: 6, n_trees: 61,n_depth: 2, CV AUC [ 0.65515572 0.96656671 0.77732534 0.77446075 0.58358586 0.66227627
0.65695122], Average AUC 0.7251888385944071
n_min sample leaf: 12, n_trees: 61,n_depth: 2, CV AUC [ 0.64458386 0.96976536 0.80246212 0.7608165 0.57275621 0.66449779
0.64950604], Average AUC 0.7234839830801736
n_min sample leaf: 18, n_trees: 61,n_depth: 2, CV AUC [ 0.6296533 0.97422927 0.85083386 0.77697022 0.57869055 0.66410823
0.6448611 ], Average AUC 0.7313352208441619
n_min sample leaf: 24, n_trees: 61,n_depth: 2, CV AUC [ 0.64513626 0.97497106 0.78254156 0.76776357 0.56707176 0.66186829
0.64271116], Average AUC 0.7202948091768532
n_min sample leaf: 30, n_trees: 61,n_depth: 2, CV AUC [ 0.64050926 0.97183028 0.82243792 0.76441235 0.58298085 0.64913403
0.66410974], Average AUC 0.7279163472268504
n_min sample leaf: 6, n_trees: 81,n_depth: 2, CV AUC [ 0.64717487 0.97516046 0.79735638 0.73356481 0.60893834 0.66011266
0.64500974], Average AUC 0.7239024659599987
n_min sample leaf: 12, n_trees: 81,n_depth: 2, CV AUC [ 0.63570602 0.97617582 0.8201573 0.7816472 0.59678293 0.6547431
0.64575559], Average AUC 0.7301382808255165
n_min sample leaf: 18, n_trees: 81,n_depth: 2, CV AUC [ 0.66009838 0.97418718 0.80458228 0.78050295 0.60209649 0.66880922
0.65178074], Average AUC 0.7345796053562635
n_min sample leaf: 24, n_trees: 81,n_depth: 2, CV AUC [ 0.63171033 0.97213016 0.84660406 0.77423453 0.59878735 0.66907507
0.65069249], Average AUC 0.7347477139896108
n_min sample leaf: 30, n_trees: 81,n_depth: 2, CV AUC [ 0.64473906 0.97558923 0.83618476 0.79320812 0.55318813 0.65283744
0.63119701], Average AUC 0.7267062507578947
n_min sample leaf: 6, n_trees: 101,n_depth: 2, CV AUC [ 0.62567603 0.97025463 0.78373053 0.76050873 0.59433396 0.66282902
0.66332673], Average AUC 0.7229513775466382
n_min sample leaf: 12, n_trees: 101,n_depth: 2, CV AUC [ 0.65160196 0.9692419 0.83776305 0.76512784 0.58175768 0.66126027
0.64647754], Average AUC 0.7304614617052242
n_min sample leaf: 18, n_trees: 101,n_depth: 2, CV AUC [ 0.6362479 0.97267992 0.83070286 0.77003104 0.60373527 0.65692251
0.66053711], Average AUC 0.732979516449487
n_min sample leaf: 24, n_trees: 101,n_depth: 2, CV AUC [ 0.64436553 0.97399779 0.80724169 0.77793824 0.59648306 0.66898294
0.63763092], Average AUC 0.7295200242091068
n_min sample leaf: 30, n_trees: 101,n_depth: 2, CV AUC [ 0.63156303 0.97433975 0.80737058 0.78180766 0.60837279 0.65947041
0.6529141 ], Average AUC 0.7308340466926416
n_min sample leaf: 6, n_trees: 121,n_depth: 2, CV AUC [ 0.65093382 0.9729456 0.82653356 0.79218224 0.62156197 0.66115761
0.63344251], Average AUC 0.7369653315225476
n_min sample leaf: 12, n_trees: 121,n_depth: 2, CV AUC [ 0.63403304 0.96258944 0.83465909 0.72682029 0.60356166 0.67110708
0.67204595], Average AUC 0.7292595050975271
n_min sample leaf: 18, n_trees: 121,n_depth: 2, CV AUC [ 0.64904777 0.97529198 0.81208176 0.77611795 0.60601326 0.66890398
0.65149142], Average AUC 0.7341354455062812
n_min sample leaf: 24, n_trees: 121,n_depth: 2, CV AUC [ 0.64944234 0.97254577 0.80852273 0.77436606 0.59070128 0.66229469
0.64777813], Average AUC 0.7293787138584032
n_min sample leaf: 30, n_trees: 121,n_depth: 2, CV AUC [ 0.62925084 0.97335596 0.80786511 0.77336911 0.57211963 0.66914087
0.63879083], Average AUC 0.7234131927056637
n_min sample leaf: 6, n_trees: 141,n_depth: 2, CV AUC [ 0.6509496 0.97592856 0.80270676 0.77143308 0.60970381 0.66237366
0.6540342 ], Average AUC 0.7324470937628164
n_min sample leaf: 12, n_trees: 141,n_depth: 2, CV AUC [ 0.654627 0.97635995 0.80908828 0.76241846 0.59305819 0.67057802
0.63894743], Average AUC 0.7292967598868673
n_min sample leaf: 18, n_trees: 141,n_depth: 2, CV AUC [ 0.62087016 0.97455808 0.78861006 0.77628893 0.60742845 0.65725679
0.62311216], Average AUC 0.7211606622994342
n_min sample leaf: 24, n_trees: 141,n_depth: 2, CV AUC [ 0.65768887 0.9728588 0.78296507 0.75313815 0.60333281 0.65940724
0.6500077 ], Average AUC 0.7256283760054332
n_min sample leaf: 30, n_trees: 141,n_depth: 2, CV AUC [ 0.65549505 0.97357428 0.82674137 0.77430029 0.6025384 0.67623973
0.64389761], Average AUC 0.7361123935765976
n_min sample leaf: 6, n_trees: 1,n_depth: 3, CV AUC [ 0.55263836 0.83441972 0.50463226 0.66463068 0.6295665 0.60520636
0.57285656], Average AUC 0.6234214913141471
n_min sample leaf: 12, n_trees: 1,n_depth: 3, CV AUC [ 0.52863794 0.77332702 0.77033617 0.64473117 0.5794718 0.561255
0.56098674], Average AUC 0.6312494070363553
n_min sample leaf: 18, n_trees: 1,n_depth: 3, CV AUC [ 0.62841961 0.95251999 0.73274937 0.68579809 0.46687447 0.56618235
0.69342595], Average AUC 0.6751385478848724
n_min sample leaf: 24, n_trees: 1,n_depth: 3, CV AUC [ 0.61306292 0.92574179 0.6727904 0.7023911 0.57022306 0.63676037
0.51658907], Average AUC 0.6625083892778669
n_min sample leaf: 30, n_trees: 1,n_depth: 3, CV AUC [ 0.37390309 0.77704914 0.82506839 0.53658723 0.54666193 0.56401348
0.57489502], Average AUC 0.5997397545722347
n_min sample leaf: 6, n_trees: 21,n_depth: 3, CV AUC [ 0.64833491 0.97764362 0.75792824 0.77776463 0.58432239 0.66868288
0.64546893], Average AUC 0.7228779427668035
n_min sample leaf: 12, n_trees: 21,n_depth: 3, CV AUC [ 0.64051452 0.97606008 0.73693971 0.66810553 0.52757786 0.67210729
0.65118619], Average AUC 0.6960701681604736
n_min sample leaf: 18, n_trees: 21,n_depth: 3, CV AUC [ 0.65495844 0.97690709 0.7686553 0.78837332 0.53802872 0.65142925
0.63084665], Average AUC 0.7155998251565377
n_min sample leaf: 24, n_trees: 21,n_depth: 3, CV AUC [ 0.65558449 0.97369003 0.69247159 0.69001473 0.61677452 0.67180985
0.62927799], Average AUC 0.7042318855220643
n_min sample leaf: 30, n_trees: 21,n_depth: 3, CV AUC [ 0.65117319 0.97070444 0.70960911 0.6521412 0.57467119 0.65865445
0.66770625], Average AUC 0.6978085484877509
n_min sample leaf: 6, n_trees: 41,n_depth: 3, CV AUC [ 0.654385 0.97232481 0.76893939 0.73787616 0.55566077 0.66374237
0.67485415], Average AUC 0.7182546639757155
n_min sample leaf: 12, n_trees: 41,n_depth: 3, CV AUC [ 0.65950652 0.94583596 0.72841172 0.73954388 0.57196707 0.67907454
0.63439539], Average AUC 0.7083907259279982
n_min sample leaf: 18, n_trees: 41,n_depth: 3, CV AUC [ 0.67341383 0.97557081 0.70672875 0.71377578 0.5698548 0.66482944
0.64067269], Average AUC 0.7064065845431348
n_min sample leaf: 24, n_trees: 41,n_depth: 3, CV AUC [ 0.64535459 0.97284564 0.69715646 0.71888678 0.60133628 0.67555801
0.66060082], Average AUC 0.7102483692218888
n_min sample leaf: 30, n_trees: 41,n_depth: 3, CV AUC [ 0.67789089 0.97085964 0.77571812 0.66290246 0.5694839 0.67635292
0.65485967], Average AUC 0.712581084932266
n_min sample leaf: 6, n_trees: 61,n_depth: 3, CV AUC [ 0.65538721 0.97592593 0.71052452 0.74271359 0.56805029 0.68364656
0.6658005 ], Average AUC 0.7145783698827801
n_min sample leaf: 12, n_trees: 61,n_depth: 3, CV AUC [ 0.65425084 0.97452652 0.73592435 0.74152199 0.57220907 0.68136713
0.65927634], Average AUC 0.7170108914071859
n_min sample leaf: 18, n_trees: 61,n_depth: 3, CV AUC [ 0.6739294 0.97306397 0.74603851 0.70572391 0.5584254 0.67456307
0.64653063], Average AUC 0.7111821257413358
n_min sample leaf: 24, n_trees: 61,n_depth: 3, CV AUC [ 0.660635 0.97267203 0.73394886 0.73337016 0.57663878 0.68013792
0.6555365 ], Average AUC 0.7161341805082356
n_min sample leaf: 30, n_trees: 61,n_depth: 3, CV AUC [ 0.66929451 0.97532355 0.72878788 0.69412879 0.5806634 0.68468625
0.66854234], Average AUC 0.7144895306874801
n_min sample leaf: 6, n_trees: 81,n_depth: 3, CV AUC [ 0.65296454 0.97686237 0.73677662 0.68961753 0.58801557 0.67795325
0.65511182], Average AUC 0.7110431020217354
n_min sample leaf: 12, n_trees: 81,n_depth: 3, CV AUC [ 0.67865372 0.97417666 0.72020202 0.70434291 0.57051768 0.6741893
0.66530946], Average AUC 0.7124845363797331
n_min sample leaf: 18, n_trees: 81,n_depth: 3, CV AUC [ 0.65048401 0.97423716 0.72020465 0.6819234 0.53846012 0.67431565
0.66263663], Average AUC 0.7003230880638783
n_min sample leaf: 24, n_trees: 81,n_depth: 3, CV AUC [ 0.66583544 0.97271412 0.72696759 0.75179398 0.60976694 0.68526269
0.66126173], Average AUC 0.7248003549185073
n_min sample leaf: 30, n_trees: 81,n_depth: 3, CV AUC [ 0.67446075 0.97315341 0.72290878 0.70196759 0.61024832 0.68196462
0.66498564], Average AUC 0.718527015918608
n_min sample leaf: 6, n_trees: 101,n_depth: 3, CV AUC [ 0.66663773 0.97453441 0.68459596 0.69294508 0.56491214 0.68459939
0.65108532], Average AUC 0.7027585755116476
n_min sample leaf: 12, n_trees: 101,n_depth: 3, CV AUC [ 0.65722064 0.97758049 0.71740846 0.69404461 0.56463594 0.68280427
0.65955504], Average AUC 0.7076070667599721
n_min sample leaf: 18, n_trees: 101,n_depth: 3, CV AUC [ 0.65103641 0.97314552 0.73439078 0.73582965 0.57070444 0.67432354
0.66371425], Average AUC 0.7147349418285119
n_min sample leaf: 24, n_trees: 101,n_depth: 3, CV AUC [ 0.65394834 0.97251157 0.69085122 0.73435922 0.56777673 0.66615866
0.66511039], Average AUC 0.7072451614617432
n_min sample leaf: 30, n_trees: 101,n_depth: 3, CV AUC [ 0.63681082 0.97289825 0.74816393 0.70817551 0.59111953 0.67717941
0.6547535 ], Average AUC 0.7127287057619457
n_min sample leaf: 6, n_trees: 121,n_depth: 3, CV AUC [ 0.64308975 0.96725063 0.71125842 0.66490951 0.57281934 0.68171194
0.65833143], Average AUC 0.6999101459986166
n_min sample leaf: 12, n_trees: 121,n_depth: 3, CV AUC [ 0.65096801 0.97627578 0.6996896 0.66980745 0.58465383 0.67758212
0.66423714], Average AUC 0.7033162771364248
n_min sample leaf: 18, n_trees: 121,n_depth: 3, CV AUC [ 0.65736795 0.97509733 0.73086069 0.7374737 0.57216435 0.68148821
0.66283835], Average AUC 0.7167557962784838
n_min sample leaf: 24, n_trees: 121,n_depth: 3, CV AUC [ 0.65788352 0.97496317 0.7543771 0.6394939 0.58176031 0.68178827
0.66212701], Average AUC 0.7074847559619412
n_min sample leaf: 30, n_trees: 121,n_depth: 3, CV AUC [ 0.66462542 0.97463173 0.76002736 0.70828072 0.58724221 0.68138555
0.66314624], Average AUC 0.7199056069024021
n_min sample leaf: 6, n_trees: 141,n_depth: 3, CV AUC [ 0.65458228 0.97565236 0.73637416 0.7223669 0.57825126 0.6843546
0.67001014], Average AUC 0.7173702424687692
n_min sample leaf: 12, n_trees: 141,n_depth: 3, CV AUC [ 0.65135469 0.97682555 0.69820076 0.71298401 0.57743582 0.68075121
0.66042033], Average AUC 0.708281765544375
n_min sample leaf: 18, n_trees: 141,n_depth: 3, CV AUC [ 0.66178714 0.97532355 0.70226221 0.70785985 0.59051189 0.66964361
0.65516491], Average AUC 0.708936164526637
n_min sample leaf: 24, n_trees: 141,n_depth: 3, CV AUC [ 0.64925558 0.9749737 0.73485901 0.72157513 0.59316604 0.67292325
0.66328692], Average AUC 0.7157199437930158
n_min sample leaf: 30, n_trees: 141,n_depth: 3, CV AUC [ 0.66031408 0.97446601 0.72472643 0.70967487 0.58825758 0.68208833
0.66319933], Average AUC 0.7146752337793275
n_min sample leaf: 6, n_trees: 1,n_depth: 4, CV AUC [ 0.61674821 0.8455282 0.57505261 0.75975116 0.59886101 0.61435039
0.52956837], Average AUC 0.6485514196829182
n_min sample leaf: 12, n_trees: 1,n_depth: 4, CV AUC [ 0.65097327 0.83401726 0.72545507 0.60422717 0.56292088 0.5999263
0.60241165], Average AUC 0.6542759426117417
n_min sample leaf: 18, n_trees: 1,n_depth: 4, CV AUC [ 0.56281566 0.93666877 0.8067077 0.69787195 0.57274569 0.61864077
0.56967942], Average AUC 0.6807328500355648
n_min sample leaf: 24, n_trees: 1,n_depth: 4, CV AUC [ 0.60562395 0.89917929 0.68988847 0.57542088 0.51927083 0.54853917
0.5388317 ], Average AUC 0.6252506125937298
n_min sample leaf: 30, n_trees: 1,n_depth: 4, CV AUC [ 0.59711963 0.84238479 0.69423401 0.3484375 0.44429714 0.61629554
0.53441503], Average AUC 0.5824548041872875
n_min sample leaf: 6, n_trees: 21,n_depth: 4, CV AUC [ 0.66273148 0.94165088 0.66576705 0.62904566 0.61181871 0.65279796
0.67545401], Average AUC 0.691323678701861
n_min sample leaf: 12, n_trees: 21,n_depth: 4, CV AUC [ 0.66551978 0.97498685 0.64291614 0.71703756 0.5529093 0.67947989
0.69272788], Average AUC 0.7036539150191271
n_min sample leaf: 18, n_trees: 21,n_depth: 4, CV AUC [ 0.64586753 0.97643098 0.66336806 0.67051242 0.5553951 0.66212624
0.67695632], Average AUC 0.692950946785677
n_min sample leaf: 24, n_trees: 21,n_depth: 4, CV AUC [ 0.63158933 0.97124106 0.66226326 0.66546717 0.60073653 0.66544009
0.65328039], Average AUC 0.6928596901417778
n_min sample leaf: 30, n_trees: 21,n_depth: 4, CV AUC [ 0.6548085 0.97623106 0.6715383 0.68731061 0.54047506 0.65320331
0.68338226], Average AUC 0.6952784424494702
n_min sample leaf: 6, n_trees: 41,n_depth: 4, CV AUC [ 0.66947601 0.97136206 0.68610848 0.60649726 0.5600142 0.68506791
0.68524289], Average AUC 0.6948241167566683
n_min sample leaf: 12, n_trees: 41,n_depth: 4, CV AUC [ 0.66499369 0.94360006 0.68721591 0.68080282 0.55239899 0.69039008
0.65906931], Average AUC 0.6969244091586397
n_min sample leaf: 18, n_trees: 41,n_depth: 4, CV AUC [ 0.64525463 0.97345591 0.64472854 0.71331019 0.57239583 0.66078122
0.67090728], Average AUC 0.6972619414665278
n_min sample leaf: 24, n_trees: 41,n_depth: 4, CV AUC [ 0.6559475 0.97318497 0.72830913 0.66214226 0.5262258 0.67319436
0.64679871], Average AUC 0.6951146746637266
n_min sample leaf: 30, n_trees: 41,n_depth: 4, CV AUC [ 0.6419113 0.97296402 0.69813237 0.65803872 0.57594697 0.68286218
0.66689139], Average AUC 0.6995352780067118
n_min sample leaf: 6, n_trees: 61,n_depth: 4, CV AUC [ 0.65899095 0.96985743 0.63398569 0.71889731 0.53936763 0.67806117
0.67020125], Average AUC 0.6956230610129316
n_min sample leaf: 12, n_trees: 61,n_depth: 4, CV AUC [ 0.66315236 0.97357428 0.64614899 0.62573653 0.58272043 0.67456043
0.68575251], Average AUC 0.6930922196453925
n_min sample leaf: 18, n_trees: 61,n_depth: 4, CV AUC [ 0.642248 0.97512363 0.67836963 0.64569655 0.55799926 0.67957465
0.63666477], Average AUC 0.6879537856741565
n_min sample leaf: 24, n_trees: 61,n_depth: 4, CV AUC [ 0.66559343 0.97312447 0.70289878 0.67686237 0.55093645 0.65539587
0.65676011], Average AUC 0.6973673565858926
n_min sample leaf: 30, n_trees: 61,n_depth: 4, CV AUC [ 0.64110638 0.97233796 0.6889678 0.66642992 0.57150936 0.67438408
0.66069637], Average AUC 0.6964902687642691
n_min sample leaf: 6, n_trees: 81,n_depth: 4, CV AUC [ 0.64771149 0.97797769 0.6387258 0.60797033 0.55032881 0.68519162
0.66826364], Average AUC 0.6823099115980006
n_min sample leaf: 12, n_trees: 81,n_depth: 4, CV AUC [ 0.64914773 0.97298243 0.63721591 0.68885732 0.58152357 0.68738945
0.6748223 ], Average AUC 0.6988483864743182
n_min sample leaf: 18, n_trees: 81,n_depth: 4, CV AUC [ 0.66129787 0.9740241 0.64410248 0.59813237 0.5597722 0.67647399
0.65924184], Average AUC 0.6818635504840189
n_min sample leaf: 24, n_trees: 81,n_depth: 4, CV AUC [ 0.65685764 0.9739557 0.67777252 0.64489162 0.58138152 0.67929564
0.66719929], Average AUC 0.697336276351247
n_min sample leaf: 30, n_trees: 81,n_depth: 4, CV AUC [ 0.65328283 0.9743871 0.67897727 0.67430029 0.57306397 0.67813224
0.65764133], Average AUC 0.6985407189801046
n_min sample leaf: 6, n_trees: 101,n_depth: 4, CV AUC [ 0.6525463 0.97496843 0.63980955 0.67636785 0.51845802 0.68265161
0.68051301], Average AUC 0.6893306817098063
n_min sample leaf: 12, n_trees: 101,n_depth: 4, CV AUC [ 0.65388521 0.97161984 0.63980429 0.62767782 0.5093487 0.68181986
0.67247594], Average AUC 0.6795188078006907
n_min sample leaf: 18, n_trees: 101,n_depth: 4, CV AUC [ 0.64475221 0.97265099 0.66684817 0.65382471 0.57458439 0.66525848
0.66554834], Average AUC 0.6919238965873715
n_min sample leaf: 24, n_trees: 101,n_depth: 4, CV AUC [ 0.67616004 0.97417403 0.6513205 0.64056713 0.57876157 0.67575542
0.64984844], Average AUC 0.6923695906593939
n_min sample leaf: 30, n_trees: 101,n_depth: 4, CV AUC [ 0.64389994 0.97390572 0.70099958 0.66031671 0.57497896 0.67582649
0.66990397], Average AUC 0.6999759091196698
n_min sample leaf: 6, n_trees: 121,n_depth: 4, CV AUC [ 0.66539089 0.97491319 0.66376789 0.62882207 0.54526778 0.69067698
0.66484231], Average AUC 0.6905258745893565
n_min sample leaf: 12, n_trees: 121,n_depth: 4, CV AUC [ 0.6584596 0.96876578 0.65530303 0.61936027 0.54447338 0.67868762
0.66968632], Average AUC 0.6849622852919474
n_min sample leaf: 18, n_trees: 121,n_depth: 4, CV AUC [ 0.65447706 0.97477378 0.65351431 0.63839436 0.56949968 0.67655822
0.66096179], Average AUC 0.6897398876586598
n_min sample leaf: 24, n_trees: 121,n_depth: 4, CV AUC [ 0.66179766 0.97150936 0.65014731 0.63794192 0.56493845 0.67974574
0.66199164], Average AUC 0.689724583076864
n_min sample leaf: 30, n_trees: 121,n_depth: 4, CV AUC [ 0.64836122 0.97405303 0.70105219 0.6703388 0.57353746 0.68473889
0.66362666], Average AUC 0.7022440362517416
n_min sample leaf: 6, n_trees: 141,n_depth: 4, CV AUC [ 0.66997317 0.96444655 0.65826494 0.61417298 0.56702441 0.68397031
0.66595179], Average AUC 0.6891148781959455
n_min sample leaf: 12, n_trees: 141,n_depth: 4, CV AUC [ 0.65527146 0.97598117 0.64760101 0.63527462 0.53752367 0.68545746
0.673888 ], Average AUC 0.6872853431350654
n_min sample leaf: 18, n_trees: 141,n_depth: 4, CV AUC [ 0.65343013 0.97339015 0.64078283 0.63468013 0.57095434 0.67608707
0.66179258], Average AUC 0.687302461523447
n_min sample leaf: 24, n_trees: 141,n_depth: 4, CV AUC [ 0.64649621 0.97489478 0.65356692 0.64010417 0.56871843 0.67842441
0.6708887 ], Average AUC 0.6904419450298608
n_min sample leaf: 30, n_trees: 141,n_depth: 4, CV AUC [ 0.65320918 0.97409249 0.68586385 0.64705387 0.58934922 0.67708728
0.66413893], Average AUC 0.6986849740550267
((24, 41, 2), 0.73994131594991097)
In [137]:
print (best)
((30, 21, 2), 0.7402365054023613)
In [36]:
model = RandomForestClassifier(n_estimators=21, max_depth=2, min_samples_leaf=30, min_samples_split=6)
model.fit(X, y)
print (model.score(X, y))
scores=cross_val_score(model, X, y, scoring='roc_auc', cv=7)
print (scores.mean())
0.92964360587
0.721820840019
In [140]:
features = X.columns
feature_importances = model.feature_importances_
features_df = pd.DataFrame({'Features': features, 'Importance Score': feature_importances})
features_df.sort('Importance Score', inplace=True, ascending=False)
features_df
/Users/annakudryashova/anaconda/lib/python3.5/site-packages/ipykernel/__main__.py:5: FutureWarning: sort(columns=....) is deprecated, use sort_values(by=.....)
Out[140]:
Features
Importance Score
0
Year
0.130991
1
Month
0.121567
12
Pop
0.120769
7
Week
0.118995
35
Region_West
0.096590
8
Quarter
0.089630
2
Day
0.080987
5
Duration_Sec
0.063953
4
Long
0.050639
9
TimePer
0.040954
3
Lat
0.029090
6
WeekDay
0.018264
33
Region_Midwest
0.014872
13
Milit_Share
0.014493
22
Shape_Egg
0.005622
26
Shape_Light
0.001783
11
HOL
0.000474
20
Shape_Diamond
0.000192
25
Shape_Formation
0.000135
34
Region_Northeast
0.000000
32
Shape_Triangle
0.000000
31
Shape_Teardrop
0.000000
30
Shape_Sphere
0.000000
29
Shape_Rectangle
0.000000
28
Shape_Oval
0.000000
27
Shape_Other
0.000000
18
Shape_Cross
0.000000
24
Shape_Flash
0.000000
23
Shape_Fireball
0.000000
21
Shape_Disk
0.000000
19
Shape_Cylinder
0.000000
17
Shape_Cone
0.000000
16
Shape_Circle
0.000000
15
Shape_Cigar
0.000000
14
Shape_Chevron
0.000000
10
ASTR
0.000000
36
Region_South
0.000000
In [37]:
# 2nd revision: remove the 5 least important features
from sklearn.ensemble import RandomForestClassifier
from sklearn.cross_validation import cross_val_score
X = df[['Year', 'Month', 'Lat', 'Long', 'Duration_Sec', 'Week',
'Quarter', 'TimePer', 'Pop', 'Milit_Share', 'Shape_Chevron', 'Shape_Cigar',
'Shape_Circle', 'Shape_Cone', 'Shape_Cross', 'Shape_Cylinder',
'Shape_Diamond', 'Shape_Disk', 'Shape_Egg', 'Shape_Fireball',
'Shape_Flash', 'Shape_Formation', 'Shape_Light', 'Shape_Other',
'Shape_Oval', 'Shape_Rectangle', 'Shape_Sphere', 'Shape_Teardrop',
'Shape_Triangle', 'Region_Midwest', 'Region_Northeast','Region_West']]
y = df['Hoax']
score_model={}
for n_depth in range(1, 5, 1):
for n_trees in range(1, 150, 20):
for n_min in range(6, 36, 6):
model = RandomForestClassifier(n_estimators=n_trees, max_depth=n_depth, min_samples_leaf=n_min)
scores = cross_val_score(model, X, y, scoring='roc_auc', cv=7)
score_model[n_min,n_trees,n_depth]=scores.mean()
print('n_min sample leaf: {}, n_trees: {},n_depth: {}, CV AUC {}, Average AUC {}'.format(n_min,n_trees,n_depth,scores, scores.mean()))
best=max(score_model.items(), key=lambda x: x[1])
print (best)
n_min sample leaf: 6, n_trees: 1,n_depth: 1, CV AUC [ 0.56717172 0.59880051 0.5032197 0.49810606 0.49968434 0.6105259
0.5353626 ], Average AUC 0.5446958315380748
n_min sample leaf: 12, n_trees: 1,n_depth: 1, CV AUC [ 0.50164141 0.77954545 0.9125 0.65056818 0.55069444 0.5
0.53584833], Average AUC 0.6329711171974293
n_min sample leaf: 18, n_trees: 1,n_depth: 1, CV AUC [ 0.5 0.54905303 0.5032197 0.65751263 0.51565657 0.6105259
0.53451058], Average AUC 0.5529254859833632
n_min sample leaf: 24, n_trees: 1,n_depth: 1, CV AUC [ 0.58541667 0.91401515 0.74488636 0.65056818 0.55498737 0.5
0.58429108], Average AUC 0.6477378304640258
n_min sample leaf: 30, n_trees: 1,n_depth: 1, CV AUC [ 0.56717172 0.60915404 0.37064394 0.65751263 0.50991162 0.56017583
0.53451058], Average AUC 0.544154335483691
n_min sample leaf: 6, n_trees: 21,n_depth: 1, CV AUC [ 0.64037774 0.95833596 0.85390888 0.82712805 0.59408933 0.64849442
0.57352012], Average AUC 0.7279792148403323
n_min sample leaf: 12, n_trees: 21,n_depth: 1, CV AUC [ 0.57314026 0.96291824 0.90542666 0.81364689 0.61184764 0.6165219
0.56509287], Average AUC 0.7212277806257523
n_min sample leaf: 18, n_trees: 21,n_depth: 1, CV AUC [ 0.65885943 0.9615846 0.84119581 0.82299032 0.61196075 0.62429722
0.62015533], Average AUC 0.7344347794823437
n_min sample leaf: 24, n_trees: 21,n_depth: 1, CV AUC [ 0.58540614 0.96023779 0.90136258 0.7654698 0.63495633 0.65763582
0.63289043], Average AUC 0.7339941293107018
n_min sample leaf: 30, n_trees: 21,n_depth: 1, CV AUC [ 0.55930661 0.96082702 0.89076968 0.78248895 0.59786406 0.66427932
0.56219708], Average AUC 0.716818959800272
n_min sample leaf: 6, n_trees: 41,n_depth: 1, CV AUC [ 0.60507155 0.96330492 0.88141572 0.81638258 0.61006681 0.63520741
0.62171337], Average AUC 0.7333089096042841
n_min sample leaf: 12, n_trees: 41,n_depth: 1, CV AUC [ 0.62658354 0.95882786 0.860877 0.78493266 0.57609954 0.6433407
0.60794577], Average AUC 0.7226581533548703
n_min sample leaf: 18, n_trees: 41,n_depth: 1, CV AUC [ 0.58920981 0.96344171 0.90842277 0.78344118 0.60739426 0.65395873
0.60224444], Average AUC 0.7297304122480747
n_min sample leaf: 24, n_trees: 41,n_depth: 1, CV AUC [ 0.64181134 0.96167666 0.89194024 0.80190972 0.59871107 0.64147189
0.61967225], Average AUC 0.7367418819696117
n_min sample leaf: 30, n_trees: 41,n_depth: 1, CV AUC [ 0.60582123 0.96322075 0.87579177 0.80871212 0.58762889 0.66122605
0.61273138], Average AUC 0.7307331715017374
n_min sample leaf: 6, n_trees: 61,n_depth: 1, CV AUC [ 0.64910301 0.96455703 0.89147201 0.8131471 0.5931108 0.65389556
0.62044199], Average AUC 0.7408182120077432
n_min sample leaf: 12, n_trees: 61,n_depth: 1, CV AUC [ 0.6468408 0.96580124 0.89656197 0.82608375 0.59444971 0.67057012
0.59308992], Average AUC 0.7419139313893625
n_min sample leaf: 18, n_trees: 61,n_depth: 1, CV AUC [ 0.6360322 0.9633549 0.88996738 0.82107271 0.5947338 0.66187882
0.61803193], Average AUC 0.7407245323975223
n_min sample leaf: 24, n_trees: 61,n_depth: 1, CV AUC [ 0.59244792 0.96445444 0.88549295 0.81054819 0.61693497 0.64434618
0.59820732], Average AUC 0.7303474240954542
n_min sample leaf: 30, n_trees: 61,n_depth: 1, CV AUC [ 0.63167614 0.96532513 0.85536616 0.79937658 0.59841909 0.65842809
0.60599755], Average AUC 0.7306555324030172
n_min sample leaf: 6, n_trees: 81,n_depth: 1, CV AUC [ 0.60872527 0.96610375 0.84042771 0.81059817 0.62130156 0.64878395
0.63546505], Average AUC 0.733057923774342
n_min sample leaf: 12, n_trees: 81,n_depth: 1, CV AUC [ 0.63197075 0.96578809 0.9010101 0.79579914 0.60508733 0.65239261
0.62712008], Average AUC 0.7398811572687837
n_min sample leaf: 18, n_trees: 81,n_depth: 1, CV AUC [ 0.64115899 0.9666693 0.87313763 0.77689131 0.62445023 0.64679669
0.61527681], Average AUC 0.7349115650658319
n_min sample leaf: 24, n_trees: 81,n_depth: 1, CV AUC [ 0.6119213 0.9670323 0.88984901 0.83634259 0.63046086 0.667759
0.59867712], Average AUC 0.7431488833401619
n_min sample leaf: 30, n_trees: 81,n_depth: 1, CV AUC [ 0.61099011 0.96495949 0.89124053 0.82989268 0.62243529 0.63386766
0.61385679], Average AUC 0.7381775058827426
n_min sample leaf: 6, n_trees: 101,n_depth: 1, CV AUC [ 0.63208912 0.96566446 0.85920665 0.81743476 0.62164089 0.65437987
0.60375736], Average AUC 0.7363104439984387
n_min sample leaf: 12, n_trees: 101,n_depth: 1, CV AUC [ 0.60123369 0.96665878 0.89225063 0.79858218 0.59659091 0.66128659
0.61207048], Average AUC 0.7326676064292528
n_min sample leaf: 18, n_trees: 101,n_depth: 1, CV AUC [ 0.60930661 0.96600116 0.88476957 0.84592803 0.62195391 0.63842914
0.60755294], Average AUC 0.7391344803333669
n_min sample leaf: 24, n_trees: 101,n_depth: 1, CV AUC [ 0.64538615 0.96598801 0.88073969 0.79078283 0.59945286 0.65125816
0.61383024], Average AUC 0.7353482773312174
n_min sample leaf: 30, n_trees: 101,n_depth: 1, CV AUC [ 0.61300505 0.96415457 0.86242109 0.83659775 0.61563026 0.64908665
0.60782102], Average AUC 0.7355309114902494
n_min sample leaf: 6, n_trees: 121,n_depth: 1, CV AUC [ 0.64434712 0.96379156 0.87664668 0.81560659 0.61085069 0.64332491
0.61142284], Average AUC 0.7379986262045992
n_min sample leaf: 12, n_trees: 121,n_depth: 1, CV AUC [ 0.60944602 0.96689815 0.89538352 0.82087805 0.61422033 0.66506107
0.61835309], Average AUC 0.7414628897906242
n_min sample leaf: 18, n_trees: 121,n_depth: 1, CV AUC [ 0.62107797 0.96543035 0.89324758 0.84287405 0.61209754 0.65727522
0.61764175], Average AUC 0.7442349213313905
n_min sample leaf: 24, n_trees: 121,n_depth: 1, CV AUC [ 0.60981429 0.9645807 0.86615109 0.83343855 0.60455598 0.63895031
0.6322826 ], Average AUC 0.7356819319011576
n_min sample leaf: 30, n_trees: 121,n_depth: 1, CV AUC [ 0.63002157 0.96710859 0.83206545 0.80264625 0.61018255 0.6543904
0.62352623], Average AUC 0.731420147916489
n_min sample leaf: 6, n_trees: 141,n_depth: 1, CV AUC [ 0.62155934 0.96790036 0.88172085 0.8047822 0.59818234 0.65287955
0.60749455], Average AUC 0.7335027422720046
n_min sample leaf: 12, n_trees: 141,n_depth: 1, CV AUC [ 0.64282934 0.96433344 0.89308712 0.80921454 0.61352851 0.64458833
0.61578377], Average AUC 0.7404807226786432
n_min sample leaf: 18, n_trees: 141,n_depth: 1, CV AUC [ 0.62512626 0.96623001 0.8787642 0.81617477 0.61867372 0.66398979
0.61459998], Average AUC 0.7405083893522345
n_min sample leaf: 24, n_trees: 141,n_depth: 1, CV AUC [ 0.62820128 0.96694287 0.88308607 0.80962226 0.60658933 0.65908349
0.62047384], Average AUC 0.7391427346105622
n_min sample leaf: 30, n_trees: 141,n_depth: 1, CV AUC [ 0.59809028 0.96529619 0.88015835 0.81173716 0.60679188 0.65725942
0.61976781], Average AUC 0.734157298960186
n_min sample leaf: 6, n_trees: 1,n_depth: 2, CV AUC [ 0.50903304 0.83396991 0.75684712 0.4342803 0.55069444 0.56661139
0.51912388], Average AUC 0.5957942981469565
n_min sample leaf: 12, n_trees: 1,n_depth: 2, CV AUC [ 0.67852483 0.96364162 0.63543245 0.53268624 0.56918666 0.59928932
0.55051041], Average AUC 0.6470387911492883
n_min sample leaf: 18, n_trees: 1,n_depth: 2, CV AUC [ 0.63904409 0.83320707 0.59902146 0.56300505 0.54884259 0.54194567
0.5433864 ], Average AUC 0.6097789055142817
n_min sample leaf: 24, n_trees: 1,n_depth: 2, CV AUC [ 0.60922243 0.83442761 0.55379314 0.64482586 0.53156566 0.5730996
0.53399035], Average AUC 0.6115606643192157
n_min sample leaf: 30, n_trees: 1,n_depth: 2, CV AUC [ 0.58891519 0.95117319 0.66338384 0.72366372 0.53248369 0.60699095
0.54110905], Average AUC 0.6582456616988475
n_min sample leaf: 6, n_trees: 21,n_depth: 2, CV AUC [ 0.64267414 0.96445181 0.82278251 0.76868161 0.65585806 0.67459992
0.67920181], Average AUC 0.7440356937515684
n_min sample leaf: 12, n_trees: 21,n_depth: 2, CV AUC [ 0.65095749 0.94904514 0.86366793 0.70562658 0.58990162 0.65627764
0.65535867], Average AUC 0.724405009266687
n_min sample leaf: 18, n_trees: 21,n_depth: 2, CV AUC [ 0.6471512 0.96503314 0.81353641 0.76677715 0.60070234 0.65726469
0.63495544], Average AUC 0.7264886219850825
n_min sample leaf: 24, n_trees: 21,n_depth: 2, CV AUC [ 0.63438026 0.96762942 0.87737269 0.76828967 0.60050768 0.65390872
0.63234365], Average AUC 0.7334902975320696
n_min sample leaf: 30, n_trees: 21,n_depth: 2, CV AUC [ 0.59618845 0.9730666 0.86317077 0.77491846 0.60032618 0.64409086
0.6338088 ], Average AUC 0.7265100162990598
n_min sample leaf: 6, n_trees: 41,n_depth: 2, CV AUC [ 0.67176978 0.96944971 0.7492924 0.79651199 0.614002 0.6592151
0.63729118], Average AUC 0.7282188797842497
n_min sample leaf: 12, n_trees: 41,n_depth: 2, CV AUC [ 0.63090015 0.97098853 0.83944129 0.77239846 0.5984638 0.67563698
0.66038582], Average AUC 0.7354592904431916
n_min sample leaf: 18, n_trees: 41,n_depth: 2, CV AUC [ 0.65743897 0.97553662 0.81986795 0.7344092 0.60154146 0.65451674
0.63816973], Average AUC 0.7259258094832336
n_min sample leaf: 24, n_trees: 41,n_depth: 2, CV AUC [ 0.61612479 0.967369 0.79694865 0.76341277 0.60884101 0.62701358
0.63794412], Average AUC 0.7168077053802595
n_min sample leaf: 30, n_trees: 41,n_depth: 2, CV AUC [ 0.64506524 0.96694813 0.80946444 0.77921402 0.5984375 0.66373447
0.65309725], Average AUC 0.7308515758381462
n_min sample leaf: 6, n_trees: 61,n_depth: 2, CV AUC [ 0.65240951 0.94272938 0.7851089 0.77479745 0.56297612 0.66113392
0.64737999], Average AUC 0.718076467194505
n_min sample leaf: 12, n_trees: 61,n_depth: 2, CV AUC [ 0.64146412 0.97146728 0.82112005 0.77236953 0.61525673 0.67056222
0.65365995], Average AUC 0.7351285551632836
n_min sample leaf: 18, n_trees: 61,n_depth: 2, CV AUC [ 0.63118687 0.96541456 0.8215988 0.79307397 0.57981113 0.65115551
0.65021207], Average AUC 0.7274932733766407
n_min sample leaf: 24, n_trees: 61,n_depth: 2, CV AUC [ 0.65465593 0.96923927 0.76655093 0.78539299 0.57901936 0.64343546
0.65057571], Average AUC 0.7212670925659996
n_min sample leaf: 30, n_trees: 61,n_depth: 2, CV AUC [ 0.62625737 0.96823706 0.8506471 0.78730008 0.59943182 0.64580964
0.64900704], Average AUC 0.732384301462454
n_min sample leaf: 6, n_trees: 81,n_depth: 2, CV AUC [ 0.62819076 0.96640888 0.84096696 0.76916824 0.62223011 0.6619104
0.62704311], Average AUC 0.7308454963813757
n_min sample leaf: 12, n_trees: 81,n_depth: 2, CV AUC [ 0.64084333 0.96940499 0.81658775 0.7717803 0.5692419 0.67743736
0.63838207], Average AUC 0.726239671382776
n_min sample leaf: 18, n_trees: 81,n_depth: 2, CV AUC [ 0.64534407 0.97344802 0.84423401 0.76597485 0.6059738 0.67793746
0.64017369], Average AUC 0.7361551288905078
n_min sample leaf: 24, n_trees: 81,n_depth: 2, CV AUC [ 0.63512205 0.97027304 0.80591856 0.78055819 0.59700652 0.63602074
0.65090749], Average AUC 0.7251152281311716
n_min sample leaf: 30, n_trees: 81,n_depth: 2, CV AUC [ 0.61993108 0.97010469 0.79951073 0.77589436 0.60902515 0.66070489
0.63234896], Average AUC 0.723931408478209
n_min sample leaf: 6, n_trees: 101,n_depth: 2, CV AUC [ 0.64470749 0.97574968 0.80109954 0.78430661 0.60113373 0.66953043
0.64917692], Average AUC 0.7322434854047161
n_min sample leaf: 12, n_trees: 101,n_depth: 2, CV AUC [ 0.6314657 0.96957597 0.81632997 0.7672138 0.60660511 0.6801195
0.64449482], Average AUC 0.7308292666335439
n_min sample leaf: 18, n_trees: 101,n_depth: 2, CV AUC [ 0.64790351 0.96877894 0.82934291 0.76541719 0.62034407 0.67256001
0.63822547], Average AUC 0.7346531574545245
n_min sample leaf: 24, n_trees: 101,n_depth: 2, CV AUC [ 0.64875842 0.96946286 0.84110375 0.75994581 0.60287247 0.67776111
0.64882656], Average AUC 0.7355329958487539
n_min sample leaf: 30, n_trees: 101,n_depth: 2, CV AUC [ 0.63623211 0.96587489 0.83792877 0.7790483 0.58484322 0.6659981
0.64749943], Average AUC 0.7310606897101541
n_min sample leaf: 6, n_trees: 121,n_depth: 2, CV AUC [ 0.64384207 0.96211858 0.79582544 0.78147622 0.59631471 0.68182775
0.637116 ], Average AUC 0.7283601099324856
n_min sample leaf: 12, n_trees: 121,n_depth: 2, CV AUC [ 0.63222327 0.96911037 0.82122527 0.76223169 0.56509628 0.66648242
0.65065003], Average AUC 0.7238599047655363
n_min sample leaf: 18, n_trees: 121,n_depth: 2, CV AUC [ 0.65253314 0.96903409 0.82065972 0.76732955 0.5961411 0.66999368
0.64775955], Average AUC 0.7319215471296389
n_min sample leaf: 24, n_trees: 121,n_depth: 2, CV AUC [ 0.62965593 0.97222748 0.81281566 0.77721486 0.60962226 0.66253685
0.64532029], Average AUC 0.7299133333885123
n_min sample leaf: 30, n_trees: 121,n_depth: 2, CV AUC [ 0.64427083 0.97054661 0.80969592 0.76097696 0.59187973 0.67715308
0.64908933], Average AUC 0.729087495109343
n_min sample leaf: 6, n_trees: 141,n_depth: 2, CV AUC [ 0.61968382 0.9699416 0.83333859 0.77848801 0.61121896 0.65268478
0.66600222], Average AUC 0.7330511393337907
n_min sample leaf: 12, n_trees: 141,n_depth: 2, CV AUC [ 0.62326915 0.97152252 0.82033091 0.77339541 0.59562553 0.67550011
0.64519819], Average AUC 0.7292631166841692
n_min sample leaf: 18, n_trees: 141,n_depth: 2, CV AUC [ 0.64736164 0.97114373 0.81509101 0.76887363 0.61014573 0.66356601
0.65855704], Average AUC 0.7335341138806896
n_min sample leaf: 24, n_trees: 141,n_depth: 2, CV AUC [ 0.62598117 0.97060974 0.81804503 0.76667456 0.60920402 0.66610865
0.64345966], Average AUC 0.7285832618116831
n_min sample leaf: 30, n_trees: 141,n_depth: 2, CV AUC [ 0.62958491 0.96339173 0.82854324 0.77295612 0.61524095 0.6714282
0.64396927], Average AUC 0.7321592044385217
n_min sample leaf: 6, n_trees: 1,n_depth: 3, CV AUC [ 0.56775568 0.83230745 0.36332071 0.23578756 0.54866635 0.57971415
0.54345541], Average AUC 0.524429616299419
n_min sample leaf: 12, n_trees: 1,n_depth: 3, CV AUC [ 0.56721907 0.66883944 0.7947601 0.39481797 0.4574153 0.51011529
0.53736125], Average AUC 0.5615040584727577
n_min sample leaf: 18, n_trees: 1,n_depth: 3, CV AUC [ 0.47098064 0.93904672 0.80948022 0.57373737 0.47945602 0.6270399
0.4504345 ], Average AUC 0.6214536245968809
n_min sample leaf: 24, n_trees: 1,n_depth: 3, CV AUC [ 0.53489057 0.8293771 0.7571049 0.5764415 0.46064289 0.6383028
0.52419351], Average AUC 0.6172790388077709
n_min sample leaf: 30, n_trees: 1,n_depth: 3, CV AUC [ 0.56471223 0.88919402 0.79483902 0.65997475 0.47950863 0.57729522
0.69242795], Average AUC 0.6654216872789079
n_min sample leaf: 6, n_trees: 21,n_depth: 3, CV AUC [ 0.62975063 0.97273779 0.73304398 0.64302662 0.61090593 0.66608497
0.64305621], Average AUC 0.6998008770175215
n_min sample leaf: 12, n_trees: 21,n_depth: 3, CV AUC [ 0.64955019 0.94708018 0.72689657 0.69738531 0.58632681 0.6594283
0.6375221 ], Average AUC 0.7005984935115084
n_min sample leaf: 18, n_trees: 21,n_depth: 3, CV AUC [ 0.67279303 0.96871054 0.68671086 0.6938447 0.55730745 0.66489787
0.66809908], Average AUC 0.701766219091566
n_min sample leaf: 24, n_trees: 21,n_depth: 3, CV AUC [ 0.63489846 0.97217487 0.742006 0.70514257 0.60145465 0.62959307
0.66966509], Average AUC 0.7078478165666938
n_min sample leaf: 30, n_trees: 21,n_depth: 3, CV AUC [ 0.67859585 0.97267729 0.78581124 0.67965593 0.59991319 0.65891503
0.65296984], Average AUC 0.7183626273492065
n_min sample leaf: 6, n_trees: 41,n_depth: 3, CV AUC [ 0.65517677 0.94077757 0.74688815 0.71977588 0.58045823 0.68693146
0.66139178], Average AUC 0.7130571202970236
n_min sample leaf: 12, n_trees: 41,n_depth: 3, CV AUC [ 0.63959649 0.97369003 0.70524253 0.72084122 0.59012258 0.66752737
0.66573149], Average AUC 0.7089645292240186
n_min sample leaf: 18, n_trees: 41,n_depth: 3, CV AUC [ 0.63581913 0.97180398 0.72226168 0.77105955 0.59877157 0.66610076
0.66039909], Average AUC 0.7180308229218293
n_min sample leaf: 24, n_trees: 41,n_depth: 3, CV AUC [ 0.64659354 0.97054661 0.68594539 0.66578809 0.56313394 0.66587703
0.66341167], Average AUC 0.6944708959504091
n_min sample leaf: 30, n_trees: 41,n_depth: 3, CV AUC [ 0.64618056 0.97272727 0.72482639 0.67991898 0.57608112 0.66611918
0.65291676], Average AUC 0.7026814661042955
n_min sample leaf: 6, n_trees: 61,n_depth: 3, CV AUC [ 0.6558791 0.96763731 0.72921665 0.68983586 0.57860375 0.67077279
0.68108102], Average AUC 0.710432354586535
n_min sample leaf: 12, n_trees: 61,n_depth: 3, CV AUC [ 0.64744844 0.97206965 0.76436237 0.66899989 0.59950284 0.68531796
0.64332695], Average AUC 0.7115754448829629
n_min sample leaf: 18, n_trees: 61,n_depth: 3, CV AUC [ 0.64648832 0.9750947 0.73691604 0.70613426 0.58815499 0.66743788
0.66394783], Average AUC 0.71202485847833
n_min sample leaf: 24, n_trees: 61,n_depth: 3, CV AUC [ 0.65018676 0.96999158 0.67528672 0.71488058 0.60949863 0.67038061
0.67149387], Average AUC 0.7088169640802461
n_min sample leaf: 30, n_trees: 61,n_depth: 3, CV AUC [ 0.64423138 0.97265099 0.75452178 0.71089541 0.57905619 0.65947568
0.6539413 ], Average AUC 0.7106818175151431
n_min sample leaf: 6, n_trees: 81,n_depth: 3, CV AUC [ 0.64577283 0.97673611 0.72604693 0.70235427 0.56155829 0.65780691
0.6698615 ], Average AUC 0.7057338345999017
n_min sample leaf: 12, n_trees: 81,n_depth: 3, CV AUC [ 0.63802346 0.96752946 0.73069497 0.70244108 0.58475379 0.66719046
0.67312889], Average AUC 0.7091088724648024
n_min sample leaf: 18, n_trees: 81,n_depth: 3, CV AUC [ 0.65689447 0.97004945 0.72626526 0.69309238 0.59133786 0.66415561
0.65738386], Average AUC 0.7084541271287671
n_min sample leaf: 24, n_trees: 81,n_depth: 3, CV AUC [ 0.64615688 0.9732823 0.75193866 0.71791088 0.56952862 0.66179196
0.67431003], Average AUC 0.7135599033573242
n_min sample leaf: 30, n_trees: 81,n_depth: 3, CV AUC [ 0.6455282 0.96867898 0.75700758 0.71708228 0.55868845 0.68216467
0.65738917], Average AUC 0.7123627596796676
n_min sample leaf: 6, n_trees: 101,n_depth: 3, CV AUC [ 0.66085332 0.97286932 0.74768782 0.73573495 0.58892835 0.68382554
0.64473104], Average AUC 0.7192329064710522
n_min sample leaf: 12, n_trees: 101,n_depth: 3, CV AUC [ 0.65069707 0.97558923 0.70026568 0.74520465 0.570494 0.6763845
0.66528026], Average AUC 0.7119879136808536
n_min sample leaf: 18, n_trees: 101,n_depth: 3, CV AUC [ 0.65454019 0.97274306 0.70341961 0.67369792 0.58364373 0.67635292
0.64250147], Average AUC 0.700985556726998
n_min sample leaf: 24, n_trees: 101,n_depth: 3, CV AUC [ 0.66274726 0.97031513 0.75918561 0.70183081 0.54487058 0.68303853
0.6628702 ], Average AUC 0.7121225894193094
n_min sample leaf: 30, n_trees: 101,n_depth: 3, CV AUC [ 0.64455492 0.97319287 0.7515362 0.72536564 0.58960701 0.6622052
0.66180585], Average AUC 0.7154668109526876
n_min sample leaf: 6, n_trees: 121,n_depth: 3, CV AUC [ 0.65087332 0.9723064 0.74645676 0.68275989 0.5599616 0.66950411
0.65685832], Average AUC 0.7055314831454318
n_min sample leaf: 12, n_trees: 121,n_depth: 3, CV AUC [ 0.64473906 0.97455545 0.7025463 0.7115767 0.57654935 0.67044115
0.65720337], Average AUC 0.705373053664267
n_min sample leaf: 18, n_trees: 121,n_depth: 3, CV AUC [ 0.65586069 0.97540509 0.70245686 0.72957439 0.58239952 0.67307591
0.65360686], Average AUC 0.7103399031961669
n_min sample leaf: 24, n_trees: 121,n_depth: 3, CV AUC [ 0.65193603 0.97091488 0.71953914 0.7061553 0.57703335 0.66802748
0.65540645], Average AUC 0.7070018041083213
n_min sample leaf: 30, n_trees: 121,n_depth: 3, CV AUC [ 0.64287142 0.97361374 0.74876368 0.70601326 0.59085911 0.68005633
0.65372365], Average AUC 0.7137001699548481
n_min sample leaf: 6, n_trees: 141,n_depth: 3, CV AUC [ 0.64509154 0.97303767 0.73279146 0.66973643 0.58718171 0.67142556
0.67252637], Average AUC 0.707398676854625
n_min sample leaf: 12, n_trees: 141,n_depth: 3, CV AUC [ 0.65259101 0.97179346 0.69942656 0.73473801 0.57286669 0.67809276
0.67949909], Average AUC 0.7127153665226212
n_min sample leaf: 18, n_trees: 141,n_depth: 3, CV AUC [ 0.6470907 0.97373211 0.68815236 0.70598432 0.56958912 0.67203359
0.66032477], Average AUC 0.702415281626443
n_min sample leaf: 24, n_trees: 141,n_depth: 3, CV AUC [ 0.64183765 0.97291141 0.77195654 0.67844329 0.56339962 0.67428932
0.65629827], Average AUC 0.7084480148808473
n_min sample leaf: 30, n_trees: 141,n_depth: 3, CV AUC [ 0.64557555 0.97238005 0.75388258 0.69724064 0.58593224 0.66277374
0.65075885], Average AUC 0.7097919486318524
n_min sample leaf: 6, n_trees: 1,n_depth: 4, CV AUC [ 0.60881734 0.79375263 0.77094381 0.6226089 0.57724116 0.61575068
0.61333125], Average AUC 0.657492253782547
n_min sample leaf: 12, n_trees: 1,n_depth: 4, CV AUC [ 0.58637942 0.93146044 0.51494108 0.70093119 0.60944865 0.61062592
0.57759174], Average AUC 0.6473397771246302
n_min sample leaf: 18, n_trees: 1,n_depth: 4, CV AUC [ 0.5905987 0.93338068 0.73087384 0.69344223 0.59707492 0.54634397
0.54635651], Average AUC 0.6625815494303288
n_min sample leaf: 24, n_trees: 1,n_depth: 4, CV AUC [ 0.4884233 0.83352799 0.50073653 0.60885154 0.59073548 0.57642662
0.5955743 ], Average AUC 0.5991822497502098
n_min sample leaf: 30, n_trees: 1,n_depth: 4, CV AUC [ 0.63074232 0.93159722 0.73456965 0.6362137 0.55212542 0.47570278
0.49093042], Average AUC 0.6359830739247083
n_min sample leaf: 6, n_trees: 21,n_depth: 4, CV AUC [ 0.63737111 0.96779514 0.67171454 0.64324495 0.5430424 0.60078701
0.66566247], Average AUC 0.6756596603200007
n_min sample leaf: 12, n_trees: 21,n_depth: 4, CV AUC [ 0.6479456 0.97234059 0.72426084 0.62173032 0.54141151 0.6747789
0.6687069 ], Average AUC 0.6930249528931418
n_min sample leaf: 18, n_trees: 21,n_depth: 4, CV AUC [ 0.62161195 0.97018098 0.68715541 0.62546296 0.52791982 0.66244736
0.67417201], Average AUC 0.6812786410303415
n_min sample leaf: 24, n_trees: 21,n_depth: 4, CV AUC [ 0.63812079 0.96690867 0.73513784 0.65592645 0.55173611 0.67801906
0.64185649], Average AUC 0.6953864868094005
n_min sample leaf: 30, n_trees: 21,n_depth: 4, CV AUC [ 0.65873053 0.97467119 0.76228956 0.65137574 0.61210017 0.62557381
0.63942254], Average AUC 0.703451934105843
n_min sample leaf: 6, n_trees: 41,n_depth: 4, CV AUC [ 0.63941761 0.97566551 0.67241425 0.57685711 0.57849064 0.67248631
0.67857541], Average AUC 0.6848438343860422
n_min sample leaf: 12, n_trees: 41,n_depth: 4, CV AUC [ 0.64565972 0.97219329 0.66966803 0.57632313 0.5881734 0.66075226
0.63133238], Average AUC 0.6777288880749917
n_min sample leaf: 18, n_trees: 41,n_depth: 4, CV AUC [ 0.62748843 0.96968119 0.69039615 0.63503525 0.55177557 0.66534797
0.67462323], Average AUC 0.6877639678895066
n_min sample leaf: 24, n_trees: 41,n_depth: 4, CV AUC [ 0.63482744 0.97413457 0.69229535 0.67088331 0.58027936 0.65994683
0.66064594], Average AUC 0.6961446861872936
n_min sample leaf: 30, n_trees: 41,n_depth: 4, CV AUC [ 0.63310974 0.97683344 0.72746475 0.60981429 0.56540404 0.6566514
0.65420672], Average AUC 0.6890691980959768
n_min sample leaf: 6, n_trees: 61,n_depth: 4, CV AUC [ 0.65976168 0.9674979 0.65261732 0.64768782 0.52553662 0.66920668
0.67193978], Average AUC 0.6848925401687407
n_min sample leaf: 12, n_trees: 61,n_depth: 4, CV AUC [ 0.6328388 0.97187237 0.66148464 0.64713542 0.59315025 0.6740577
0.65829427], Average AUC 0.6912619213474679
n_min sample leaf: 18, n_trees: 61,n_depth: 4, CV AUC [ 0.63552978 0.97418455 0.64536774 0.65971696 0.56217119 0.66081543
0.65742633], Average AUC 0.6850302841617674
n_min sample leaf: 24, n_trees: 61,n_depth: 4, CV AUC [ 0.66201862 0.97047296 0.69085122 0.61035354 0.55425347 0.64853127
0.6605265 ], Average AUC 0.6852867968204626
n_min sample leaf: 30, n_trees: 61,n_depth: 4, CV AUC [ 0.62770412 0.97334017 0.67942182 0.66089278 0.5787379 0.66638503
0.66452911], Average AUC 0.6930015627076906
n_min sample leaf: 6, n_trees: 81,n_depth: 4, CV AUC [ 0.66000368 0.97293771 0.65994581 0.59082492 0.54972117 0.67051485
0.67406053], Average AUC 0.6825726662907804
n_min sample leaf: 12, n_trees: 81,n_depth: 4, CV AUC [ 0.64963699 0.97522622 0.65087332 0.58137363 0.5526673 0.67092809
0.65914098], Average AUC 0.6771209328825714
n_min sample leaf: 18, n_trees: 81,n_depth: 4, CV AUC [ 0.64558081 0.97496843 0.67825126 0.67293508 0.58167877 0.66265793
0.66907053], Average AUC 0.6978775448464638
n_min sample leaf: 24, n_trees: 81,n_depth: 4, CV AUC [ 0.64597275 0.97453441 0.70443234 0.63754209 0.55757839 0.66817225
0.65683974], Average AUC 0.6921531375883303
n_min sample leaf: 30, n_trees: 81,n_depth: 4, CV AUC [ 0.6302662 0.97023359 0.71454388 0.62862216 0.55381944 0.66383712
0.66465917], Average AUC 0.6894259372122414
n_min sample leaf: 6, n_trees: 101,n_depth: 4, CV AUC [ 0.64694076 0.97372422 0.68405671 0.6009233 0.54404987 0.68290956
0.67400479], Average AUC 0.6866584590710777
n_min sample leaf: 12, n_trees: 101,n_depth: 4, CV AUC [ 0.65219907 0.97616004 0.65578178 0.65375631 0.58259154 0.66816435
0.67036846], Average AUC 0.6941459364590449
n_min sample leaf: 18, n_trees: 101,n_depth: 4, CV AUC [ 0.64504945 0.97472906 0.67776463 0.6429556 0.54771412 0.6564224
0.65686628], Average AUC 0.685928792365913
n_min sample leaf: 24, n_trees: 101,n_depth: 4, CV AUC [ 0.65568971 0.97244055 0.68675821 0.67320865 0.54038037 0.65915719
0.66316482], Average AUC 0.6929713568405278
n_min sample leaf: 30, n_trees: 101,n_depth: 4, CV AUC [ 0.64162984 0.97084122 0.69351589 0.60107323 0.57823022 0.67057012
0.66094056], Average AUC 0.6881144406785474
n_min sample leaf: 6, n_trees: 121,n_depth: 4, CV AUC [ 0.64636206 0.97495791 0.6837621 0.64818497 0.4923427 0.67075437
0.6819118 ], Average AUC 0.6854679881609381
n_min sample leaf: 12, n_trees: 121,n_depth: 4, CV AUC [ 0.64800084 0.97607586 0.69083018 0.64115899 0.55035774 0.66955675
0.67942477], Average AUC 0.6936293043486084
n_min sample leaf: 18, n_trees: 121,n_depth: 4, CV AUC [ 0.65380892 0.96976273 0.67670455 0.62317445 0.56880524 0.65791219
0.66984292], Average AUC 0.6885730008102963
n_min sample leaf: 24, n_trees: 121,n_depth: 4, CV AUC [ 0.64012521 0.97171191 0.71272359 0.64341593 0.55303293 0.66140503
0.66076803], Average AUC 0.6918832345526347
n_min sample leaf: 30, n_trees: 121,n_depth: 4, CV AUC [ 0.65193077 0.97325337 0.70616319 0.64062763 0.57069918 0.65392451
0.66376999], Average AUC 0.6943383772543806
n_min sample leaf: 6, n_trees: 141,n_depth: 4, CV AUC [ 0.65213331 0.97639678 0.66233954 0.58727904 0.53947022 0.66463466
0.67642281], Average AUC 0.679810909848124
n_min sample leaf: 12, n_trees: 141,n_depth: 4, CV AUC [ 0.64952388 0.9761653 0.65403514 0.61916035 0.53984112 0.66239471
0.66964651], Average AUC 0.68153814586846
n_min sample leaf: 18, n_trees: 141,n_depth: 4, CV AUC [ 0.64941077 0.9739899 0.70041035 0.63560869 0.57018887 0.67240208
0.66250922], Average AUC 0.6949314134333072
n_min sample leaf: 24, n_trees: 141,n_depth: 4, CV AUC [ 0.65900147 0.97397412 0.72094644 0.63276778 0.56932607 0.66338966
0.65935863], Average AUC 0.6969663108017602
n_min sample leaf: 30, n_trees: 141,n_depth: 4, CV AUC [ 0.63317287 0.97175926 0.69953441 0.6503025 0.55920665 0.6645557
0.66749125], Average AUC 0.6922889492315141
((18, 121, 1), 0.74423492133139046)
In [38]:
model = RandomForestClassifier(n_estimators=81, max_depth=1, min_samples_leaf=6, min_samples_split=6)
model.fit(X, y)
print (model.score(X, y))
scores=cross_val_score(model, X, y, scoring='roc_auc', cv=7)
print (scores.mean())
0.92964360587
0.740138453703
In [143]:
features = X.columns
feature_importances = model.feature_importances_
features_df = pd.DataFrame({'Features': features, 'Importance Score': feature_importances})
features_df.sort('Importance Score', inplace=True, ascending=False)
features_df
/Users/annakudryashova/anaconda/lib/python3.5/site-packages/ipykernel/__main__.py:5: FutureWarning: sort(columns=....) is deprecated, use sort_values(by=.....)
Out[143]:
Features
Importance Score
5
Week
0.172840
8
Pop
0.172840
0
Year
0.098765
6
Quarter
0.086420
1
Month
0.074074
31
Region_West
0.074074
4
Duration_Sec
0.061728
3
Long
0.061728
9
Milit_Share
0.061728
7
TimePer
0.061728
13
Shape_Cone
0.024691
22
Shape_Light
0.012346
30
Region_Northeast
0.012346
29
Region_Midwest
0.012346
18
Shape_Egg
0.012346
28
Shape_Triangle
0.000000
27
Shape_Teardrop
0.000000
26
Shape_Sphere
0.000000
25
Shape_Rectangle
0.000000
24
Shape_Oval
0.000000
23
Shape_Other
0.000000
19
Shape_Fireball
0.000000
21
Shape_Formation
0.000000
20
Shape_Flash
0.000000
17
Shape_Disk
0.000000
2
Lat
0.000000
15
Shape_Cylinder
0.000000
14
Shape_Cross
0.000000
12
Shape_Circle
0.000000
11
Shape_Cigar
0.000000
10
Shape_Chevron
0.000000
16
Shape_Diamond
0.000000
In [39]:
# 3rd revision: remove the 5 least important features
X = df[['Year', 'Month', 'Lat', 'Duration_Sec', 'Week','Quarter', 'TimePer', 'Pop',
'Shape_Egg', 'Shape_Light','Shape_Cone', 'Region_Midwest', 'Region_Northeast','Region_West']]
y = df['Hoax']
score_model={}
for n_depth in range(1, 5, 1):
for n_trees in range(1, 150, 20):
for n_min in range(6, 36, 6):
model = RandomForestClassifier(n_estimators=n_trees, max_depth=n_depth, min_samples_leaf=n_min)
scores = cross_val_score(model, X, y, scoring='roc_auc', cv=7)
score_model[n_min,n_trees,n_depth]=scores.mean()
print('n_min sample leaf: {}, n_trees: {},n_depth: {}, CV AUC {}, Average AUC {}'.format(n_min,n_trees,n_depth,scores, scores.mean()))
best=max(score_model.items(), key=lambda x: x[1])
print (best)
n_min sample leaf: 6, n_trees: 1,n_depth: 1, CV AUC [ 0.5 0.49905303 0.49905303 0.56218434 0.53566919 0.56488208
0.5 ], Average AUC 0.5229773823425118
n_min sample leaf: 12, n_trees: 1,n_depth: 1, CV AUC [ 0.5 0.91401515 0.73598485 0.5 0.50991162 0.58154875
0.52493139], Average AUC 0.6094845358365786
n_min sample leaf: 18, n_trees: 1,n_depth: 1, CV AUC [ 0.58541667 0.83320707 0.49905303 0.5 0.5 0.56017583
0.5353626 ], Average AUC 0.5733164559301773
n_min sample leaf: 24, n_trees: 1,n_depth: 1, CV AUC [ 0.58661616 0.46262626 0.74242424 0.5 0.55498737 0.5
0.5353626 ], Average AUC 0.5545738053926753
n_min sample leaf: 30, n_trees: 1,n_depth: 1, CV AUC [ 0.58541667 0.46262626 0.74835859 0.67714646 0.53156566 0.57422879
0.5 ], Average AUC 0.5827632030530009
n_min sample leaf: 6, n_trees: 21,n_depth: 1, CV AUC [ 0.65409301 0.96136627 0.87688868 0.75584491 0.57416614 0.65166351
0.64822139], Average AUC 0.7317491286684215
n_min sample leaf: 12, n_trees: 21,n_depth: 1, CV AUC [ 0.56323916 0.95832544 0.87519202 0.77778567 0.57942182 0.65543799
0.59696247], Average AUC 0.7151949402146274
n_min sample leaf: 18, n_trees: 21,n_depth: 1, CV AUC [ 0.65070497 0.9663326 0.88124211 0.70661564 0.59681713 0.64851811
0.58366202], Average AUC 0.7191275091616023
n_min sample leaf: 24, n_trees: 21,n_depth: 1, CV AUC [ 0.62672822 0.9664194 0.89812973 0.73673453 0.65482691 0.62620552
0.56737818], Average AUC 0.7252032149680384
n_min sample leaf: 30, n_trees: 21,n_depth: 1, CV AUC [ 0.65636311 0.96261048 0.83925715 0.72749369 0.56185816 0.63271215
0.58982519], Average AUC 0.7100171339572954
n_min sample leaf: 6, n_trees: 41,n_depth: 1, CV AUC [ 0.65605271 0.96432029 0.88345697 0.76452546 0.59967119 0.65531954
0.62362974], Average AUC 0.7352822719729781
n_min sample leaf: 12, n_trees: 41,n_depth: 1, CV AUC [ 0.65012889 0.96225537 0.81678767 0.81054293 0.59445234 0.65885186
0.61265176], Average AUC 0.7293815447372086
n_min sample leaf: 18, n_trees: 41,n_depth: 1, CV AUC [ 0.63895728 0.96401515 0.89879524 0.79544928 0.60833596 0.66531112
0.61234652], Average AUC 0.7404586515859014
n_min sample leaf: 24, n_trees: 41,n_depth: 1, CV AUC [ 0.63945444 0.96619581 0.88420402 0.77462647 0.60252788 0.62025163
0.6175462 ], Average AUC 0.7292580653031455
n_min sample leaf: 30, n_trees: 41,n_depth: 1, CV AUC [ 0.65487689 0.96337595 0.83938868 0.78760785 0.60278567 0.63323331
0.62819506], Average AUC 0.7299233436878166
n_min sample leaf: 6, n_trees: 61,n_depth: 1, CV AUC [ 0.66434396 0.96509628 0.88560869 0.78687921 0.58458544 0.6444462
0.62992297], Average AUC 0.7372689637205119
n_min sample leaf: 12, n_trees: 61,n_depth: 1, CV AUC [ 0.61285511 0.96581439 0.87539457 0.77665983 0.61964699 0.63735786
0.61067965], Average AUC 0.7283440584294266
n_min sample leaf: 18, n_trees: 61,n_depth: 1, CV AUC [ 0.65656303 0.96332071 0.83424348 0.80978272 0.59932923 0.61040219
0.59538054], Average AUC 0.7241459846399715
n_min sample leaf: 24, n_trees: 61,n_depth: 1, CV AUC [ 0.63963331 0.96478851 0.88109217 0.7884154 0.60584754 0.64763108
0.61137772], Average AUC 0.7341122474596505
n_min sample leaf: 30, n_trees: 61,n_depth: 1, CV AUC [ 0.66171349 0.96379156 0.88194181 0.78493529 0.62472643 0.63402822
0.59745882], Average AUC 0.7355136602296884
n_min sample leaf: 6, n_trees: 81,n_depth: 1, CV AUC [ 0.64755366 0.9641572 0.8764073 0.79417877 0.59193761 0.64886292
0.59943889], Average AUC 0.7317909060710274
n_min sample leaf: 12, n_trees: 81,n_depth: 1, CV AUC [ 0.65016572 0.96377578 0.87969802 0.79370265 0.61210017 0.63726574
0.61781162], Average AUC 0.7363599575230209
n_min sample leaf: 18, n_trees: 81,n_depth: 1, CV AUC [ 0.65005787 0.96314184 0.8559133 0.78033986 0.60561606 0.62703727
0.60583564], Average AUC 0.7268488324947538
n_min sample leaf: 24, n_trees: 81,n_depth: 1, CV AUC [ 0.64510995 0.9667298 0.87843803 0.78861532 0.61835806 0.66433986
0.61272077], Average AUC 0.7391873979754393
n_min sample leaf: 30, n_trees: 81,n_depth: 1, CV AUC [ 0.65271465 0.965504 0.88393571 0.76981008 0.63645833 0.63449937
0.62083747], Average AUC 0.7376799438843985
n_min sample leaf: 6, n_trees: 101,n_depth: 1, CV AUC [ 0.63984112 0.96428609 0.8742661 0.77058607 0.60462963 0.65745157
0.61663844], Average AUC 0.732528431229623
n_min sample leaf: 12, n_trees: 101,n_depth: 1, CV AUC [ 0.62096749 0.96338121 0.89055135 0.79398674 0.59135364 0.66035744
0.6240093 ], Average AUC 0.7349438813221422
n_min sample leaf: 18, n_trees: 101,n_depth: 1, CV AUC [ 0.64287405 0.96376263 0.87514205 0.79468645 0.60432713 0.63549695
0.61028947], Average AUC 0.7323683882016789
n_min sample leaf: 24, n_trees: 101,n_depth: 1, CV AUC [ 0.65782565 0.96351273 0.88336227 0.79285038 0.59933712 0.63250158
0.61544668], Average AUC 0.7349766308114172
n_min sample leaf: 30, n_trees: 101,n_depth: 1, CV AUC [ 0.65014731 0.96276042 0.89067235 0.78414615 0.60430082 0.6456096
0.60274609], Average AUC 0.7343403903181678
n_min sample leaf: 6, n_trees: 121,n_depth: 1, CV AUC [ 0.64275568 0.96377841 0.88727641 0.77516835 0.59453914 0.64133765
0.59799232], Average AUC 0.7289782809084985
n_min sample leaf: 12, n_trees: 121,n_depth: 1, CV AUC [ 0.64011469 0.96434659 0.88231797 0.77488952 0.59142466 0.6436355
0.60624174], Average AUC 0.7289958107422222
n_min sample leaf: 18, n_trees: 121,n_depth: 1, CV AUC [ 0.64916088 0.965625 0.87783039 0.78293613 0.61944707 0.66171826
0.61431066], Average AUC 0.7387183420905484
n_min sample leaf: 24, n_trees: 121,n_depth: 1, CV AUC [ 0.65080755 0.96316025 0.89242424 0.77857481 0.62152515 0.64645715
0.61741879], Average AUC 0.7386239922007459
n_min sample leaf: 30, n_trees: 121,n_depth: 1, CV AUC [ 0.65294613 0.96558291 0.87798032 0.77628104 0.60712332 0.64645188
0.61306317], Average AUC 0.7342041101473483
n_min sample leaf: 6, n_trees: 141,n_depth: 1, CV AUC [ 0.64750631 0.96342066 0.86479377 0.77316656 0.59374474 0.6437908
0.61451239], Average AUC 0.7287050335884879
n_min sample leaf: 12, n_trees: 141,n_depth: 1, CV AUC [ 0.64765362 0.96362321 0.88147359 0.78485901 0.60781776 0.64272215
0.61218992], Average AUC 0.7343341796588974
n_min sample leaf: 18, n_trees: 141,n_depth: 1, CV AUC [ 0.64520728 0.96406513 0.87970854 0.77799611 0.59503893 0.64571752
0.61859197], Average AUC 0.7323322121749277
n_min sample leaf: 24, n_trees: 141,n_depth: 1, CV AUC [ 0.65195181 0.96448338 0.87187763 0.7928609 0.59996054 0.64042693
0.61508305], Average AUC 0.7338063203978862
n_min sample leaf: 30, n_trees: 141,n_depth: 1, CV AUC [ 0.65765993 0.96548032 0.87645728 0.79056187 0.6208044 0.63676827
0.60550917], Average AUC 0.7361773195498583
n_min sample leaf: 6, n_trees: 1,n_depth: 2, CV AUC [ 0.57368213 0.9219697 0.79738268 0.65230166 0.51377052 0.63492841
0.48696762], Average AUC 0.6544289601907687
n_min sample leaf: 12, n_trees: 1,n_depth: 2, CV AUC [ 0.59337384 0.66853956 0.7872238 0.72457912 0.53156566 0.60611181
0.64971573], Average AUC 0.6515870755960774
n_min sample leaf: 18, n_trees: 1,n_depth: 2, CV AUC [ 0.5885101 0.62834333 0.85112847 0.7264678 0.53815499 0.63171984
0.5353626 ], Average AUC 0.6428124465515036
n_min sample leaf: 24, n_trees: 1,n_depth: 2, CV AUC [ 0.56717172 0.86832386 0.6418508 0.74229535 0.58893098 0.54430406
0.55622502], Average AUC 0.6441573981567046
n_min sample leaf: 30, n_trees: 1,n_depth: 2, CV AUC [ 0.61858691 0.83485375 0.67726484 0.69726957 0.43750789 0.54883133
0.56277571], Average AUC 0.6252985711240218
n_min sample leaf: 6, n_trees: 21,n_depth: 2, CV AUC [ 0.68723432 0.959617 0.81626684 0.72672822 0.61544876 0.6789561
0.65872957], Average AUC 0.7347115434007885
n_min sample leaf: 12, n_trees: 21,n_depth: 2, CV AUC [ 0.64648043 0.96101904 0.79835859 0.71786616 0.63049505 0.66804854
0.64666599], Average AUC 0.7241334009834973
n_min sample leaf: 18, n_trees: 21,n_depth: 2, CV AUC [ 0.68203914 0.96546191 0.7733428 0.75920139 0.62793298 0.69507528
0.65751392], Average AUC 0.737223917179144
n_min sample leaf: 24, n_trees: 21,n_depth: 2, CV AUC [ 0.66967593 0.96515941 0.77855114 0.72473169 0.58393045 0.68030112
0.58198453], Average AUC 0.7120477511651112
n_min sample leaf: 30, n_trees: 21,n_depth: 2, CV AUC [ 0.63264941 0.96775042 0.84752473 0.74648043 0.59047769 0.66317119
0.65638321], Average AUC 0.7292052978823599
n_min sample leaf: 6, n_trees: 41,n_depth: 2, CV AUC [ 0.66909196 0.97005997 0.76065867 0.75492161 0.59717487 0.68161981
0.64123274], Average AUC 0.7249656638822921
n_min sample leaf: 12, n_trees: 41,n_depth: 2, CV AUC [ 0.68656881 0.96664562 0.83149463 0.75328283 0.62738584 0.67637134
0.63429453], Average AUC 0.7394348004895852
n_min sample leaf: 18, n_trees: 41,n_depth: 2, CV AUC [ 0.66315762 0.96840804 0.7935343 0.76134785 0.61677715 0.68163298
0.64467796], Average AUC 0.7327908418427448
n_min sample leaf: 24, n_trees: 41,n_depth: 2, CV AUC [ 0.68244423 0.97051505 0.76443077 0.74742477 0.62544192 0.67311013
0.6491026 ], Average AUC 0.7303527799865265
n_min sample leaf: 30, n_trees: 41,n_depth: 2, CV AUC [ 0.66214489 0.9666088 0.77135943 0.75193866 0.63195497 0.66020478
0.63991092], Average AUC 0.7263032053240651
n_min sample leaf: 6, n_trees: 61,n_depth: 2, CV AUC [ 0.68693708 0.7223327 0.79943708 0.75192551 0.61266572 0.68258581
0.64153798], Average AUC 0.6996316960349899
n_min sample leaf: 12, n_trees: 61,n_depth: 2, CV AUC [ 0.6881734 0.96991793 0.81082965 0.75544245 0.59726168 0.67102285
0.65874284], Average AUC 0.7359129703270917
n_min sample leaf: 18, n_trees: 61,n_depth: 2, CV AUC [ 0.66461753 0.96842119 0.81591698 0.75354324 0.62339278 0.65206096
0.64426655], Average AUC 0.7317456057982162
n_min sample leaf: 24, n_trees: 61,n_depth: 2, CV AUC [ 0.66731376 0.9725826 0.79450495 0.76434922 0.60056292 0.68047484
0.64634483], Average AUC 0.7323047305086108
n_min sample leaf: 30, n_trees: 61,n_depth: 2, CV AUC [ 0.67731745 0.971633 0.806371 0.75586595 0.60166509 0.67835597
0.64069127], Average AUC 0.73312853231337
n_min sample leaf: 6, n_trees: 81,n_depth: 2, CV AUC [ 0.66967593 0.97608638 0.81414141 0.76166877 0.60747317 0.67762424
0.64615107], Average AUC 0.7361172815803771
n_min sample leaf: 12, n_trees: 81,n_depth: 2, CV AUC [ 0.67808291 0.96920507 0.77131734 0.75488215 0.62053872 0.66197094
0.64672704], Average AUC 0.7289605975420567
n_min sample leaf: 18, n_trees: 81,n_depth: 2, CV AUC [ 0.67330861 0.97491056 0.767127 0.75327231 0.61692971 0.66790377
0.63399194], Average AUC 0.726777700163094
n_min sample leaf: 24, n_trees: 81,n_depth: 2, CV AUC [ 0.67103062 0.96726904 0.78242845 0.75704966 0.62374527 0.67039903
0.64628113], Average AUC 0.7311718860061205
n_min sample leaf: 30, n_trees: 81,n_depth: 2, CV AUC [ 0.66042719 0.96777936 0.80057607 0.75842014 0.61806871 0.67608707
0.6360251 ], Average AUC 0.731054804887747
n_min sample leaf: 6, n_trees: 101,n_depth: 2, CV AUC [ 0.69057765 0.94537037 0.79826915 0.76818708 0.615625 0.68147242
0.63441928], Average AUC 0.7334172774750959
n_min sample leaf: 12, n_trees: 101,n_depth: 2, CV AUC [ 0.68842856 0.97309028 0.81421507 0.76065341 0.60795455 0.68336492
0.64921408], Average AUC 0.7395601215741853
n_min sample leaf: 18, n_trees: 101,n_depth: 2, CV AUC [ 0.6777541 0.96922085 0.81423348 0.76596433 0.60198074 0.67999052
0.64121151], Average AUC 0.7357650779061824
n_min sample leaf: 24, n_trees: 101,n_depth: 2, CV AUC [ 0.67888521 0.97181713 0.78352799 0.76574337 0.61455703 0.67410244
0.63763888], Average AUC 0.7323245786075808
n_min sample leaf: 30, n_trees: 101,n_depth: 2, CV AUC [ 0.67822759 0.97149095 0.84651199 0.74244266 0.60746528 0.67545799
0.64649878], Average AUC 0.7382993193656285
n_min sample leaf: 6, n_trees: 121,n_depth: 2, CV AUC [ 0.67783302 0.97376631 0.7618266 0.76413878 0.61847643 0.67219678
0.64398785], Average AUC 0.7303179675692932
n_min sample leaf: 12, n_trees: 121,n_depth: 2, CV AUC [ 0.66484638 0.97261942 0.77036248 0.74918192 0.60206229 0.67351284
0.63634626], Average AUC 0.7241330862399293
n_min sample leaf: 18, n_trees: 121,n_depth: 2, CV AUC [ 0.67907197 0.96912616 0.77275884 0.76389941 0.61863163 0.66984892
0.65292207], Average AUC 0.7323227123527103
n_min sample leaf: 24, n_trees: 121,n_depth: 2, CV AUC [ 0.66541982 0.96812658 0.7539536 0.75520833 0.62315867 0.67349442
0.6409567 ], Average AUC 0.7257597316828939
n_min sample leaf: 30, n_trees: 121,n_depth: 2, CV AUC [ 0.6696996 0.9740846 0.82524463 0.77392414 0.61495949 0.67782954
0.65289818], Average AUC 0.7412343112235481
n_min sample leaf: 6, n_trees: 141,n_depth: 2, CV AUC [ 0.67658617 0.9712963 0.78554556 0.76104535 0.61766362 0.67885871
0.65663536], Average AUC 0.7353758666050888
n_min sample leaf: 12, n_trees: 141,n_depth: 2, CV AUC [ 0.66151357 0.973619 0.79602799 0.74537826 0.61203178 0.67539745
0.65532682], Average AUC 0.731327838853833
n_min sample leaf: 18, n_trees: 141,n_depth: 2, CV AUC [ 0.67542351 0.9730403 0.75111006 0.760998 0.60368529 0.68303853
0.63471921], Average AUC 0.7260021280010948
n_min sample leaf: 24, n_trees: 141,n_depth: 2, CV AUC [ 0.68283617 0.97138047 0.82150673 0.75579493 0.62473695 0.67439987
0.64770912], Average AUC 0.7397663214045804
n_min sample leaf: 30, n_trees: 141,n_depth: 2, CV AUC [ 0.67586279 0.9748448 0.81030093 0.76832912 0.61506471 0.67416825
0.65043238], Average AUC 0.7384289972106558
n_min sample leaf: 6, n_trees: 1,n_depth: 3, CV AUC [ 0.56629314 0.74646991 0.71814499 0.67021254 0.57789615 0.59767583
0.55329738], Average AUC 0.6328557047651563
n_min sample leaf: 12, n_trees: 1,n_depth: 3, CV AUC [ 0.61463331 0.90310659 0.7921875 0.68858638 0.61435974 0.57378922
0.69187852], Average AUC 0.6969344665600629
n_min sample leaf: 18, n_trees: 1,n_depth: 3, CV AUC [ 0.64800873 0.91982323 0.77660196 0.61199495 0.55778093 0.60907033
0.57161968], Average AUC 0.6706999735078154
n_min sample leaf: 24, n_trees: 1,n_depth: 3, CV AUC [ 0.61380471 0.83382786 0.77201705 0.68637153 0.4472722 0.58619183
0.62887189], Average AUC 0.6526224385582383
n_min sample leaf: 30, n_trees: 1,n_depth: 3, CV AUC [ 0.61566183 0.84608586 0.77129367 0.67874053 0.63410932 0.55983628
0.55605249], Average AUC 0.665968567930085
n_min sample leaf: 6, n_trees: 21,n_depth: 3, CV AUC [ 0.68959122 0.76843697 0.69162195 0.74257418 0.63647412 0.67298115
0.62856134], Average AUC 0.6900344198717855
n_min sample leaf: 12, n_trees: 21,n_depth: 3, CV AUC [ 0.67693077 0.97158039 0.68273885 0.75996423 0.61802399 0.67276005
0.64214846], Average AUC 0.7177352468358337
n_min sample leaf: 18, n_trees: 21,n_depth: 3, CV AUC [ 0.67811185 0.96854482 0.70330387 0.73439867 0.6275726 0.68325174
0.66285428], Average AUC 0.7225768330935429
n_min sample leaf: 24, n_trees: 21,n_depth: 3, CV AUC [ 0.65054977 0.97031513 0.68763415 0.64968171 0.61486479 0.68138555
0.66663393], Average AUC 0.7030092922159803
n_min sample leaf: 30, n_trees: 21,n_depth: 3, CV AUC [ 0.67027567 0.97196444 0.70760995 0.73779987 0.64342908 0.67261266
0.64642711], Average AUC 0.7214455408923112
n_min sample leaf: 6, n_trees: 41,n_depth: 3, CV AUC [ 0.66917877 0.96178451 0.73035564 0.72710175 0.63313605 0.67776637
0.64580336], Average AUC 0.7207323496052804
n_min sample leaf: 12, n_trees: 41,n_depth: 3, CV AUC [ 0.67443182 0.9712016 0.68927294 0.74207439 0.61806345 0.67206254
0.64959629], Average AUC 0.7166718599564866
n_min sample leaf: 18, n_trees: 41,n_depth: 3, CV AUC [ 0.65967224 0.97130682 0.69127473 0.70600274 0.60798874 0.68348863
0.65902154], Average AUC 0.7112507758480122
n_min sample leaf: 24, n_trees: 41,n_depth: 3, CV AUC [ 0.67402146 0.97133312 0.71571444 0.73836279 0.61195023 0.68221468
0.66515286], Average AUC 0.7226785121144529
n_min sample leaf: 30, n_trees: 41,n_depth: 3, CV AUC [ 0.68558502 0.96744529 0.7281855 0.72043613 0.60807029 0.68639977
0.68075986], Average AUC 0.7252688356875705
n_min sample leaf: 6, n_trees: 61,n_depth: 3, CV AUC [ 0.68064499 0.96811869 0.74315288 0.70795191 0.62392414 0.67336281
0.65417753], Average AUC 0.72161899340581
n_min sample leaf: 12, n_trees: 61,n_depth: 3, CV AUC [ 0.6853509 0.96891572 0.71691656 0.72629682 0.58695023 0.66970415
0.65427308], Average AUC 0.7154867811696646
n_min sample leaf: 18, n_trees: 61,n_depth: 3, CV AUC [ 0.67989531 0.97288247 0.71632471 0.74303188 0.62368739 0.67694778
0.66251188], Average AUC 0.7250402022165642
n_min sample leaf: 24, n_trees: 61,n_depth: 3, CV AUC [ 0.67908512 0.97382418 0.75504787 0.74146675 0.6102904 0.67877711
0.66735058], Average AUC 0.7294060029530199
n_min sample leaf: 30, n_trees: 61,n_depth: 3, CV AUC [ 0.67923506 0.97269834 0.71716646 0.7472722 0.61380471 0.67854285
0.66107327], Average AUC 0.724256127439969
n_min sample leaf: 6, n_trees: 81,n_depth: 3, CV AUC [ 0.69410774 0.96357586 0.73960175 0.72770149 0.62433449 0.67996684
0.66223318], Average AUC 0.7273601935485999
n_min sample leaf: 12, n_trees: 81,n_depth: 3, CV AUC [ 0.67356639 0.97479745 0.72187763 0.73855745 0.62290878 0.68691303
0.64670581], Average AUC 0.7236180777681581
n_min sample leaf: 18, n_trees: 81,n_depth: 3, CV AUC [ 0.67194865 0.9741714 0.69162721 0.73751578 0.61208439 0.68565224
0.64964141], Average AUC 0.7175201551161312
n_min sample leaf: 24, n_trees: 81,n_depth: 3, CV AUC [ 0.67055187 0.97286932 0.75408249 0.75575284 0.60556345 0.67727153
0.65488356], Average AUC 0.7272821513369764
n_min sample leaf: 30, n_trees: 81,n_depth: 3, CV AUC [ 0.66303662 0.97198548 0.72662826 0.74429977 0.61999947 0.67682407
0.65825711], Average AUC 0.723004397208723
n_min sample leaf: 6, n_trees: 101,n_depth: 3, CV AUC [ 0.68212595 0.97123053 0.75207281 0.73653988 0.61941025 0.67375763
0.65158698], Average AUC 0.726674861365228
n_min sample leaf: 12, n_trees: 101,n_depth: 3, CV AUC [ 0.68389888 0.97275621 0.70424558 0.72442919 0.61254209 0.67107286
0.64346231], Average AUC 0.7160581597678443
n_min sample leaf: 18, n_trees: 101,n_depth: 3, CV AUC [ 0.68345434 0.97389257 0.68402515 0.72783302 0.62784617 0.68648136
0.66430881], Average AUC 0.7211202017652936
n_min sample leaf: 24, n_trees: 101,n_depth: 3, CV AUC [ 0.68755524 0.97170139 0.73082386 0.73082649 0.62677031 0.67661876
0.66075211], Average AUC 0.7264354520598192
n_min sample leaf: 30, n_trees: 101,n_depth: 3, CV AUC [ 0.68189447 0.9705545 0.7380524 0.71506208 0.61625368 0.6738945
0.64224401], Average AUC 0.7197079492023523
n_min sample leaf: 6, n_trees: 121,n_depth: 3, CV AUC [ 0.67952704 0.9738347 0.72035196 0.72966909 0.62595749 0.68236997
0.67153899], Average AUC 0.7261784626727085
n_min sample leaf: 12, n_trees: 121,n_depth: 3, CV AUC [ 0.68966751 0.97384785 0.72063868 0.7310343 0.62399779 0.68458886
0.65294861], Average AUC 0.7252462287633489
n_min sample leaf: 18, n_trees: 121,n_depth: 3, CV AUC [ 0.68113426 0.97227746 0.70490583 0.72485006 0.62099905 0.68906875
0.65626377], Average AUC 0.7213570267020264
n_min sample leaf: 24, n_trees: 121,n_depth: 3, CV AUC [ 0.68653725 0.97153304 0.73411721 0.72626 0.61122159 0.6777769
0.65668314], Average AUC 0.7234470182605064
n_min sample leaf: 30, n_trees: 121,n_depth: 3, CV AUC [ 0.6711069 0.97240109 0.74316604 0.73854956 0.61861059 0.67867709
0.66653042], Average AUC 0.7270059542956779
n_min sample leaf: 6, n_trees: 141,n_depth: 3, CV AUC [ 0.69027778 0.97568392 0.69612005 0.74404198 0.604506 0.67152295
0.64580867], Average AUC 0.7182801939941257
n_min sample leaf: 12, n_trees: 141,n_depth: 3, CV AUC [ 0.68010838 0.97364531 0.69412616 0.73526147 0.60711806 0.68825542
0.65515429], Average AUC 0.7190955826188672
n_min sample leaf: 18, n_trees: 141,n_depth: 3, CV AUC [ 0.68319129 0.97610217 0.7162642 0.73659775 0.60967224 0.68113813
0.66983496], Average AUC 0.7246858206399196
n_min sample leaf: 24, n_trees: 141,n_depth: 3, CV AUC [ 0.67746475 0.97406881 0.71156355 0.73594013 0.61480166 0.68211729
0.66005139], Average AUC 0.7222867977283532
n_min sample leaf: 30, n_trees: 141,n_depth: 3, CV AUC [ 0.67353483 0.97294297 0.74220328 0.72588384 0.62167508 0.67634502
0.66498829], Average AUC 0.7253676169946178
n_min sample leaf: 6, n_trees: 1,n_depth: 4, CV AUC [ 0.57195128 0.80360375 0.72486585 0.74748264 0.6264073 0.62980101
0.61340291], Average AUC 0.673930676765592
n_min sample leaf: 12, n_trees: 1,n_depth: 4, CV AUC [ 0.65614215 0.95354588 0.65062605 0.7078125 0.58220223 0.43620499
0.57417041], Average AUC 0.6515291732903432
n_min sample leaf: 18, n_trees: 1,n_depth: 4, CV AUC [ 0.52492635 0.69453914 0.72347433 0.67277199 0.61022201 0.60242419
0.66069372], Average AUC 0.6412931038814034
n_min sample leaf: 24, n_trees: 1,n_depth: 4, CV AUC [ 0.57160669 0.93420402 0.81267361 0.41962595 0.61661669 0.61331859
0.59319078], Average AUC 0.6516051903961397
n_min sample leaf: 30, n_trees: 1,n_depth: 4, CV AUC [ 0.55516098 0.91633523 0.81502262 0.70278041 0.59836122 0.5309302
0.58067068], Average AUC 0.6713230472773379
n_min sample leaf: 6, n_trees: 21,n_depth: 4, CV AUC [ 0.68755524 0.97602325 0.63889941 0.71480429 0.60148622 0.67674774
0.66552711], Average AUC 0.7087204653627596
n_min sample leaf: 12, n_trees: 21,n_depth: 4, CV AUC [ 0.68067919 0.95890941 0.64167193 0.62639941 0.60813342 0.67252053
0.68000074], Average AUC 0.6954735177138422
n_min sample leaf: 18, n_trees: 21,n_depth: 4, CV AUC [ 0.6556108 0.97087279 0.68920455 0.68012153 0.63659249 0.67460255
0.66000361], Average AUC 0.7095726148788541
n_min sample leaf: 24, n_trees: 21,n_depth: 4, CV AUC [ 0.6743529 0.97119634 0.70710227 0.68641625 0.62983481 0.6676932
0.69466548], Average AUC 0.7187516072211474
n_min sample leaf: 30, n_trees: 21,n_depth: 4, CV AUC [ 0.68707649 0.97217224 0.72197759 0.69202441 0.59350537 0.66217625
0.6702968 ], Average AUC 0.7141755926808724
n_min sample leaf: 6, n_trees: 41,n_depth: 4, CV AUC [ 0.68247843 0.97529724 0.6562842 0.71517519 0.63624263 0.68441777
0.67951767], Average AUC 0.7184875907313989
n_min sample leaf: 12, n_trees: 41,n_depth: 4, CV AUC [ 0.66849221 0.97309817 0.65508733 0.69842435 0.64005419 0.68093809
0.66744082], Average AUC 0.7119335950978621
n_min sample leaf: 18, n_trees: 41,n_depth: 4, CV AUC [ 0.69306871 0.96722433 0.68824705 0.69872685 0.60484007 0.6736734
0.6679584 ], Average AUC 0.7133912592954542
n_min sample leaf: 24, n_trees: 41,n_depth: 4, CV AUC [ 0.68555082 0.97469223 0.654627 0.69075652 0.62370844 0.689761
0.68375651], Average AUC 0.7146932183862781
n_min sample leaf: 30, n_trees: 41,n_depth: 4, CV AUC [ 0.65821759 0.9740767 0.69293192 0.67988742 0.61377578 0.67097284
0.67106388], Average AUC 0.7087037326563855
n_min sample leaf: 6, n_trees: 61,n_depth: 4, CV AUC [ 0.6746028 0.97117266 0.67547612 0.71180293 0.6190604 0.67826385
0.6662411 ], Average AUC 0.7138028350900842
n_min sample leaf: 12, n_trees: 61,n_depth: 4, CV AUC [ 0.68958596 0.93552452 0.63478535 0.69288984 0.5835201 0.69365393
0.67910361], Average AUC 0.7012947570289114
n_min sample leaf: 18, n_trees: 61,n_depth: 4, CV AUC [ 0.68367793 0.97482376 0.66124263 0.69375789 0.62053609 0.67278638
0.67105857], Average AUC 0.7111261777805298
n_min sample leaf: 24, n_trees: 61,n_depth: 4, CV AUC [ 0.68063973 0.97327704 0.68874947 0.68742372 0.60323285 0.68737892
0.66612962], Average AUC 0.7124044794813221
n_min sample leaf: 30, n_trees: 61,n_depth: 4, CV AUC [ 0.68701599 0.97396622 0.69302662 0.71122685 0.59982113 0.67900084
0.66925899], Average AUC 0.7161880923477185
n_min sample leaf: 6, n_trees: 81,n_depth: 4, CV AUC [ 0.68532723 0.92542614 0.67751736 0.6820181 0.62504209 0.66000737
0.67509303], Average AUC 0.7043473306856417
n_min sample leaf: 12, n_trees: 81,n_depth: 4, CV AUC [ 0.67815394 0.9745423 0.62726484 0.70255156 0.56687973 0.67640029
0.65696449], Average AUC 0.6975367353326886
n_min sample leaf: 18, n_trees: 81,n_depth: 4, CV AUC [ 0.68062395 0.97311658 0.6572259 0.7104456 0.61927083 0.68197778
0.66590401], Average AUC 0.7126520952576298
n_min sample leaf: 24, n_trees: 81,n_depth: 4, CV AUC [ 0.67514205 0.97053872 0.64281881 0.68573232 0.60368266 0.68313329
0.65882512], Average AUC 0.7028389964959484
n_min sample leaf: 30, n_trees: 81,n_depth: 4, CV AUC [ 0.66004577 0.97349011 0.67728588 0.69467066 0.60854903 0.67293378
0.66390005], Average AUC 0.7072678976075905
n_min sample leaf: 6, n_trees: 101,n_depth: 4, CV AUC [ 0.68176557 0.97082544 0.6720907 0.68744213 0.5962358 0.68623921
0.67475063], Average AUC 0.7099070684772247
n_min sample leaf: 12, n_trees: 101,n_depth: 4, CV AUC [ 0.68244949 0.97390835 0.66598537 0.69583333 0.58441183 0.676753
0.66508916], Average AUC 0.7063472201128006
n_min sample leaf: 18, n_trees: 101,n_depth: 4, CV AUC [ 0.67875368 0.97148832 0.67159354 0.66862374 0.6187842 0.67396031
0.67302537], Average AUC 0.7080327361718949
n_min sample leaf: 24, n_trees: 101,n_depth: 4, CV AUC [ 0.67378998 0.97406618 0.68103167 0.68733954 0.62068866 0.67626342
0.66602611], Average AUC 0.711315080921915
n_min sample leaf: 30, n_trees: 101,n_depth: 4, CV AUC [ 0.65976694 0.97278514 0.71171875 0.71983112 0.60371949 0.6834044
0.66177665], Average AUC 0.7161432135064091
n_min sample leaf: 6, n_trees: 121,n_depth: 4, CV AUC [ 0.68628998 0.97318497 0.64172717 0.70477168 0.61275779 0.6865419
0.67113554], Average AUC 0.7109155760405358
n_min sample leaf: 12, n_trees: 121,n_depth: 4, CV AUC [ 0.68352799 0.97434501 0.67226168 0.71863689 0.59970802 0.6905638
0.65646284], Average AUC 0.7136437468954094
n_min sample leaf: 18, n_trees: 121,n_depth: 4, CV AUC [ 0.68439867 0.97273253 0.66034564 0.69156408 0.62753577 0.68150927
0.66198899], Average AUC 0.7114392799738916
n_min sample leaf: 24, n_trees: 121,n_depth: 4, CV AUC [ 0.68880471 0.97440814 0.67264573 0.72035196 0.61422559 0.68017741
0.67125498], Average AUC 0.7174097887546227
n_min sample leaf: 30, n_trees: 121,n_depth: 4, CV AUC [ 0.68117635 0.97307976 0.71041404 0.70941709 0.62106745 0.67856391
0.66169437], Average AUC 0.7193447068738291
n_min sample leaf: 6, n_trees: 141,n_depth: 4, CV AUC [ 0.68997264 0.93938868 0.66579598 0.68456702 0.62658617 0.65909665
0.67094974], Average AUC 0.7051938424520027
n_min sample leaf: 12, n_trees: 141,n_depth: 4, CV AUC [ 0.69852431 0.9727299 0.65949337 0.71756103 0.63030566 0.69029006
0.66684627], Average AUC 0.7193929428262623
n_min sample leaf: 18, n_trees: 141,n_depth: 4, CV AUC [ 0.6749395 0.97413721 0.64014099 0.71000894 0.62225905 0.67697147
0.66961996], Average AUC 0.7097253031619688
n_min sample leaf: 24, n_trees: 141,n_depth: 4, CV AUC [ 0.67339015 0.97381366 0.70408512 0.70542403 0.60875421 0.6748526
0.67125498], Average AUC 0.7159392508032136
n_min sample leaf: 30, n_trees: 141,n_depth: 4, CV AUC [ 0.67580492 0.9737137 0.70764152 0.69435238 0.61116898 0.68227258
0.67189731], Average AUC 0.716693056933095
((30, 121, 2), 0.7412343112235481)
In [40]:
model = RandomForestClassifier(n_estimators=21, max_depth=1, min_samples_leaf=18, min_samples_split=10)
model.fit(X, y)
print (model.score(X, y))
scores=cross_val_score(model, X, y, scoring='roc_auc', cv=7)
print (scores.mean())
0.92964360587
0.727705233125
In [146]:
features = X.columns
feature_importances = model.feature_importances_
features_df = pd.DataFrame({'Features': features, 'Importance Score': feature_importances})
features_df.sort('Importance Score', inplace=True, ascending=False)
features_df.head
/Users/annakudryashova/anaconda/lib/python3.5/site-packages/ipykernel/__main__.py:5: FutureWarning: sort(columns=....) is deprecated, use sort_values(by=.....)
Out[146]:
<bound method NDFrame.head of Features Importance Score
1 Month 0.285714
7 Pop 0.238095
5 Quarter 0.190476
4 Week 0.095238
0 Year 0.047619
3 Duration_Sec 0.047619
6 TimePer 0.047619
13 Region_West 0.047619
2 Lat 0.000000
8 Shape_Egg 0.000000
9 Shape_Light 0.000000
10 Shape_Cone 0.000000
11 Region_Midwest 0.000000
12 Region_Northeast 0.000000>
In [41]:
# 4th revision: remove the least important/correlated features
X = df[['Year', 'Month','Duration_Sec', 'Week', 'TimePer', 'Pop','Region_West']]
y = df['Hoax']
score_model={}
for n_depth in range(1, 5, 1):
for n_trees in range(1, 150, 20):
for n_min in range(6, 36, 6):
model = RandomForestClassifier(n_estimators=n_trees, max_depth=n_depth, min_samples_leaf=n_min)
scores = cross_val_score(model, X, y, scoring='roc_auc', cv=7)
score_model[n_min,n_trees,n_depth]=scores.mean()
print('n_min sample leaf: {}, n_trees: {},n_depth: {}, CV AUC {}, Average AUC {}'.format(n_min,n_trees,n_depth,scores, scores.mean()))
best=max(score_model.items(), key=lambda x: x[1])
print (best)
n_min sample leaf: 6, n_trees: 1,n_depth: 1, CV AUC [ 0.57329545 0.90833333 0.74835859 0.65751263 0.5 0.55721204
0.54110905], Average AUC 0.6408315853189033
n_min sample leaf: 12, n_trees: 1,n_depth: 1, CV AUC [ 0.58541667 0.83320707 0.74488636 0.57929293 0.54583333 0.5
0.52041651], Average AUC 0.6155789812622919
n_min sample leaf: 18, n_trees: 1,n_depth: 1, CV AUC [ 0.58541667 0.90833333 0.74488636 0.65997475 0.55498737 0.56017583
0.54721118], Average AUC 0.6515693555605209
n_min sample leaf: 24, n_trees: 1,n_depth: 1, CV AUC [ 0.57348485 0.90833333 0.74835859 0.65997475 0.55069444 0.55670141
0.53451058], Average AUC 0.6474368504173429
n_min sample leaf: 30, n_trees: 1,n_depth: 1, CV AUC [ 0.5 0.90833333 0.74242424 0.65751263 0.55069444 0.56488208
0.52493139], Average AUC 0.6355397306417734
n_min sample leaf: 6, n_trees: 21,n_depth: 1, CV AUC [ 0.65897254 0.95926978 0.89020412 0.7726694 0.57723327 0.61926195
0.59599102], Average AUC 0.7248002977225079
n_min sample leaf: 12, n_trees: 21,n_depth: 1, CV AUC [ 0.65807818 0.95922243 0.88576389 0.76861585 0.60715225 0.62224152
0.59599102], Average AUC 0.7281521627635115
n_min sample leaf: 18, n_trees: 21,n_depth: 1, CV AUC [ 0.65589489 0.95888573 0.83426452 0.75863847 0.60956702 0.61344757
0.57289903], Average AUC 0.7147996037390626
n_min sample leaf: 24, n_trees: 21,n_depth: 1, CV AUC [ 0.66077178 0.95921454 0.88786301 0.77114373 0.57723327 0.61729838
0.59198841], Average AUC 0.7236447300845049
n_min sample leaf: 30, n_trees: 21,n_depth: 1, CV AUC [ 0.66112426 0.95852799 0.89185869 0.75433239 0.60589489 0.61296326
0.59558757], Average AUC 0.7257555775202237
n_min sample leaf: 6, n_trees: 41,n_depth: 1, CV AUC [ 0.67272201 0.95932765 0.88932029 0.76782407 0.61213699 0.63653664
0.59770036], Average AUC 0.7336525734302283
n_min sample leaf: 12, n_trees: 41,n_depth: 1, CV AUC [ 0.65560027 0.95924085 0.89044087 0.8030303 0.60258049 0.62043062
0.59599102], Average AUC 0.7324734881386112
n_min sample leaf: 18, n_trees: 41,n_depth: 1, CV AUC [ 0.65807818 0.95900936 0.8928346 0.76893676 0.59306082 0.63918983
0.59469043], Average AUC 0.7293999974369203
n_min sample leaf: 24, n_trees: 41,n_depth: 1, CV AUC [ 0.67746212 0.95809659 0.89211648 0.76922348 0.59194024 0.62241525
0.59599102], Average AUC 0.7296064533227451
n_min sample leaf: 30, n_trees: 41,n_depth: 1, CV AUC [ 0.65948811 0.95834386 0.8822338 0.80081545 0.60258049 0.61383449
0.60174013], Average AUC 0.7312909027598353
n_min sample leaf: 6, n_trees: 61,n_depth: 1, CV AUC [ 0.67213805 0.95900147 0.89211911 0.76594066 0.60093645 0.62484734
0.59599102], Average AUC 0.7301391552309011
n_min sample leaf: 12, n_trees: 61,n_depth: 1, CV AUC [ 0.675 0.95862795 0.89599642 0.76802136 0.60258049 0.62024637
0.59492932], Average AUC 0.7307717007812015
n_min sample leaf: 18, n_trees: 61,n_depth: 1, CV AUC [ 0.64503893 0.95844907 0.88600852 0.77405566 0.60784144 0.6220836
0.59877267], Average AUC 0.7274642706043444
n_min sample leaf: 24, n_trees: 61,n_depth: 1, CV AUC [ 0.65560027 0.95918824 0.87994529 0.76847643 0.60128367 0.62271004
0.6055304 ], Average AUC 0.7275334772389463
n_min sample leaf: 30, n_trees: 61,n_depth: 1, CV AUC [ 0.65807818 0.95922243 0.89890309 0.79594381 0.60106797 0.63308854
0.59499302], Average AUC 0.7344710074985664
n_min sample leaf: 6, n_trees: 81,n_depth: 1, CV AUC [ 0.67647043 0.95904619 0.88832071 0.7702862 0.61712963 0.62008318
0.59599102], Average AUC 0.7324753357118226
n_min sample leaf: 12, n_trees: 81,n_depth: 1, CV AUC [ 0.65560027 0.95900673 0.8946049 0.76901305 0.60258049 0.61828806
0.59756234], Average AUC 0.72809369229795
n_min sample leaf: 18, n_trees: 81,n_depth: 1, CV AUC [ 0.6587016 0.96341277 0.88813657 0.75742056 0.61421244 0.62597126
0.60174013], Average AUC 0.7299421897456732
n_min sample leaf: 24, n_trees: 81,n_depth: 1, CV AUC [ 0.65560027 0.95896991 0.88939394 0.77253788 0.60119423 0.62008318
0.59599102], Average AUC 0.7276814895117264
n_min sample leaf: 30, n_trees: 81,n_depth: 1, CV AUC [ 0.65560027 0.95926452 0.89406829 0.78978325 0.60135732 0.62424721
0.62809419], Average AUC 0.7360592938895858
n_min sample leaf: 6, n_trees: 101,n_depth: 1, CV AUC [ 0.65560027 0.96338647 0.89107218 0.77223274 0.60256208 0.62377606
0.59784369], Average AUC 0.7294962128155753
n_min sample leaf: 12, n_trees: 101,n_depth: 1, CV AUC [ 0.65560027 0.95895676 0.89624632 0.77105166 0.60119423 0.62017267
0.59796578], Average AUC 0.7287410988199335
n_min sample leaf: 18, n_trees: 101,n_depth: 1, CV AUC [ 0.67037826 0.95885943 0.89374474 0.77139362 0.59282407 0.63801853
0.59770036], Average AUC 0.731845573240016
n_min sample leaf: 24, n_trees: 101,n_depth: 1, CV AUC [ 0.6558186 0.96339173 0.89308186 0.79545981 0.60093645 0.63003527
0.60326898], Average AUC 0.7345703848752215
n_min sample leaf: 30, n_trees: 101,n_depth: 1, CV AUC [ 0.65634733 0.95892519 0.89488899 0.77554977 0.60119423 0.63805801
0.60174013], Average AUC 0.7323862361701956
n_min sample leaf: 6, n_trees: 121,n_depth: 1, CV AUC [ 0.67020202 0.95899621 0.88481955 0.77241688 0.60256208 0.62017267
0.63105634], Average AUC 0.7343179636136089
n_min sample leaf: 12, n_trees: 121,n_depth: 1, CV AUC [ 0.65560027 0.9630524 0.89122738 0.78762626 0.60119423 0.62024637
0.60357687], Average AUC 0.7317891120831584
n_min sample leaf: 18, n_trees: 121,n_depth: 1, CV AUC [ 0.66693234 0.95888047 0.89479693 0.76884207 0.6044455 0.61968836
0.59599102], Average AUC 0.7299395256931872
n_min sample leaf: 24, n_trees: 121,n_depth: 1, CV AUC [ 0.67176189 0.95889888 0.89810343 0.78462753 0.61716909 0.6342888
0.59782776], Average AUC 0.7375253392041617
n_min sample leaf: 30, n_trees: 121,n_depth: 1, CV AUC [ 0.65652094 0.96362058 0.89141677 0.77433186 0.58514047 0.6247789
0.59782776], Average AUC 0.7276624685424716
n_min sample leaf: 6, n_trees: 141,n_depth: 1, CV AUC [ 0.66619318 0.95899884 0.88906776 0.77356639 0.59282407 0.63907138
0.59599102], Average AUC 0.7308160934265933
n_min sample leaf: 12, n_trees: 141,n_depth: 1, CV AUC [ 0.6558186 0.95894623 0.8927741 0.76913142 0.60061816 0.62423142
0.59782776], Average AUC 0.7284782410758097
n_min sample leaf: 18, n_trees: 141,n_depth: 1, CV AUC [ 0.66735848 0.96369949 0.89367635 0.77721223 0.61183712 0.63073542
0.59770036], Average AUC 0.7346027777452858
n_min sample leaf: 24, n_trees: 141,n_depth: 1, CV AUC [ 0.65862795 0.95892519 0.88408302 0.76942603 0.61717435 0.63788166
0.60077929], Average AUC 0.7324139256607458
n_min sample leaf: 30, n_trees: 141,n_depth: 1, CV AUC [ 0.6558186 0.95891993 0.89511785 0.77436606 0.59802189 0.62024637
0.59796578], Average AUC 0.7286366381747668
n_min sample leaf: 6, n_trees: 1,n_depth: 2, CV AUC [ 0.63887311 0.83485375 0.62899306 0.6775726 0.5648306 0.64635713
0.52722732], Average AUC 0.6455296499019408
n_min sample leaf: 12, n_trees: 1,n_depth: 2, CV AUC [ 0.6222722 0.83233375 0.83116056 0.67714646 0.55069444 0.55225837
0.54997691], Average AUC 0.659406100949045
n_min sample leaf: 18, n_trees: 1,n_depth: 2, CV AUC [ 0.61141625 0.9169718 0.78473011 0.67714646 0.51080072 0.56756686
0.66347803], Average AUC 0.676015746076901
n_min sample leaf: 24, n_trees: 1,n_depth: 2, CV AUC [ 0.66940236 0.94820602 0.78357008 0.69726957 0.52180661 0.60324805
0.57227528], Average AUC 0.6851111370883721
n_min sample leaf: 30, n_trees: 1,n_depth: 2, CV AUC [ 0.615383 0.94820602 0.78174979 0.68347538 0.57835911 0.59618341
0.54500549], Average AUC 0.6783374566841324
n_min sample leaf: 6, n_trees: 21,n_depth: 2, CV AUC [ 0.69453651 0.95781776 0.75669455 0.7654698 0.62606534 0.65456675
0.63686915], Average AUC 0.7274314094619834
n_min sample leaf: 12, n_trees: 21,n_depth: 2, CV AUC [ 0.69069339 0.96869213 0.78718434 0.74428662 0.5849537 0.68401506
0.63195878], Average AUC 0.7273977179662487
n_min sample leaf: 18, n_trees: 21,n_depth: 2, CV AUC [ 0.70278567 0.9643045 0.80931187 0.75772569 0.6093145 0.6682749
0.63753802], Average AUC 0.7356078795693032
n_min sample leaf: 24, n_trees: 21,n_depth: 2, CV AUC [ 0.70326968 0.9683186 0.72114636 0.76553556 0.62443708 0.67842967
0.64702166], Average AUC 0.7297369449360619
n_min sample leaf: 30, n_trees: 21,n_depth: 2, CV AUC [ 0.7089173 0.96689289 0.83337542 0.73354377 0.63910985 0.66973837
0.65842433], Average AUC 0.7442859887850186
n_min sample leaf: 6, n_trees: 41,n_depth: 2, CV AUC [ 0.69948706 0.96897096 0.75886732 0.77781987 0.63238373 0.6706175
0.65448277], Average AUC 0.7375184570783814
n_min sample leaf: 12, n_trees: 41,n_depth: 2, CV AUC [ 0.690504 0.9352904 0.77148569 0.76566972 0.63102641 0.67120973
0.65692202], Average AUC 0.7317297103274077
n_min sample leaf: 18, n_trees: 41,n_depth: 2, CV AUC [ 0.70687605 0.96814499 0.7162379 0.75010522 0.64551768 0.65118183
0.65976738], Average AUC 0.7282615783352877
n_min sample leaf: 24, n_trees: 41,n_depth: 2, CV AUC [ 0.70265415 0.96732428 0.82441867 0.7628709 0.64370791 0.67691093
0.66634727], Average AUC 0.7491763007418905
n_min sample leaf: 30, n_trees: 41,n_depth: 2, CV AUC [ 0.69418666 0.96034564 0.80751263 0.76848169 0.62609165 0.6807091
0.65928431], Average AUC 0.7423730957396326
n_min sample leaf: 6, n_trees: 61,n_depth: 2, CV AUC [ 0.68640309 0.96873422 0.79761153 0.781008 0.63090278 0.67385765
0.66164925], Average AUC 0.7428809310247738
n_min sample leaf: 12, n_trees: 61,n_depth: 2, CV AUC [ 0.69753262 0.72207755 0.77500526 0.76676662 0.63596644 0.67285744
0.65433678], Average AUC 0.7035061015160099
n_min sample leaf: 18, n_trees: 61,n_depth: 2, CV AUC [ 0.69125105 0.97009417 0.79708018 0.75495318 0.635756 0.67247052
0.65611779], Average AUC 0.7396746971578915
n_min sample leaf: 24, n_trees: 61,n_depth: 2, CV AUC [ 0.69501789 0.96958912 0.84584386 0.76309186 0.63507997 0.66054169
0.65203289], Average AUC 0.7458853242374369
n_min sample leaf: 30, n_trees: 61,n_depth: 2, CV AUC [ 0.6923085 0.96941025 0.82859848 0.74042245 0.63739741 0.67133607
0.6538776 ], Average AUC 0.741907252491708
n_min sample leaf: 6, n_trees: 81,n_depth: 2, CV AUC [ 0.68543245 0.96935764 0.79227694 0.75905408 0.6170665 0.67029901
0.66200226], Average AUC 0.7364984109941985
n_min sample leaf: 12, n_trees: 81,n_depth: 2, CV AUC [ 0.68358586 0.97096749 0.78146307 0.7712621 0.62994266 0.67363656
0.6471995 ], Average AUC 0.736865317710735
n_min sample leaf: 18, n_trees: 81,n_depth: 2, CV AUC [ 0.70628157 0.97107008 0.83085543 0.75571075 0.64538878 0.65577227
0.64291288], Average AUC 0.743998822533697
n_min sample leaf: 24, n_trees: 81,n_depth: 2, CV AUC [ 0.69392624 0.96720328 0.80880419 0.75667088 0.62287458 0.66256054
0.65592933], Average AUC 0.7382812912751959
n_min sample leaf: 30, n_trees: 81,n_depth: 2, CV AUC [ 0.69892151 0.97251157 0.83161827 0.75169139 0.62890888 0.65780164
0.63175972], Average AUC 0.7390304255300422
n_min sample leaf: 6, n_trees: 101,n_depth: 2, CV AUC [ 0.70714173 0.97383733 0.79604903 0.75963279 0.62827757 0.66709307
0.64531233], Average AUC 0.7396205492871645
n_min sample leaf: 12, n_trees: 101,n_depth: 2, CV AUC [ 0.69481797 0.97022043 0.76325231 0.75745739 0.62192235 0.66280533
0.65659555], Average AUC 0.7324387617707163
n_min sample leaf: 18, n_trees: 101,n_depth: 2, CV AUC [ 0.69397622 0.97170665 0.77754367 0.76033512 0.6206834 0.66359234
0.64397989], Average AUC 0.7331167544757556
n_min sample leaf: 24, n_trees: 101,n_depth: 2, CV AUC [ 0.69553872 0.9709596 0.77822233 0.7533065 0.63947811 0.66831175
0.64783387], Average AUC 0.7362358394076123
n_min sample leaf: 30, n_trees: 101,n_depth: 2, CV AUC [ 0.69589646 0.9692077 0.80825968 0.77203546 0.63607955 0.66975153
0.64869915], Average AUC 0.742847075545364
n_min sample leaf: 6, n_trees: 121,n_depth: 2, CV AUC [ 0.68722643 0.97407407 0.7844697 0.77137521 0.64036195 0.66722468
0.66195714], Average AUC 0.7409555976201171
n_min sample leaf: 12, n_trees: 121,n_depth: 2, CV AUC [ 0.6928267 0.9746291 0.76345749 0.75245949 0.62916667 0.66777216
0.6579041 ], Average AUC 0.7340308166005064
n_min sample leaf: 18, n_trees: 121,n_depth: 2, CV AUC [ 0.70580282 0.96910511 0.76809501 0.76566972 0.63636101 0.66431617
0.65872161], Average AUC 0.7382959211428849
n_min sample leaf: 24, n_trees: 121,n_depth: 2, CV AUC [ 0.69819813 0.96912879 0.78169192 0.75426662 0.61307344 0.67598179
0.64511591], Average AUC 0.7339223711903358
n_min sample leaf: 30, n_trees: 121,n_depth: 2, CV AUC [ 0.6890967 0.97109638 0.83774989 0.77587069 0.62129104 0.6758844
0.65217091], Average AUC 0.74616571458013
n_min sample leaf: 6, n_trees: 141,n_depth: 2, CV AUC [ 0.69085385 0.97054135 0.79502315 0.76147675 0.62872212 0.66049958
0.65859951], Average AUC 0.7379594718884829
n_min sample leaf: 12, n_trees: 141,n_depth: 2, CV AUC [ 0.69866635 0.97265362 0.78857323 0.76965225 0.62216961 0.66403717
0.65075885], Average AUC 0.7380730119517066
n_min sample leaf: 18, n_trees: 141,n_depth: 2, CV AUC [ 0.69221117 0.97522359 0.7790483 0.76149253 0.64095907 0.66815382
0.66099365], Average AUC 0.7397260180956032
n_min sample leaf: 24, n_trees: 141,n_depth: 2, CV AUC [ 0.68602694 0.96812395 0.82931397 0.76172664 0.62694129 0.65985734
0.65071107], Average AUC 0.7403858855216952
n_min sample leaf: 30, n_trees: 141,n_depth: 2, CV AUC [ 0.69533354 0.97232744 0.77614952 0.7632076 0.63998316 0.66950937
0.65119415], Average AUC 0.7382435401322228
n_min sample leaf: 6, n_trees: 1,n_depth: 3, CV AUC [ 0.68209964 0.73397254 0.68238899 0.38550347 0.6268729 0.63943462
0.55750702], Average AUC 0.615397025771136
n_min sample leaf: 12, n_trees: 1,n_depth: 3, CV AUC [ 0.62254577 0.83428293 0.66913931 0.6478588 0.61027199 0.58856338
0.60800947], Average AUC 0.6543816646628884
n_min sample leaf: 18, n_trees: 1,n_depth: 3, CV AUC [ 0.67779619 0.83155513 0.78802083 0.73547454 0.61327599 0.63694199
0.56877432], Average AUC 0.6931198560792324
n_min sample leaf: 24, n_trees: 1,n_depth: 3, CV AUC [ 0.62340593 0.68424874 0.69510995 0.7459596 0.63017677 0.63852127
0.57530112], Average AUC 0.6561033402234947
n_min sample leaf: 30, n_trees: 1,n_depth: 3, CV AUC [ 0.60818603 0.95167824 0.79201915 0.64510732 0.58224169 0.55913877
0.63448563], Average AUC 0.6818366895774212
n_min sample leaf: 6, n_trees: 21,n_depth: 3, CV AUC [ 0.71414668 0.92401094 0.70344329 0.74855587 0.62906408 0.67254685
0.6665835 ], Average AUC 0.7226216009221093
n_min sample leaf: 12, n_trees: 21,n_depth: 3, CV AUC [ 0.68917035 0.97260101 0.67621791 0.720252 0.62556292 0.66597968
0.67016409], Average AUC 0.7171354219274537
n_min sample leaf: 18, n_trees: 21,n_depth: 3, CV AUC [ 0.70936185 0.97226694 0.73782618 0.76964962 0.64105114 0.6614761
0.66382839], Average AUC 0.7364943158328563
n_min sample leaf: 24, n_trees: 21,n_depth: 3, CV AUC [ 0.70451652 0.97062027 0.704385 0.73643466 0.6524516 0.67280743
0.66313828], Average AUC 0.7291933932967191
n_min sample leaf: 30, n_trees: 21,n_depth: 3, CV AUC [ 0.68021096 0.97245107 0.67987689 0.68015309 0.62717014 0.67095968
0.63884657], Average AUC 0.7070954867626652
n_min sample leaf: 6, n_trees: 41,n_depth: 3, CV AUC [ 0.69765099 0.9666009 0.71803451 0.7287642 0.64928451 0.68447305
0.67312092], Average AUC 0.7311327273452468
n_min sample leaf: 12, n_trees: 41,n_depth: 3, CV AUC [ 0.71542245 0.95863058 0.66466488 0.73414878 0.64554924 0.67825595
0.66737447], Average AUC 0.7234351922734836
n_min sample leaf: 18, n_trees: 41,n_depth: 3, CV AUC [ 0.69494423 0.9727036 0.64557292 0.75379051 0.64743003 0.66983839
0.66051323], Average AUC 0.7206847001492264
n_min sample leaf: 24, n_trees: 41,n_depth: 3, CV AUC [ 0.70595802 0.97126999 0.71014047 0.74668824 0.6355061 0.68241998
0.66232608], Average AUC 0.7306155541369577
n_min sample leaf: 30, n_trees: 41,n_depth: 3, CV AUC [ 0.69696707 0.97052031 0.71014836 0.73296507 0.6313105 0.68222257
0.66923244], Average AUC 0.7276237595899306
n_min sample leaf: 6, n_trees: 61,n_depth: 3, CV AUC [ 0.70136258 0.97166982 0.677883 0.72451599 0.64169297 0.67521057
0.66532273], Average AUC 0.7225225242593256
n_min sample leaf: 12, n_trees: 61,n_depth: 3, CV AUC [ 0.69720907 0.96664562 0.68990425 0.72754893 0.64422085 0.67699516
0.66787612], Average AUC 0.7243428574829213
n_min sample leaf: 18, n_trees: 61,n_depth: 3, CV AUC [ 0.70137048 0.97298769 0.70582386 0.73938868 0.63872054 0.68123816
0.67331999], Average AUC 0.730407056069491
n_min sample leaf: 24, n_trees: 61,n_depth: 3, CV AUC [ 0.70332492 0.97199863 0.72697548 0.74869266 0.62783565 0.67438408
0.6619943 ], Average AUC 0.7307436736263927
n_min sample leaf: 30, n_trees: 61,n_depth: 3, CV AUC [ 0.70834122 0.97173822 0.74649095 0.73553504 0.6283407 0.66757212
0.66164925], Average AUC 0.7313810706170841
n_min sample leaf: 6, n_trees: 81,n_depth: 3, CV AUC [ 0.70248316 0.96021149 0.71471223 0.74829019 0.63455124 0.68001948
0.67000218], Average AUC 0.7300385672528039
n_min sample leaf: 12, n_trees: 81,n_depth: 3, CV AUC [ 0.69921612 0.94019886 0.65208333 0.7338831 0.64318182 0.68220678
0.66259416], Average AUC 0.7161948823558628
n_min sample leaf: 18, n_trees: 81,n_depth: 3, CV AUC [ 0.70089699 0.97105692 0.68987005 0.74558607 0.62094118 0.67164403
0.67221317], Average AUC 0.7246012027335932
n_min sample leaf: 24, n_trees: 81,n_depth: 3, CV AUC [ 0.70795981 0.97261679 0.69682239 0.74778251 0.62987689 0.67528427
0.66644813], Average AUC 0.7281129714431476
n_min sample leaf: 30, n_trees: 81,n_depth: 3, CV AUC [ 0.69492319 0.97231429 0.74127999 0.74243739 0.64197969 0.67267583
0.67089401], Average AUC 0.7337863407812317
n_min sample leaf: 6, n_trees: 101,n_depth: 3, CV AUC [ 0.69943708 0.97471591 0.70036301 0.74372633 0.62666772 0.68419931
0.67050914], Average AUC 0.72851692594068
n_min sample leaf: 12, n_trees: 101,n_depth: 3, CV AUC [ 0.69801136 0.96507523 0.69113005 0.74809291 0.63622159 0.67710044
0.66625703], Average AUC 0.7259840877301807
n_min sample leaf: 18, n_trees: 101,n_depth: 3, CV AUC [ 0.69476799 0.97169613 0.66619581 0.73430924 0.64006471 0.66652716
0.6691714 ], Average AUC 0.7203903485347826
n_min sample leaf: 24, n_trees: 101,n_depth: 3, CV AUC [ 0.70082334 0.97239057 0.72987426 0.74053293 0.64505471 0.67093072
0.65733609], Average AUC 0.7309918042345195
n_min sample leaf: 30, n_trees: 101,n_depth: 3, CV AUC [ 0.69437079 0.97258523 0.76016677 0.74790614 0.63471959 0.67263108
0.66835389], Average AUC 0.735819070464796
n_min sample leaf: 6, n_trees: 121,n_depth: 3, CV AUC [ 0.69987374 0.97482902 0.6855061 0.74217172 0.61773464 0.68105654
0.66619598], Average AUC 0.7239096760291914
n_min sample leaf: 12, n_trees: 121,n_depth: 3, CV AUC [ 0.69764047 0.97284564 0.67972959 0.73174979 0.64362637 0.68018004
0.67050914], Average AUC 0.7251830046503469
n_min sample leaf: 18, n_trees: 121,n_depth: 3, CV AUC [ 0.69547033 0.9706229 0.69286616 0.74227694 0.63708176 0.67717941
0.66482108], Average AUC 0.7257597942390231
n_min sample leaf: 24, n_trees: 121,n_depth: 3, CV AUC [ 0.70479009 0.97251947 0.6937237 0.73201547 0.63715015 0.67229417
0.66899356], Average AUC 0.7259266559449254
n_min sample leaf: 30, n_trees: 121,n_depth: 3, CV AUC [ 0.70081282 0.97201441 0.73667666 0.74856376 0.64077231 0.68253843
0.65945418], Average AUC 0.7344046529877959
n_min sample leaf: 6, n_trees: 141,n_depth: 3, CV AUC [ 0.70453756 0.97242477 0.69580966 0.72805398 0.60549769 0.67532638
0.66072291], Average AUC 0.7203389928190627
n_min sample leaf: 12, n_trees: 141,n_depth: 3, CV AUC [ 0.69750105 0.97299558 0.6897359 0.73386206 0.63407776 0.6772768
0.66366913], Average AUC 0.7241597535973557
n_min sample leaf: 18, n_trees: 141,n_depth: 3, CV AUC [ 0.70042614 0.97394518 0.693871 0.72899306 0.64122738 0.67503159
0.66422122], Average AUC 0.7253879362982965
n_min sample leaf: 24, n_trees: 141,n_depth: 3, CV AUC [ 0.70458754 0.97050189 0.70500316 0.74387363 0.64367109 0.68326227
0.67158676], Average AUC 0.7317837630264015
n_min sample leaf: 30, n_trees: 141,n_depth: 3, CV AUC [ 0.7072943 0.97162511 0.71932607 0.74343434 0.63599274 0.67518688
0.66578457], Average AUC 0.7312348586677792
n_min sample leaf: 6, n_trees: 1,n_depth: 4, CV AUC [ 0.59601747 0.95280408 0.59833228 0.68731587 0.57264836 0.66653243
0.51943443], Average AUC 0.6561549879216795
n_min sample leaf: 12, n_trees: 1,n_depth: 4, CV AUC [ 0.6479456 0.83720539 0.64113268 0.71915246 0.64648569 0.61598231
0.5901145 ], Average AUC 0.6711455198401296
n_min sample leaf: 18, n_trees: 1,n_depth: 4, CV AUC [ 0.64408407 0.84958439 0.67189604 0.73719487 0.65228851 0.63150137
0.62735366], Average AUC 0.6877004144446851
n_min sample leaf: 24, n_trees: 1,n_depth: 4, CV AUC [ 0.67937184 0.9333649 0.61949968 0.63537458 0.6088831 0.60923615
0.55790781], Average AUC 0.6633768678927126
n_min sample leaf: 30, n_trees: 1,n_depth: 4, CV AUC [ 0.61474642 0.84851115 0.74073811 0.66805556 0.57173032 0.6399163
0.60217277], Average AUC 0.6694100906265376
n_min sample leaf: 6, n_trees: 21,n_depth: 4, CV AUC [ 0.71357849 0.9261916 0.67811185 0.72282723 0.64125105 0.66858023
0.66060612], Average AUC 0.7158780828057978
n_min sample leaf: 12, n_trees: 21,n_depth: 4, CV AUC [ 0.68932029 0.94222959 0.64214541 0.71136364 0.64988952 0.67304433
0.67378714], Average AUC 0.7116828439287108
n_min sample leaf: 18, n_trees: 21,n_depth: 4, CV AUC [ 0.70102062 0.97231955 0.66103483 0.72836437 0.63065551 0.66323437
0.67490989], Average AUC 0.7187913056632806
n_min sample leaf: 24, n_trees: 21,n_depth: 4, CV AUC [ 0.69588068 0.96991793 0.62700705 0.72310869 0.6442077 0.66488471
0.6702676 ], Average AUC 0.7136106240163197
n_min sample leaf: 30, n_trees: 21,n_depth: 4, CV AUC [ 0.70598432 0.97067024 0.65440867 0.73355429 0.6444576 0.66263424
0.66719929], Average AUC 0.7198440930838347
n_min sample leaf: 6, n_trees: 41,n_depth: 4, CV AUC [ 0.70325652 0.92066235 0.5895202 0.70568708 0.64895833 0.68672089
0.66891924], Average AUC 0.7033892315831717
n_min sample leaf: 12, n_trees: 41,n_depth: 4, CV AUC [ 0.70747317 0.97197759 0.66401252 0.72256681 0.65147569 0.66456096
0.67779241], Average AUC 0.7228370218419718
n_min sample leaf: 18, n_trees: 41,n_depth: 4, CV AUC [ 0.70325126 0.97283775 0.6175242 0.68895728 0.64683291 0.68755527
0.67134788], Average AUC 0.7126152237225792
n_min sample leaf: 24, n_trees: 41,n_depth: 4, CV AUC [ 0.71183186 0.97146991 0.68688447 0.73815499 0.63675558 0.66647189
0.67132665], Average AUC 0.7261279054747406
n_min sample leaf: 30, n_trees: 41,n_depth: 4, CV AUC [ 0.70964857 0.9737479 0.66626157 0.72787774 0.62436343 0.68161981
0.67609634], Average AUC 0.7228021933942194
n_min sample leaf: 6, n_trees: 61,n_depth: 4, CV AUC [ 0.7068971 0.92757523 0.61655619 0.68248895 0.64082229 0.67524479
0.67907441], Average AUC 0.704094135622322
n_min sample leaf: 12, n_trees: 61,n_depth: 4, CV AUC [ 0.69920297 0.96422559 0.63074758 0.70678136 0.6457097 0.68202516
0.68322831], Average AUC 0.7159886670554979
n_min sample leaf: 18, n_trees: 61,n_depth: 4, CV AUC [ 0.70570812 0.97066761 0.61964962 0.72681503 0.6308791 0.68528111
0.68522431], Average AUC 0.717746415595501
n_min sample leaf: 24, n_trees: 61,n_depth: 4, CV AUC [ 0.70979588 0.97383733 0.64828756 0.72516572 0.64678556 0.68349126
0.67546728], Average AUC 0.7232615137468172
n_min sample leaf: 30, n_trees: 61,n_depth: 4, CV AUC [ 0.71006681 0.97371107 0.64269255 0.72587069 0.64418403 0.68263319
0.67322178], Average AUC 0.7217685881737184
n_min sample leaf: 6, n_trees: 81,n_depth: 4, CV AUC [ 0.70846223 0.97350063 0.64594118 0.72001263 0.64275568 0.6882949
0.67210169], Average AUC 0.7215812774817042
n_min sample leaf: 12, n_trees: 81,n_depth: 4, CV AUC [ 0.70851484 0.96802399 0.64166141 0.69714857 0.62935606 0.6833386
0.66922448], Average AUC 0.713895419915176
n_min sample leaf: 18, n_trees: 81,n_depth: 4, CV AUC [ 0.70639205 0.94797717 0.63217593 0.69614373 0.63628472 0.67492367
0.6789762 ], Average AUC 0.7104104943122785
n_min sample leaf: 24, n_trees: 81,n_depth: 4, CV AUC [ 0.70311711 0.97320865 0.66521465 0.73873632 0.6484717 0.68044852
0.6787267 ], Average AUC 0.7268462342140766
n_min sample leaf: 30, n_trees: 81,n_depth: 4, CV AUC [ 0.70974853 0.97390046 0.65369581 0.72107534 0.63920455 0.68446778
0.66851314], Average AUC 0.7215150868907738
n_min sample leaf: 6, n_trees: 101,n_depth: 4, CV AUC [ 0.70618687 0.97232481 0.61827125 0.7320181 0.62300084 0.66471889
0.6725768 ], Average AUC 0.7127282231228248
n_min sample leaf: 12, n_trees: 101,n_depth: 4, CV AUC [ 0.70410354 0.96682976 0.60609217 0.71888152 0.63570602 0.68373605
0.67284223], Average AUC 0.7125987543867066
n_min sample leaf: 18, n_trees: 101,n_depth: 4, CV AUC [ 0.70358796 0.97381629 0.61328914 0.70892782 0.63949653 0.6893346
0.68110757], Average AUC 0.7156514146952163
n_min sample leaf: 24, n_trees: 101,n_depth: 4, CV AUC [ 0.70424295 0.9729456 0.6523911 0.70165194 0.64700915 0.68286745
0.67990254], Average AUC 0.7201443889130018
n_min sample leaf: 30, n_trees: 101,n_depth: 4, CV AUC [ 0.70705755 0.97301662 0.67147517 0.72878262 0.64033302 0.68005896
0.67101345], Average AUC 0.7245339127694059
n_min sample leaf: 6, n_trees: 121,n_depth: 4, CV AUC [ 0.70261732 0.95704703 0.63476957 0.69046717 0.62771465 0.67268636
0.6761335 ], Average AUC 0.7087765134305284
n_min sample leaf: 12, n_trees: 121,n_depth: 4, CV AUC [ 0.71239478 0.92818813 0.61961806 0.71823969 0.63621107 0.67996684
0.67434719], Average AUC 0.7098522496386969
n_min sample leaf: 18, n_trees: 121,n_depth: 4, CV AUC [ 0.70407723 0.97018098 0.63978062 0.6715646 0.63707123 0.67892188
0.6758548 ], Average AUC 0.7110644776281205
n_min sample leaf: 24, n_trees: 121,n_depth: 4, CV AUC [ 0.7109112 0.97211174 0.67841698 0.71948127 0.64878472 0.67780585
0.67746062], Average AUC 0.7264246273339285
n_min sample leaf: 30, n_trees: 121,n_depth: 4, CV AUC [ 0.704748 0.97161984 0.6615767 0.71187658 0.64161406 0.67620025
0.67476656], Average AUC 0.7203431423457288
n_min sample leaf: 6, n_trees: 141,n_depth: 4, CV AUC [ 0.70621054 0.95092593 0.60847275 0.69206124 0.61114531 0.68605759
0.67495766], Average AUC 0.7042615739341961
n_min sample leaf: 12, n_trees: 141,n_depth: 4, CV AUC [ 0.71064552 0.93938342 0.61304188 0.66884207 0.64004367 0.68664982
0.66969163], Average AUC 0.7040425706640389
n_min sample leaf: 18, n_trees: 141,n_depth: 4, CV AUC [ 0.70602378 0.96909985 0.640867 0.7197601 0.64583596 0.68146452
0.67136646], Average AUC 0.7192025258740211
n_min sample leaf: 24, n_trees: 141,n_depth: 4, CV AUC [ 0.71134259 0.9727983 0.63724221 0.70027357 0.64818761 0.67881133
0.67200614], Average AUC 0.7172373916321888
n_min sample leaf: 30, n_trees: 141,n_depth: 4, CV AUC [ 0.70591856 0.97347433 0.65760732 0.71523043 0.63804451 0.68232523
0.67131072], Average AUC 0.720558728045207
((24, 41, 2), 0.74917630074189046)
In [42]:
model = RandomForestClassifier(n_estimators=41, max_depth=2, min_samples_leaf=6, min_samples_split=10)
model.fit(X, y)
print (model.score(X, y))
scores=cross_val_score(model, X, y, scoring='roc_auc', cv=7)
print (scores.mean())
0.92964360587
0.737897513327
In [152]:
features = X.columns
feature_importances = model.feature_importances_
features_df = pd.DataFrame({'Features': features, 'Importance Score': feature_importances})
features_df.sort('Importance Score', inplace=True, ascending=False)
features_df
/Users/annakudryashova/anaconda/lib/python3.5/site-packages/ipykernel/__main__.py:5: FutureWarning: sort(columns=....) is deprecated, use sort_values(by=.....)
Out[152]:
Features
Importance Score
5
Pop
0.326000
3
Week
0.257007
1
Month
0.189818
6
Region_West
0.071332
0
Year
0.068160
4
TimePer
0.045465
2
Duration_Sec
0.042219
In [43]:
# 5th revision: remove population count and year
X = df[[ 'Month','Duration_Sec', 'Week', 'TimePer', 'Region_West']]
y = df['Hoax']
score_model={}
for n_depth in range(1, 5, 1):
for n_trees in range(1, 150, 20):
for n_min in range(6, 36, 6):
model = RandomForestClassifier(n_estimators=n_trees, max_depth=n_depth, min_samples_leaf=n_min)
scores = cross_val_score(model, X, y, scoring='roc_auc', cv=7)
score_model[n_min,n_trees,n_depth]=scores.mean()
print('n_min sample leaf: {}, n_trees: {},n_depth: {}, CV AUC {}, Average AUC {}'.format(n_min,n_trees,n_depth,scores, scores.mean()))
best=max(score_model.items(), key=lambda x: x[1])
print (best)
n_min sample leaf: 6, n_trees: 1,n_depth: 1, CV AUC [ 0.57348485 0.59880051 0.74835859 0.65997475 0.53566919 0.55721204
0.5353626 ], Average AUC 0.6012660743962204
n_min sample leaf: 12, n_trees: 1,n_depth: 1, CV AUC [ 0.58541667 0.90580808 0.74835859 0.65997475 0.54583333 0.53950569
0.53451058], Average AUC 0.6456296688644255
n_min sample leaf: 18, n_trees: 1,n_depth: 1, CV AUC [ 0.58541667 0.91401515 0.74488636 0.65997475 0.54583333 0.55721204
0.52493139], Average AUC 0.6474670992652886
n_min sample leaf: 24, n_trees: 1,n_depth: 1, CV AUC [ 0.57329545 0.46395202 0.74488636 0.65056818 0.53156566 0.56017583
0.52493139], Average AUC 0.578482127263892
n_min sample leaf: 30, n_trees: 1,n_depth: 1, CV AUC [ 0.57348485 0.90833333 0.74488636 0.65751263 0.53453283 0.53950569
0.52493139], Average AUC 0.640455296142462
n_min sample leaf: 6, n_trees: 21,n_depth: 1, CV AUC [ 0.64454703 0.92128314 0.8215646 0.80209386 0.6129314 0.58474942
0.56086199], Average AUC 0.7068616356328962
n_min sample leaf: 12, n_trees: 21,n_depth: 1, CV AUC [ 0.64424979 0.92514468 0.80816498 0.74631208 0.61570918 0.590498
0.56086199], Average AUC 0.6987058138957613
n_min sample leaf: 18, n_trees: 21,n_depth: 1, CV AUC [ 0.64454703 0.92523674 0.78033723 0.74475484 0.61567761 0.59110339
0.56679159], Average AUC 0.6954926335456303
n_min sample leaf: 24, n_trees: 21,n_depth: 1, CV AUC [ 0.64581755 0.92519729 0.79427346 0.72795402 0.61241056 0.58730785
0.55735307], Average AUC 0.6929019730244013
n_min sample leaf: 30, n_trees: 21,n_depth: 1, CV AUC [ 0.66664825 0.92517098 0.81515152 0.74789825 0.61567761 0.58474942
0.56679159], Average AUC 0.7031553751910197
n_min sample leaf: 6, n_trees: 41,n_depth: 1, CV AUC [ 0.64454703 0.92483428 0.82131471 0.75677346 0.61241056 0.58474942
0.5710437 ], Average AUC 0.7022390251923812
n_min sample leaf: 12, n_trees: 41,n_depth: 1, CV AUC [ 0.64454703 0.92488689 0.81684028 0.74618319 0.66478851 0.59110339
0.56679159], Average AUC 0.7078772685865155
n_min sample leaf: 18, n_trees: 41,n_depth: 1, CV AUC [ 0.66790562 0.92524726 0.81203441 0.74966593 0.64780619 0.58711834
0.57131444], Average AUC 0.708727455124022
n_min sample leaf: 24, n_trees: 41,n_depth: 1, CV AUC [ 0.66664825 0.92483691 0.80962226 0.74771938 0.6160564 0.58730785
0.56679159], Average AUC 0.7027118078299104
n_min sample leaf: 30, n_trees: 41,n_depth: 1, CV AUC [ 0.65287774 0.9248448 0.81460964 0.75177031 0.62761469 0.5984023
0.56679159], Average AUC 0.7052730086272531
n_min sample leaf: 6, n_trees: 61,n_depth: 1, CV AUC [ 0.67050189 0.92511837 0.81879209 0.75060764 0.6129314 0.59141398
0.56699332], Average AUC 0.7051940981962378
n_min sample leaf: 12, n_trees: 61,n_depth: 1, CV AUC [ 0.66664825 0.92515257 0.81817656 0.74892151 0.5959859 0.61757738
0.57131444], Average AUC 0.7062538012378996
n_min sample leaf: 18, n_trees: 61,n_depth: 1, CV AUC [ 0.66814499 0.92482376 0.80731534 0.75008944 0.64538089 0.61466625
0.5710437 ], Average AUC 0.7116377670897415
n_min sample leaf: 24, n_trees: 61,n_depth: 1, CV AUC [ 0.66702704 0.92482376 0.81980745 0.75059186 0.61241056 0.59110339
0.56679159], Average AUC 0.7046508075475543
n_min sample leaf: 30, n_trees: 61,n_depth: 1, CV AUC [ 0.65287774 0.92516309 0.81817656 0.74877683 0.61333649 0.62232838
0.62338821], Average AUC 0.7148638990502274
n_min sample leaf: 6, n_trees: 81,n_depth: 1, CV AUC [ 0.67517624 0.92488163 0.8139415 0.74958439 0.6129314 0.59110339
0.56679159], Average AUC 0.7049157335937304
n_min sample leaf: 12, n_trees: 81,n_depth: 1, CV AUC [ 0.66664825 0.92480271 0.81933923 0.75260417 0.63448285 0.61843809
0.56494423], Average AUC 0.7116085051420883
n_min sample leaf: 18, n_trees: 81,n_depth: 1, CV AUC [ 0.66664825 0.92460017 0.81511469 0.75014731 0.6389415 0.60251895
0.56679159], Average AUC 0.7092517799706998
n_min sample leaf: 24, n_trees: 81,n_depth: 1, CV AUC [ 0.65562132 0.92458176 0.8146149 0.79296349 0.61333649 0.59110339
0.56679159], Average AUC 0.708430419139666
n_min sample leaf: 30, n_trees: 81,n_depth: 1, CV AUC [ 0.67639678 0.92520781 0.81441761 0.75110217 0.59274516 0.61874868
0.56679159], Average AUC 0.7064871151437916
n_min sample leaf: 6, n_trees: 101,n_depth: 1, CV AUC [ 0.66787931 0.92490267 0.81470697 0.74780619 0.6156513 0.59141398
0.56494423], Average AUC 0.7039006656660476
n_min sample leaf: 12, n_trees: 101,n_depth: 1, CV AUC [ 0.65298822 0.92471854 0.81192919 0.75030513 0.6160564 0.61843809
0.56494423], Average AUC 0.705625685878019
n_min sample leaf: 18, n_trees: 101,n_depth: 1, CV AUC [ 0.64454703 0.9383549 0.81977588 0.74779303 0.61241056 0.61454254
0.56679159], Average AUC 0.7063165067249008
n_min sample leaf: 24, n_trees: 101,n_depth: 1, CV AUC [ 0.66802662 0.92458176 0.81981797 0.75054451 0.63392782 0.61445304
0.56679159], Average AUC 0.711163330063117
n_min sample leaf: 30, n_trees: 101,n_depth: 1, CV AUC [ 0.64454703 0.92482376 0.81612479 0.74812973 0.64789825 0.59808907
0.59552652], Average AUC 0.7107341663351022
n_min sample leaf: 6, n_trees: 121,n_depth: 1, CV AUC [ 0.66668508 0.92460017 0.81860532 0.75081282 0.64775884 0.58711834
0.56679159], Average AUC 0.708910308655405
n_min sample leaf: 12, n_trees: 121,n_depth: 1, CV AUC [ 0.66668508 0.9383286 0.81427031 0.79277146 0.64586753 0.60202674
0.56679159], Average AUC 0.718105902246233
n_min sample leaf: 18, n_trees: 121,n_depth: 1, CV AUC [ 0.6754919 0.92460806 0.81653777 0.75173874 0.61241056 0.61898294
0.56679159], Average AUC 0.7095087963240324
n_min sample leaf: 24, n_trees: 121,n_depth: 1, CV AUC [ 0.66661669 0.92482376 0.81971801 0.78887048 0.64816393 0.62008318
0.57131444], Average AUC 0.7199414971665937
n_min sample leaf: 30, n_trees: 121,n_depth: 1, CV AUC [ 0.66705072 0.92482376 0.81855798 0.78582176 0.6129314 0.61843809
0.5710437 ], Average AUC 0.7140953433144924
n_min sample leaf: 6, n_trees: 141,n_depth: 1, CV AUC [ 0.66655619 0.92476589 0.81588279 0.78905987 0.60887521 0.61610339
0.57131444], Average AUC 0.7132225385520058
n_min sample leaf: 12, n_trees: 141,n_depth: 1, CV AUC [ 0.66670612 0.92098327 0.81524358 0.79350011 0.63172875 0.58730785
0.5710437 ], Average AUC 0.7123590551136194
n_min sample leaf: 18, n_trees: 141,n_depth: 1, CV AUC [ 0.66793455 0.92482376 0.81793981 0.75352483 0.62809343 0.6146873
0.57131444], Average AUC 0.7111883048981219
n_min sample leaf: 24, n_trees: 141,n_depth: 1, CV AUC [ 0.66655619 0.93443024 0.8113952 0.78575863 0.64503893 0.61367656
0.5710437 ], Average AUC 0.7182713508726264
n_min sample leaf: 30, n_trees: 141,n_depth: 1, CV AUC [ 0.65362742 0.9209254 0.81775568 0.7891572 0.64816393 0.61867235
0.56679159], Average AUC 0.7164419393081863
n_min sample leaf: 6, n_trees: 1,n_depth: 2, CV AUC [ 0.65084175 0.94157197 0.75356429 0.66868687 0.65835438 0.59793114
0.52927109], Average AUC 0.685745926829594
n_min sample leaf: 12, n_trees: 1,n_depth: 2, CV AUC [ 0.57697285 0.91611427 0.75153356 0.7219697 0.57470013 0.58598126
0.53889806], Average AUC 0.6665956898964751
n_min sample leaf: 18, n_trees: 1,n_depth: 2, CV AUC [ 0.66319444 0.95996423 0.76442287 0.68178662 0.5341593 0.5794483
0.52832618], Average AUC 0.6730431346754806
n_min sample leaf: 24, n_trees: 1,n_depth: 2, CV AUC [ 0.57368213 0.91888152 0.75951705 0.52630208 0.57835911 0.55624342
0.54962389], Average AUC 0.637515601475552
n_min sample leaf: 30, n_trees: 1,n_depth: 2, CV AUC [ 0.62649148 0.93700021 0.78007155 0.68743687 0.60228062 0.58036692
0.54716606], Average AUC 0.680116242651594
n_min sample leaf: 6, n_trees: 21,n_depth: 2, CV AUC [ 0.68915194 0.96767414 0.81563026 0.77795665 0.64592014 0.66362129
0.67983883], Average AUC 0.7485418922022697
n_min sample leaf: 12, n_trees: 21,n_depth: 2, CV AUC [ 0.70790983 0.97328493 0.8140283 0.80339594 0.63247317 0.65243209
0.67083561], Average AUC 0.7506228391785512
n_min sample leaf: 18, n_trees: 21,n_depth: 2, CV AUC [ 0.70516625 0.96041141 0.82668876 0.78357534 0.62638626 0.66167351
0.66355234], Average AUC 0.746779123424656
n_min sample leaf: 24, n_trees: 21,n_depth: 2, CV AUC [ 0.69795086 0.97238531 0.81780566 0.79707755 0.62602062 0.66559012
0.68017327], Average AUC 0.7510004854294207
n_min sample leaf: 30, n_trees: 21,n_depth: 2, CV AUC [ 0.68119476 0.96510943 0.81105061 0.767006 0.63594539 0.66051011
0.65464202], Average AUC 0.7393511879018917
n_min sample leaf: 6, n_trees: 41,n_depth: 2, CV AUC [ 0.69588857 0.97038352 0.81464646 0.75764678 0.62940341 0.65936513
0.6613095 ], Average AUC 0.7412347687416724
n_min sample leaf: 12, n_trees: 41,n_depth: 2, CV AUC [ 0.68702125 0.96968645 0.81943918 0.78016098 0.63129998 0.65577753
0.65404747], Average AUC 0.742490407154091
n_min sample leaf: 18, n_trees: 41,n_depth: 2, CV AUC [ 0.68493266 0.97015993 0.81307607 0.7681634 0.62669402 0.65979154
0.65636463], Average AUC 0.7398831799768153
n_min sample leaf: 24, n_trees: 41,n_depth: 2, CV AUC [ 0.69462069 0.96773464 0.82021517 0.76597485 0.6272701 0.65702516
0.66295514], Average AUC 0.7422565353079278
n_min sample leaf: 30, n_trees: 41,n_depth: 2, CV AUC [ 0.70512942 0.96648779 0.81604061 0.78423295 0.63263889 0.65770689
0.66719398], Average AUC 0.7470615050586745
n_min sample leaf: 6, n_trees: 61,n_depth: 2, CV AUC [ 0.70048664 0.9694576 0.81728483 0.7959859 0.62823285 0.65648294
0.66022657], Average AUC 0.7468796174518609
n_min sample leaf: 12, n_trees: 61,n_depth: 2, CV AUC [ 0.70389047 0.97128051 0.81918666 0.78798664 0.63398569 0.66197884
0.6750718 ], Average AUC 0.7504829430703476
n_min sample leaf: 18, n_trees: 61,n_depth: 2, CV AUC [ 0.68989899 0.9705545 0.81858691 0.77905093 0.6270202 0.65819909
0.67660861], Average AUC 0.7457027479805772
n_min sample leaf: 24, n_trees: 61,n_depth: 2, CV AUC [ 0.69745107 0.96972327 0.81442024 0.78084228 0.63196023 0.65094757
0.67831264], Average AUC 0.7462367575550101
n_min sample leaf: 30, n_trees: 61,n_depth: 2, CV AUC [ 0.69407355 0.97088594 0.81895781 0.79094855 0.62658091 0.65458254
0.65914894], Average AUC 0.745025463377251
n_min sample leaf: 6, n_trees: 81,n_depth: 2, CV AUC [ 0.69558081 0.97055187 0.82099905 0.7940604 0.63718697 0.65619078
0.66267113], Average AUC 0.7481772876326874
n_min sample leaf: 12, n_trees: 81,n_depth: 2, CV AUC [ 0.69822969 0.96823706 0.81450705 0.790746 0.63752894 0.66136029
0.66526168], Average AUC 0.747981529491331
n_min sample leaf: 18, n_trees: 81,n_depth: 2, CV AUC [ 0.70038142 0.97157513 0.81950495 0.78239426 0.62624947 0.66250526
0.66871486], Average AUC 0.7473321924946527
n_min sample leaf: 24, n_trees: 81,n_depth: 2, CV AUC [ 0.69465751 0.9678346 0.82099905 0.79055924 0.63506418 0.65721994
0.68011753], Average AUC 0.7494931506636134
n_min sample leaf: 30, n_trees: 81,n_depth: 2, CV AUC [ 0.69924505 0.96895518 0.81596959 0.78600852 0.63171033 0.65854917
0.67526821], Average AUC 0.7479580084065952
n_min sample leaf: 6, n_trees: 101,n_depth: 2, CV AUC [ 0.69566498 0.96835543 0.8186553 0.76419665 0.62250368 0.66097073
0.66300822], Average AUC 0.7419078579629879
n_min sample leaf: 12, n_trees: 101,n_depth: 2, CV AUC [ 0.70444287 0.97043087 0.81762942 0.79149306 0.63303872 0.66595599
0.6739517 ], Average AUC 0.7509918038040334
n_min sample leaf: 18, n_trees: 101,n_depth: 2, CV AUC [ 0.69903988 0.96909722 0.82250631 0.79167193 0.6285222 0.65959676
0.66747002], Average AUC 0.7482720456464265
n_min sample leaf: 24, n_trees: 101,n_depth: 2, CV AUC [ 0.69666456 0.97078862 0.81719539 0.7866451 0.63613479 0.66131554
0.66915812], Average AUC 0.7482717308130908
n_min sample leaf: 30, n_trees: 101,n_depth: 2, CV AUC [ 0.70267519 0.97072548 0.81579598 0.78125789 0.63077388 0.66320015
0.67109042], Average AUC 0.7479312853253599
n_min sample leaf: 6, n_trees: 121,n_depth: 2, CV AUC [ 0.70289089 0.97164352 0.81664825 0.78861006 0.6384496 0.6613366
0.67645466], Average AUC 0.7508619398465916
n_min sample leaf: 12, n_trees: 121,n_depth: 2, CV AUC [ 0.70412195 0.96998106 0.81671928 0.78264415 0.63592961 0.65504843
0.66409381], Average AUC 0.7469340409852817
n_min sample leaf: 18, n_trees: 121,n_depth: 2, CV AUC [ 0.69285827 0.96868424 0.81921822 0.79170349 0.6380866 0.6613945
0.67347394], Average AUC 0.7493456089591889
n_min sample leaf: 24, n_trees: 121,n_depth: 2, CV AUC [ 0.70127578 0.9717803 0.81712437 0.7787379 0.62867477 0.66199463
0.68103325], Average AUC 0.7486601423277154
n_min sample leaf: 30, n_trees: 121,n_depth: 2, CV AUC [ 0.69819287 0.96931555 0.81985217 0.78938342 0.63633996 0.6574542
0.67342351], Average AUC 0.7491373818067014
n_min sample leaf: 6, n_trees: 141,n_depth: 2, CV AUC [ 0.7025463 0.96932081 0.8168771 0.78223117 0.63474853 0.65871499
0.67287142], Average AUC 0.7481871887265726
n_min sample leaf: 12, n_trees: 141,n_depth: 2, CV AUC [ 0.69669087 0.96798453 0.8166693 0.78677399 0.62862742 0.66064435
0.66628092], Average AUC 0.7462387669255538
n_min sample leaf: 18, n_trees: 141,n_depth: 2, CV AUC [ 0.69695128 0.96988373 0.81802925 0.79164036 0.63389625 0.6606812
0.672391 ], Average AUC 0.7490675832896568
n_min sample leaf: 24, n_trees: 141,n_depth: 2, CV AUC [ 0.69832176 0.96866846 0.81693761 0.78723695 0.63170244 0.6625816
0.67235119], Average AUC 0.7482571427572094
n_min sample leaf: 30, n_trees: 141,n_depth: 2, CV AUC [ 0.69723801 0.97067024 0.81981008 0.78688973 0.63194971 0.66010739
0.67584684], Average AUC 0.7489302850332403
n_min sample leaf: 6, n_trees: 1,n_depth: 3, CV AUC [ 0.65105219 0.90235953 0.80104956 0.75319076 0.58914141 0.56949621
0.5878637 ], Average AUC 0.6934504805678247
n_min sample leaf: 12, n_trees: 1,n_depth: 3, CV AUC [ 0.58252315 0.94086437 0.8044718 0.72699916 0.54304766 0.6213466
0.6080599 ], Average AUC 0.6896160921744094
n_min sample leaf: 18, n_trees: 1,n_depth: 3, CV AUC [ 0.59921612 0.94192445 0.80452967 0.76272885 0.60832281 0.66976995
0.54028093], Average AUC 0.7038246834499703
n_min sample leaf: 24, n_trees: 1,n_depth: 3, CV AUC [ 0.6636311 0.9391835 0.78021359 0.77322706 0.53693971 0.57022005
0.63682934], Average AUC 0.7000349076827239
n_min sample leaf: 30, n_trees: 1,n_depth: 3, CV AUC [ 0.5962358 0.94495213 0.7740504 0.71680082 0.58333859 0.62118604
0.55616928], Average AUC 0.6846761503460155
n_min sample leaf: 6, n_trees: 21,n_depth: 3, CV AUC [ 0.70351957 0.97218013 0.81790299 0.77919297 0.64714594 0.65919404
0.69616779], Average AUC 0.7536147765101082
n_min sample leaf: 12, n_trees: 21,n_depth: 3, CV AUC [ 0.69556766 0.96806082 0.82150673 0.77135154 0.63322285 0.65599073
0.69849292], Average AUC 0.7491704638075679
n_min sample leaf: 18, n_trees: 21,n_depth: 3, CV AUC [ 0.7059396 0.9684133 0.82450547 0.7802057 0.63438289 0.66126027
0.67235915], Average AUC 0.749580912448825
n_min sample leaf: 24, n_trees: 21,n_depth: 3, CV AUC [ 0.69114846 0.96773201 0.82318234 0.78646622 0.64643571 0.67358654
0.68516592], Average AUC 0.7533881733574412
n_min sample leaf: 30, n_trees: 21,n_depth: 3, CV AUC [ 0.70387205 0.9681029 0.82731218 0.7812237 0.6431634 0.66716414
0.67957341], Average AUC 0.7529159701266367
n_min sample leaf: 6, n_trees: 41,n_depth: 3, CV AUC [ 0.70799663 0.96956808 0.81600905 0.78487742 0.63781303 0.67658981
0.67965834], Average AUC 0.7532160510897921
n_min sample leaf: 12, n_trees: 41,n_depth: 3, CV AUC [ 0.70138363 0.97315341 0.81719802 0.76757418 0.64233481 0.6721573
0.67504525], Average AUC 0.7498352279828098
n_min sample leaf: 18, n_trees: 41,n_depth: 3, CV AUC [ 0.70506629 0.96662984 0.81658512 0.76705598 0.64148253 0.6704043
0.66911035], Average AUC 0.7480477719440636
n_min sample leaf: 24, n_trees: 41,n_depth: 3, CV AUC [ 0.70202546 0.97016782 0.81864215 0.77428188 0.6447338 0.67008844
0.67267501], Average AUC 0.7503735090124062
n_min sample leaf: 30, n_trees: 41,n_depth: 3, CV AUC [ 0.68819971 0.97238005 0.81724011 0.78596117 0.64472064 0.66165245
0.69544053], Average AUC 0.7522278089755144
n_min sample leaf: 6, n_trees: 61,n_depth: 3, CV AUC [ 0.70457176 0.97224327 0.81685606 0.77792508 0.6481008 0.66894609
0.68900662], Average AUC 0.7539499547593689
n_min sample leaf: 12, n_trees: 61,n_depth: 3, CV AUC [ 0.70154935 0.97106745 0.81954703 0.78126841 0.64249 0.66754053
0.69690302], Average AUC 0.7543379710087835
n_min sample leaf: 18, n_trees: 61,n_depth: 3, CV AUC [ 0.69768255 0.96978378 0.82226957 0.77920349 0.64789299 0.67776637
0.68448112], Average AUC 0.7541542683625738
n_min sample leaf: 24, n_trees: 61,n_depth: 3, CV AUC [ 0.70059449 0.97084649 0.81738215 0.78157881 0.64464962 0.66858286
0.67315277], Average AUC 0.7509695986023128
n_min sample leaf: 30, n_trees: 61,n_depth: 3, CV AUC [ 0.70423506 0.96958912 0.82111742 0.77603904 0.64512574 0.670478
0.68157471], Average AUC 0.7525941551664694
n_min sample leaf: 6, n_trees: 81,n_depth: 3, CV AUC [ 0.70463752 0.97108849 0.81753735 0.7848932 0.64246107 0.66780112
0.6905222 ], Average AUC 0.7541344211732498
n_min sample leaf: 12, n_trees: 81,n_depth: 3, CV AUC [ 0.69898464 0.96928925 0.81517782 0.78887048 0.63806555 0.67042272
0.6897737 ], Average AUC 0.7529405929345486
n_min sample leaf: 18, n_trees: 81,n_depth: 3, CV AUC [ 0.70323811 0.96984428 0.81901568 0.78822075 0.64301084 0.67083333
0.67641219], Average AUC 0.7529393112430594
n_min sample leaf: 24, n_trees: 81,n_depth: 3, CV AUC [ 0.70031829 0.96765572 0.81389152 0.77823811 0.64353956 0.6710518
0.68341677], Average AUC 0.75115882407962
n_min sample leaf: 30, n_trees: 81,n_depth: 3, CV AUC [ 0.69893203 0.96847643 0.82368213 0.77502367 0.64161406 0.67701885
0.68412014], Average AUC 0.7526953304348915
n_min sample leaf: 6, n_trees: 101,n_depth: 3, CV AUC [ 0.70535827 0.97050715 0.81983375 0.77451073 0.64050926 0.67367604
0.68180298], Average AUC 0.7523140268756895
n_min sample leaf: 12, n_trees: 101,n_depth: 3, CV AUC [ 0.69924769 0.97028093 0.82065446 0.79376052 0.63927294 0.66750895
0.67394905], Average AUC 0.7520963626625826
n_min sample leaf: 18, n_trees: 101,n_depth: 3, CV AUC [ 0.70316972 0.96940499 0.81919192 0.77972696 0.64360269 0.67027795
0.6981107 ], Average AUC 0.7547835617202623
n_min sample leaf: 24, n_trees: 101,n_depth: 3, CV AUC [ 0.69685659 0.96987058 0.8183528 0.78756839 0.63477746 0.67292061
0.67868423], Average AUC 0.7512900956926064
n_min sample leaf: 30, n_trees: 101,n_depth: 3, CV AUC [ 0.70412195 0.96997054 0.82021517 0.78206808 0.64122738 0.66962255
0.68428205], Average AUC 0.7530725311446452
n_min sample leaf: 6, n_trees: 121,n_depth: 3, CV AUC [ 0.70838331 0.97103851 0.81905777 0.78080545 0.64082229 0.67035165
0.69222623], Average AUC 0.754669314772177
n_min sample leaf: 12, n_trees: 121,n_depth: 3, CV AUC [ 0.70184133 0.97163563 0.81874474 0.77473169 0.64037774 0.67275479
0.68361849], Average AUC 0.7519577718108914
n_min sample leaf: 18, n_trees: 121,n_depth: 3, CV AUC [ 0.70513994 0.97140678 0.81582229 0.78013994 0.64007523 0.67458149
0.69094953], Average AUC 0.7540164568542502
n_min sample leaf: 24, n_trees: 121,n_depth: 3, CV AUC [ 0.70767572 0.96949442 0.81868424 0.78154987 0.64347117 0.67333123
0.69153612], Average AUC 0.755106110030809
n_min sample leaf: 30, n_trees: 121,n_depth: 3, CV AUC [ 0.70423506 0.96957334 0.82087542 0.77348222 0.64912668 0.66802748
0.69040541], Average AUC 0.7536750869645062
n_min sample leaf: 6, n_trees: 141,n_depth: 3, CV AUC [ 0.70157039 0.96976536 0.81486216 0.78465383 0.64193761 0.67022005
0.68712741], Average AUC 0.7528766867963582
n_min sample leaf: 12, n_trees: 141,n_depth: 3, CV AUC [ 0.70176505 0.96962858 0.8197338 0.78546402 0.64099327 0.673018
0.68551893], Average AUC 0.7537316625353059
n_min sample leaf: 18, n_trees: 141,n_depth: 3, CV AUC [ 0.7014415 0.96949179 0.81871843 0.78638468 0.64057239 0.67669773
0.69290572], Average AUC 0.7551731768284629
n_min sample leaf: 24, n_trees: 141,n_depth: 3, CV AUC [ 0.70051294 0.96977325 0.81865793 0.78270991 0.63941498 0.67046483
0.68436433], Average AUC 0.7522711702137798
n_min sample leaf: 30, n_trees: 141,n_depth: 3, CV AUC [ 0.70170455 0.97175926 0.8176794 0.77902988 0.6421533 0.67249158
0.67727748], Average AUC 0.7517279209799314
n_min sample leaf: 6, n_trees: 1,n_depth: 4, CV AUC [ 0.62856166 0.94150095 0.83935974 0.75795455 0.59738794 0.62086492
0.62951953], Average AUC 0.716449897445674
n_min sample leaf: 12, n_trees: 1,n_depth: 4, CV AUC [ 0.6397359 0.9553267 0.8156592 0.68522727 0.63660564 0.63766846
0.63070598], Average AUC 0.7144184497468403
n_min sample leaf: 18, n_trees: 1,n_depth: 4, CV AUC [ 0.62782776 0.9367056 0.79360532 0.76387311 0.65889625 0.53990051
0.60637976], Average AUC 0.7038840434038877
n_min sample leaf: 24, n_trees: 1,n_depth: 4, CV AUC [ 0.66869739 0.92477641 0.81233165 0.77307449 0.62854324 0.59138503
0.66074945], Average AUC 0.7227939533162819
n_min sample leaf: 30, n_trees: 1,n_depth: 4, CV AUC [ 0.60004998 0.95432713 0.80583176 0.73095013 0.58095013 0.59220362
0.64535214], Average AUC 0.7013806961943418
n_min sample leaf: 6, n_trees: 21,n_depth: 4, CV AUC [ 0.70129156 0.96938394 0.82892992 0.79168771 0.651381 0.67519478
0.70435881], Average AUC 0.7603182467764157
n_min sample leaf: 12, n_trees: 21,n_depth: 4, CV AUC [ 0.6899174 0.97011785 0.826744 0.78029251 0.66176347 0.67984839
0.69795941], Average AUC 0.7580918610840074
n_min sample leaf: 18, n_trees: 21,n_depth: 4, CV AUC [ 0.69324232 0.96822128 0.83010048 0.77957176 0.64834017 0.67831386
0.6904001 ], Average AUC 0.7554557096532598
n_min sample leaf: 24, n_trees: 21,n_depth: 4, CV AUC [ 0.69845065 0.96931029 0.83002683 0.78146044 0.63456965 0.66479785
0.68189588], Average AUC 0.7515016567642295
n_min sample leaf: 30, n_trees: 21,n_depth: 4, CV AUC [ 0.70575547 0.96916035 0.82345328 0.78803925 0.6530987 0.67033323
0.69514856], Average AUC 0.7578555480267333
n_min sample leaf: 6, n_trees: 41,n_depth: 4, CV AUC [ 0.70470065 0.970131 0.82474221 0.79304503 0.65350642 0.67528953
0.70111532], Average AUC 0.7603614524173058
n_min sample leaf: 12, n_trees: 41,n_depth: 4, CV AUC [ 0.70042351 0.96849747 0.82984796 0.79124842 0.65678662 0.68499684
0.70014121], Average AUC 0.7617060035587485
n_min sample leaf: 18, n_trees: 41,n_depth: 4, CV AUC [ 0.70364057 0.97198811 0.82932186 0.79315814 0.65226221 0.69006633
0.68835367], Average AUC 0.7612558428526085
n_min sample leaf: 24, n_trees: 41,n_depth: 4, CV AUC [ 0.70469013 0.9690604 0.82850905 0.78495633 0.66279461 0.67851653
0.70285916], Average AUC 0.7616266016814021
n_min sample leaf: 30, n_trees: 41,n_depth: 4, CV AUC [ 0.70556082 0.97191446 0.82213016 0.77875894 0.66671665 0.68425195
0.70324137], Average AUC 0.7617963340930533
n_min sample leaf: 6, n_trees: 61,n_depth: 4, CV AUC [ 0.68889941 0.97160669 0.82951126 0.78828388 0.66117687 0.68363603
0.70173376], Average AUC 0.7606925572030787
n_min sample leaf: 12, n_trees: 61,n_depth: 4, CV AUC [ 0.68656881 0.96892361 0.82334543 0.78640572 0.65322233 0.69392504
0.69928654], Average AUC 0.7588110690063655
n_min sample leaf: 18, n_trees: 61,n_depth: 4, CV AUC [ 0.70004998 0.97055713 0.82652041 0.78761837 0.65299348 0.68295431
0.69922814], Average AUC 0.7599888318040737
n_min sample leaf: 24, n_trees: 61,n_depth: 4, CV AUC [ 0.70542929 0.97216435 0.82978746 0.78937027 0.65279356 0.6876474
0.70811989], Average AUC 0.763616030735868
n_min sample leaf: 30, n_trees: 61,n_depth: 4, CV AUC [ 0.69037774 0.97064131 0.82202231 0.78651094 0.65884364 0.68545483
0.69815848], Average AUC 0.7588584644596782
n_min sample leaf: 6, n_trees: 81,n_depth: 4, CV AUC [ 0.70400621 0.97023885 0.82828283 0.78622422 0.6619818 0.67828753
0.70430307], Average AUC 0.7619035012608099
n_min sample leaf: 12, n_trees: 81,n_depth: 4, CV AUC [ 0.69311343 0.97198285 0.82564447 0.79025147 0.65465067 0.68371762
0.69667475], Average AUC 0.7594336093222551
n_min sample leaf: 18, n_trees: 81,n_depth: 4, CV AUC [ 0.69677241 0.97101221 0.82736216 0.7871028 0.65459543 0.68418614
0.69240672], Average AUC 0.759062553342371
n_min sample leaf: 24, n_trees: 81,n_depth: 4, CV AUC [ 0.69709859 0.96935238 0.83051084 0.79096696 0.65555293 0.67540798
0.69707289], Average AUC 0.7594232233729841
n_min sample leaf: 30, n_trees: 81,n_depth: 4, CV AUC [ 0.70213068 0.97149884 0.82972959 0.79231902 0.66163457 0.68401506
0.70233893], Average AUC 0.7633809561379508
n_min sample leaf: 6, n_trees: 101,n_depth: 4, CV AUC [ 0.6999395 0.97163037 0.83007944 0.79264783 0.64971591 0.68667614
0.70229115], Average AUC 0.7618543342396141
n_min sample leaf: 12, n_trees: 101,n_depth: 4, CV AUC [ 0.70184922 0.97124106 0.82934817 0.78815499 0.66458333 0.68357812
0.69659247], Average AUC 0.7621924801238092
n_min sample leaf: 18, n_trees: 101,n_depth: 4, CV AUC [ 0.69704335 0.97017835 0.82875631 0.79095907 0.65619476 0.68475995
0.69768337], Average AUC 0.7607964510967739
n_min sample leaf: 24, n_trees: 101,n_depth: 4, CV AUC [ 0.70083123 0.9717803 0.82714646 0.784375 0.66006155 0.68173563
0.69886982], Average AUC 0.7606857139686626
n_min sample leaf: 30, n_trees: 101,n_depth: 4, CV AUC [ 0.69994476 0.97219855 0.82722275 0.79385259 0.65656566 0.67926669
0.70012263], Average AUC 0.7613105165008314
n_min sample leaf: 6, n_trees: 121,n_depth: 4, CV AUC [ 0.70087069 0.96966014 0.82587858 0.78939394 0.65660774 0.68526269
0.70298126], Average AUC 0.7615221475174645
n_min sample leaf: 12, n_trees: 121,n_depth: 4, CV AUC [ 0.69282144 0.96990741 0.82849327 0.78716856 0.66182397 0.68130133
0.69987047], Average AUC 0.7601980636528046
n_min sample leaf: 18, n_trees: 121,n_depth: 4, CV AUC [ 0.69625947 0.97243266 0.82919823 0.78528251 0.65298822 0.67942198
0.69605897], Average AUC 0.7588060057859772
n_min sample leaf: 24, n_trees: 121,n_depth: 4, CV AUC [ 0.70169665 0.97088068 0.82359007 0.7899779 0.6563447 0.68192514
0.69824872], Average AUC 0.7603805529965612
n_min sample leaf: 30, n_trees: 121,n_depth: 4, CV AUC [ 0.69485743 0.9694576 0.82992424 0.79271886 0.65812553 0.67946147
0.69876896], Average AUC 0.760473438936487
n_min sample leaf: 6, n_trees: 141,n_depth: 4, CV AUC [ 0.69787195 0.97021254 0.83116319 0.79223748 0.65993266 0.68322015
0.6976117 ], Average AUC 0.7617499542676971
n_min sample leaf: 12, n_trees: 141,n_depth: 4, CV AUC [ 0.69874263 0.9714778 0.8286169 0.79122475 0.66024043 0.68330438
0.69631378], Average AUC 0.7614172370163474
n_min sample leaf: 18, n_trees: 141,n_depth: 4, CV AUC [ 0.69976063 0.9701915 0.82729903 0.78763941 0.6568366 0.68479154
0.68945253], Average AUC 0.7594244623689524
n_min sample leaf: 24, n_trees: 141,n_depth: 4, CV AUC [ 0.70557923 0.97026252 0.8249658 0.78771833 0.65812027 0.68890819
0.69677031], Average AUC 0.7617606637916848
n_min sample leaf: 30, n_trees: 141,n_depth: 4, CV AUC [ 0.69668824 0.97226168 0.82934817 0.78795244 0.65907513 0.6818646
0.70031373], Average AUC 0.7610719982856337
((24, 61, 4), 0.76361603073586803)
In [44]:
model = RandomForestClassifier(n_estimators=41, max_depth=2, min_samples_leaf=6, min_samples_split=10)
model.fit(X, y)
print (model.score(X, y))
scores=cross_val_score(model, X, y, scoring='roc_auc', cv=7)
print (scores.mean())
0.92964360587
0.741650801023
In [164]:
features = X.columns
feature_importances = model.feature_importances_
features_df = pd.DataFrame({'Features': features, 'Importance Score': feature_importances})
features_df.sort('Importance Score', inplace=True, ascending=False)
features_df
/Users/annakudryashova/anaconda/lib/python3.5/site-packages/ipykernel/__main__.py:5: FutureWarning: sort(columns=....) is deprecated, use sort_values(by=.....)
Out[164]:
Features
Importance Score
2
Week
0.357023
4
Region_West
0.276114
0
Month
0.239704
3
TimePer
0.071609
1
Duration_Sec
0.055550
In [67]:
# 5th revision: remove population count and year
X = df[['Duration_Sec', 'Week', 'TimePer', 'Region_West']]
y = df['Hoax']
score_model={}
for n_depth in range(1, 5, 1):
for n_trees in range(1, 150, 20):
for n_min in range(6, 36, 6):
model = RandomForestClassifier(n_estimators=n_trees, max_depth=n_depth, min_samples_leaf=n_min)
scores = cross_val_score(model, X, y, scoring='roc_auc', cv=7)
score_model[n_min,n_trees,n_depth]=scores.mean()
print('n_min sample leaf: {}, n_trees: {},n_depth: {}, CV AUC {}, Average AUC {}'.format(n_min,n_trees,n_depth,scores, scores.mean()))
best=max(score_model.items(), key=lambda x: x[1])
print (best)
n_min sample leaf: 6, n_trees: 1,n_depth: 1, CV AUC [ 0.58603481 0.91398515 0.5447079 0.65382506 0.54471545 0.55953334
0.51711596], Average AUC 0.6171310958401701
n_min sample leaf: 12, n_trees: 1,n_depth: 1, CV AUC [ 0.61323259 0.46235058 0.73955063 0.66955949 0.54471545 0.55899469
0.52281457], Average AUC 0.5873168565993174
n_min sample leaf: 18, n_trees: 1,n_depth: 1, CV AUC [ 0.56778984 0.46235058 0.5447079 0.56273897 0.54471545 0.55953334
0.52281457], Average AUC 0.5378072357187941
n_min sample leaf: 24, n_trees: 1,n_depth: 1, CV AUC [ 0.58603481 0.91398515 0.73371468 0.56273897 0.53262849 0.55953334
0.51711596], Average AUC 0.6293930580991137
n_min sample leaf: 30, n_trees: 1,n_depth: 1, CV AUC [ 0.56778984 0.9220297 0.61014851 0.66955949 0.53116699 0.53444335
0.52281457], Average AUC 0.6225646369761743
n_min sample leaf: 6, n_trees: 21,n_depth: 1, CV AUC [ 0.64357933 0.90100771 0.81746559 0.74414644 0.59141562 0.61467442
0.59513957], Average AUC 0.7010612393250959
n_min sample leaf: 12, n_trees: 21,n_depth: 1, CV AUC [ 0.64357933 0.93071068 0.78998128 0.74313521 0.61198734 0.58567545
0.5572982 ], Average AUC 0.6946239273675595
n_min sample leaf: 18, n_trees: 21,n_depth: 1, CV AUC [ 0.64571024 0.93109303 0.79289926 0.74313521 0.6126791 0.58457802
0.56230461], Average AUC 0.6960570678389824
n_min sample leaf: 24, n_trees: 21,n_depth: 1, CV AUC [ 0.66749092 0.93109303 0.81119697 0.74918749 0.65000654 0.61946185
0.5569886 ], Average AUC 0.7122036301351987
n_min sample leaf: 30, n_trees: 21,n_depth: 1, CV AUC [ 0.64357933 0.93111316 0.8070615 0.75350409 0.60757516 0.59015077
0.56230461], Average AUC 0.699326945282106
n_min sample leaf: 6, n_trees: 41,n_depth: 1, CV AUC [ 0.64357933 0.93089179 0.81441178 0.75315946 0.61135344 0.58457802
0.55687533], Average AUC 0.699264163366381
n_min sample leaf: 12, n_trees: 41,n_depth: 1, CV AUC [ 0.67483681 0.93075344 0.81426085 0.75303369 0.61135344 0.59984898
0.56230461], Average AUC 0.7066274023709754
n_min sample leaf: 18, n_trees: 41,n_depth: 1, CV AUC [ 0.65188089 0.93091946 0.80931035 0.74692103 0.61135344 0.59090589
0.55687533], Average AUC 0.6997380561211737
n_min sample leaf: 24, n_trees: 41,n_depth: 1, CV AUC [ 0.66542239 0.93089179 0.80749416 0.74554757 0.61135344 0.61636588
0.55687533], Average AUC 0.7048500808466479
n_min sample leaf: 30, n_trees: 41,n_depth: 1, CV AUC [ 0.66542239 0.93077357 0.80954178 0.78675894 0.608848 0.58605804
0.56230461], Average AUC 0.707101046752399
n_min sample leaf: 6, n_trees: 61,n_depth: 1, CV AUC [ 0.66841165 0.93075344 0.81396905 0.78213797 0.61135344 0.61976138
0.56230461], Average AUC 0.7126702206246482
n_min sample leaf: 12, n_trees: 61,n_depth: 1, CV AUC [ 0.66675733 0.92729463 0.81519661 0.74871458 0.60757516 0.58962219
0.56230461], Average AUC 0.7024950172765624
n_min sample leaf: 18, n_trees: 61,n_depth: 1, CV AUC [ 0.66570684 0.93091946 0.8181498 0.74861899 0.6082141 0.58567545
0.55665635], Average AUC 0.7019915713316035
n_min sample leaf: 24, n_trees: 61,n_depth: 1, CV AUC [ 0.6655746 0.93075344 0.80828403 0.78202226 0.60757516 0.58605804
0.56230461], Average AUC 0.7060817349576419
n_min sample leaf: 30, n_trees: 61,n_depth: 1, CV AUC [ 0.6667174 0.93075344 0.81326723 0.74832971 0.6082141 0.5892396
0.56230461], Average AUC 0.7026894416861437
n_min sample leaf: 6, n_trees: 81,n_depth: 1, CV AUC [ 0.64509142 0.93078111 0.81140324 0.74962519 0.64826833 0.5892396
0.56230461], Average AUC 0.7052447880343955
n_min sample leaf: 12, n_trees: 81,n_depth: 1, CV AUC [ 0.66666001 0.93091946 0.81410992 0.79223265 0.6509574 0.61791387
0.56230461], Average AUC 0.7192997035254451
n_min sample leaf: 18, n_trees: 81,n_depth: 1, CV AUC [ 0.66720646 0.93091946 0.81130262 0.75417824 0.6082141 0.61993003
0.56230461], Average AUC 0.7077222183044122
n_min sample leaf: 24, n_trees: 81,n_depth: 1, CV AUC [ 0.66747345 0.93091946 0.81025115 0.74941389 0.608848 0.58605804
0.56246571], Average AUC 0.7022042433707915
n_min sample leaf: 30, n_trees: 81,n_depth: 1, CV AUC [ 0.6667174 0.93091946 0.81441429 0.79301749 0.608848 0.61622492
0.56230461], Average AUC 0.7132065982240215
n_min sample leaf: 6, n_trees: 101,n_depth: 1, CV AUC [ 0.66548227 0.93091946 0.81408225 0.74903657 0.61198734 0.61598329
0.56230461], Average AUC 0.7071136846236575
n_min sample leaf: 12, n_trees: 101,n_depth: 1, CV AUC [ 0.66841165 0.93091946 0.81337036 0.75369526 0.64906574 0.61646656
0.55687533], Average AUC 0.712686339154064
n_min sample leaf: 18, n_trees: 101,n_depth: 1, CV AUC [ 0.66548227 0.93078111 0.81315906 0.74560543 0.64491769 0.61766216
0.56230461], Average AUC 0.7114160489748809
n_min sample leaf: 24, n_trees: 101,n_depth: 1, CV AUC [ 0.66735368 0.94305924 0.81266099 0.78718405 0.6082141 0.58567545
0.56230461], Average AUC 0.7094931621643613
n_min sample leaf: 30, n_trees: 101,n_depth: 1, CV AUC [ 0.66837672 0.93075344 0.81215035 0.74876992 0.6476621 0.59113242
0.56246571], Average AUC 0.7087586646567612
n_min sample leaf: 6, n_trees: 121,n_depth: 1, CV AUC [ 0.66846654 0.93075344 0.81355148 0.74845297 0.6082141 0.61603363
0.56230461], Average AUC 0.706825253323495
n_min sample leaf: 12, n_trees: 121,n_depth: 1, CV AUC [ 0.66726385 0.93078111 0.81201451 0.78824308 0.64720931 0.5892396
0.56230461], Average AUC 0.7138651533093741
n_min sample leaf: 18, n_trees: 121,n_depth: 1, CV AUC [ 0.64357933 0.93075344 0.81053288 0.79771643 0.65098255 0.61653956
0.56230461], Average AUC 0.7160584009205773
n_min sample leaf: 24, n_trees: 121,n_depth: 1, CV AUC [ 0.66857134 0.93077357 0.81218808 0.74766059 0.65444891 0.61977649
0.56230461], Average AUC 0.7136747972402208
n_min sample leaf: 30, n_trees: 121,n_depth: 1, CV AUC [ 0.66666001 0.93091946 0.81380303 0.787013 0.65098255 0.61598329
0.55687533], Average AUC 0.717462381864616
n_min sample leaf: 6, n_trees: 141,n_depth: 1, CV AUC [ 0.6655746 0.93078111 0.8117051 0.75402731 0.61135344 0.58605804
0.56230461], Average AUC 0.7031148877502732
n_min sample leaf: 12, n_trees: 141,n_depth: 1, CV AUC [ 0.66802739 0.93077357 0.81373008 0.78669353 0.63516009 0.58567545
0.56230461], Average AUC 0.7117663875831717
n_min sample leaf: 18, n_trees: 141,n_depth: 1, CV AUC [ 0.66676731 0.93075344 0.81367222 0.79203141 0.65034865 0.61603363
0.56246571], Average AUC 0.7188674804295996
n_min sample leaf: 24, n_trees: 141,n_depth: 1, CV AUC [ 0.66720646 0.93077357 0.81561418 0.78468365 0.6082141 0.58567545
0.56230461], Average AUC 0.7077817181716706
n_min sample leaf: 30, n_trees: 141,n_depth: 1, CV AUC [ 0.65199068 0.93077357 0.81166234 0.78452769 0.60773867 0.61648921
0.56230461], Average AUC 0.709355253148957
n_min sample leaf: 6, n_trees: 1,n_depth: 2, CV AUC [ 0.66347613 0.92233911 0.77091383 0.73847903 0.59258029 0.65151904
0.54021244], Average AUC 0.6970742671663025
n_min sample leaf: 12, n_trees: 1,n_depth: 2, CV AUC [ 0.62101765 0.78066892 0.77003089 0.7611688 0.5632798 0.58570817
0.6765159 ], Average AUC 0.6797700183446045
n_min sample leaf: 18, n_trees: 1,n_depth: 2, CV AUC [ 0.61336234 0.95862513 0.75721696 0.63432243 0.58843224 0.58312568
0.59929271], Average AUC 0.6763396406959105
n_min sample leaf: 24, n_trees: 1,n_depth: 2, CV AUC [ 0.6505709 0.55738348 0.76968124 0.71387245 0.62021553 0.56986332
0.54627854], Average AUC 0.6325522093727872
n_min sample leaf: 30, n_trees: 1,n_depth: 2, CV AUC [ 0.61118652 0.95862513 0.65083917 0.69560241 0.5736034 0.58434393
0.55124217], Average AUC 0.6607775322202111
n_min sample leaf: 6, n_trees: 21,n_depth: 2, CV AUC [ 0.69671231 0.97245029 0.82167653 0.80499879 0.63530347 0.64565682
0.63597876], Average AUC 0.7446824252248828
n_min sample leaf: 12, n_trees: 21,n_depth: 2, CV AUC [ 0.69943459 0.97278989 0.81057061 0.79100006 0.6547558 0.65752976
0.647897 ], Average AUC 0.7477111012421787
n_min sample leaf: 18, n_trees: 21,n_depth: 2, CV AUC [ 0.69062151 0.96687092 0.8069483 0.79311811 0.6458283 0.65507564
0.63923834], Average AUC 0.7425287314603359
n_min sample leaf: 24, n_trees: 21,n_depth: 2, CV AUC [ 0.69941961 0.97640717 0.81814729 0.79344261 0.65468033 0.64194165
0.66118453], Average AUC 0.7493175981354786
n_min sample leaf: 30, n_trees: 21,n_depth: 2, CV AUC [ 0.69750828 0.96774129 0.81228618 0.79027057 0.64122495 0.65749453
0.64399557], Average AUC 0.7443601944367569
n_min sample leaf: 6, n_trees: 41,n_depth: 2, CV AUC [ 0.69926491 0.97462117 0.81729453 0.79304013 0.65314085 0.66236754
0.6564424 ], Average AUC 0.7508816463166799
n_min sample leaf: 12, n_trees: 41,n_depth: 2, CV AUC [ 0.7028655 0.97455073 0.81765173 0.79330929 0.64961915 0.6586574
0.65273226], Average AUC 0.7499122956581336
n_min sample leaf: 18, n_trees: 41,n_depth: 2, CV AUC [ 0.69225587 0.97360994 0.81235662 0.79446138 0.63915469 0.66269476
0.66055274], Average AUC 0.7478694285073578
n_min sample leaf: 24, n_trees: 41,n_depth: 2, CV AUC [ 0.69399503 0.9732301 0.81883653 0.78642186 0.62867514 0.6556898
0.65511088], Average AUC 0.7445656188659501
n_min sample leaf: 30, n_trees: 41,n_depth: 2, CV AUC [ 0.69733861 0.97682222 0.81000463 0.78169021 0.63784412 0.65267185
0.65291601], Average AUC 0.7441839503577415
n_min sample leaf: 6, n_trees: 61,n_depth: 2, CV AUC [ 0.69437181 0.9766562 0.82214441 0.79884086 0.6374844 0.66026077
0.66291122], Average AUC 0.7503813816797825
n_min sample leaf: 12, n_trees: 61,n_depth: 2, CV AUC [ 0.7005225 0.97636944 0.81851203 0.79515314 0.63914966 0.65705908
0.6497118 ], Average AUC 0.7480682349433215
n_min sample leaf: 18, n_trees: 61,n_depth: 2, CV AUC [ 0.70114131 0.97751902 0.81427091 0.80179908 0.64480953 0.66317048
0.64457198], Average AUC 0.7496117569708998
n_min sample leaf: 24, n_trees: 61,n_depth: 2, CV AUC [ 0.70395091 0.97776302 0.8134257 0.79330929 0.65096998 0.66109895
0.64920587], Average AUC 0.749960530305856
n_min sample leaf: 30, n_trees: 61,n_depth: 2, CV AUC [ 0.70091175 0.96731365 0.81137306 0.79618198 0.64896261 0.66348008
0.63349191], Average AUC 0.7459592907791156
n_min sample leaf: 6, n_trees: 81,n_depth: 2, CV AUC [ 0.70384612 0.97703353 0.81507838 0.79826984 0.64146895 0.65955347
0.64741876], Average AUC 0.7489527218110407
n_min sample leaf: 12, n_trees: 81,n_depth: 2, CV AUC [ 0.69957681 0.97674676 0.81013795 0.79586754 0.64977008 0.65546578
0.64704372], Average AUC 0.7478012359992542
n_min sample leaf: 18, n_trees: 81,n_depth: 2, CV AUC [ 0.69791251 0.9783768 0.81315654 0.78833364 0.64153938 0.66135065
0.65696343], Average AUC 0.7482332784370836
n_min sample leaf: 24, n_trees: 81,n_depth: 2, CV AUC [ 0.70337951 0.97591162 0.81411243 0.80704389 0.64949086 0.65906517
0.65840066], Average AUC 0.7524863063664251
n_min sample leaf: 30, n_trees: 81,n_depth: 2, CV AUC [ 0.6987484 0.97787119 0.81117685 0.79662974 0.64005776 0.65120189
0.64570968], Average AUC 0.7459136442661684
n_min sample leaf: 6, n_trees: 101,n_depth: 2, CV AUC [ 0.70001098 0.97534814 0.81111899 0.78628602 0.63676749 0.65390017
0.65215082], Average AUC 0.745083231657374
n_min sample leaf: 12, n_trees: 101,n_depth: 2, CV AUC [ 0.69751577 0.97774038 0.81344834 0.79167673 0.64975248 0.66229706
0.6578897 ], Average AUC 0.7500457802120097
n_min sample leaf: 18, n_trees: 101,n_depth: 2, CV AUC [ 0.69793746 0.97643232 0.81559657 0.80381399 0.64196702 0.65470311
0.64733318], Average AUC 0.7482548092887275
n_min sample leaf: 24, n_trees: 101,n_depth: 2, CV AUC [ 0.70090676 0.97739827 0.81551859 0.8006168 0.65088193 0.66293388
0.66425281], Average AUC 0.7532155784204039
n_min sample leaf: 30, n_trees: 101,n_depth: 2, CV AUC [ 0.69596375 0.97607764 0.81641411 0.80669675 0.64710114 0.65087971
0.64998364], Average AUC 0.7490166775714081
n_min sample leaf: 6, n_trees: 121,n_depth: 2, CV AUC [ 0.70139083 0.97734796 0.81583303 0.79199368 0.64614777 0.65755493
0.65093509], Average AUC 0.7487433279658414
n_min sample leaf: 12, n_trees: 121,n_depth: 2, CV AUC [ 0.69727124 0.97593426 0.81255786 0.79985209 0.64044766 0.65294621
0.64723502], Average AUC 0.7466063321262125
n_min sample leaf: 18, n_trees: 121,n_depth: 2, CV AUC [ 0.70149563 0.97615813 0.81566701 0.78856255 0.64678922 0.65749704
0.66318307], Average AUC 0.7499075208410717
n_min sample leaf: 24, n_trees: 121,n_depth: 2, CV AUC [ 0.6948459 0.97573805 0.81512869 0.7896593 0.65207428 0.65104332
0.64867981], Average AUC 0.7467384768578232
n_min sample leaf: 30, n_trees: 121,n_depth: 2, CV AUC [ 0.70116377 0.97768252 0.81326723 0.78737775 0.64925189 0.65892421
0.64190893], Average AUC 0.7470823284515608
n_min sample leaf: 6, n_trees: 141,n_depth: 2, CV AUC [ 0.70222922 0.97628391 0.81534251 0.79906474 0.63904904 0.65558408
0.65872033], Average AUC 0.7494676902744979
n_min sample leaf: 12, n_trees: 141,n_depth: 2, CV AUC [ 0.70592712 0.97714924 0.81672855 0.79417965 0.64869094 0.66523446
0.65389766], Average AUC 0.7516868014289219
n_min sample leaf: 18, n_trees: 141,n_depth: 2, CV AUC [ 0.69955436 0.97546134 0.81466081 0.79589018 0.64666093 0.65085706
0.6518085 ], Average AUC 0.7478418831610344
n_min sample leaf: 24, n_trees: 141,n_depth: 2, CV AUC [ 0.7064037 0.97648515 0.81478659 0.79252445 0.64069669 0.65453699
0.65702635], Average AUC 0.7489228460918724
n_min sample leaf: 30, n_trees: 141,n_depth: 2, CV AUC [ 0.70267586 0.97725992 0.81192898 0.7931332 0.64440956 0.65816406
0.63750661], Average AUC 0.7464397425992051
n_min sample leaf: 6, n_trees: 1,n_depth: 3, CV AUC [ 0.64873693 0.94661616 0.79645617 0.80745643 0.64338827 0.58603287
0.63327041], Average AUC 0.7231367476745595
n_min sample leaf: 12, n_trees: 1,n_depth: 3, CV AUC [ 0.65839588 0.95425823 0.74315282 0.6889766 0.60432263 0.66008709
0.61978907], Average AUC 0.7041403308853008
n_min sample leaf: 18, n_trees: 1,n_depth: 3, CV AUC [ 0.67222932 0.92543568 0.80144188 0.72969492 0.62478367 0.62636865
0.66094289], Average AUC 0.720128144181185
n_min sample leaf: 24, n_trees: 1,n_depth: 3, CV AUC [ 0.58488452 0.94195243 0.79940182 0.75330285 0.62624014 0.56629414
0.67192731], Average AUC 0.7062861712802961
n_min sample leaf: 30, n_trees: 1,n_depth: 3, CV AUC [ 0.58278605 0.96196571 0.7784251 0.74489354 0.65157621 0.61610411
0.6004581 ], Average AUC 0.7051726886863351
n_min sample leaf: 6, n_trees: 21,n_depth: 3, CV AUC [ 0.71747245 0.97880192 0.80708665 0.7794967 0.65210446 0.66237761
0.66658109], Average AUC 0.7519886978326733
n_min sample leaf: 12, n_trees: 21,n_depth: 3, CV AUC [ 0.69134013 0.97769762 0.81211764 0.78119717 0.65506017 0.66299932
0.68527021], Average AUC 0.7522403228936977
n_min sample leaf: 18, n_trees: 21,n_depth: 3, CV AUC [ 0.68519193 0.98035901 0.81809195 0.7770617 0.65796305 0.67451736
0.67456014], Average AUC 0.7525350202757684
n_min sample leaf: 24, n_trees: 21,n_depth: 3, CV AUC [ 0.68768465 0.97816298 0.81262074 0.76664 0.66578876 0.67016789
0.66984319], Average AUC 0.7501297442065498
n_min sample leaf: 30, n_trees: 21,n_depth: 3, CV AUC [ 0.69424705 0.98001942 0.8061358 0.79613922 0.65799072 0.66432832
0.67938534], Average AUC 0.7540351231833661
n_min sample leaf: 6, n_trees: 41,n_depth: 3, CV AUC [ 0.70680543 0.97905598 0.80797462 0.78778274 0.65297231 0.66778424
0.67170581], Average AUC 0.7534401626123325
n_min sample leaf: 12, n_trees: 41,n_depth: 3, CV AUC [ 0.69313668 0.97793659 0.81156927 0.78738278 0.66177151 0.67877873
0.67496791], Average AUC 0.7550776367890099
n_min sample leaf: 18, n_trees: 41,n_depth: 3, CV AUC [ 0.69366566 0.97823593 0.81183339 0.79233579 0.6649184 0.66705933
0.67697148], Average AUC 0.7550028547853321
n_min sample leaf: 24, n_trees: 41,n_depth: 3, CV AUC [ 0.70496896 0.97705868 0.80911917 0.7932137 0.66299907 0.66689572
0.67460293], Average AUC 0.7555511770331352
n_min sample leaf: 30, n_trees: 41,n_depth: 3, CV AUC [ 0.70291291 0.97542361 0.81437656 0.78507607 0.66265445 0.66373682
0.68566287], Average AUC 0.7556918970162817
n_min sample leaf: 6, n_trees: 61,n_depth: 3, CV AUC [ 0.696353 0.9761204 0.80953172 0.78903294 0.65440363 0.66991618
0.68604294], Average AUC 0.7544858302288694
n_min sample leaf: 12, n_trees: 61,n_depth: 3, CV AUC [ 0.69962422 0.97869124 0.81387346 0.77916969 0.6640908 0.67839865
0.68126306], Average AUC 0.7564444446707127
n_min sample leaf: 18, n_trees: 61,n_depth: 3, CV AUC [ 0.69846644 0.97808249 0.80916948 0.78106134 0.65409422 0.67611568
0.67910846], Average AUC 0.7537283026266559
n_min sample leaf: 24, n_trees: 61,n_depth: 3, CV AUC [ 0.69940464 0.97997666 0.81421054 0.78342087 0.66373611 0.67609051
0.68176647], Average AUC 0.7569436863259413
n_min sample leaf: 30, n_trees: 61,n_depth: 3, CV AUC [ 0.69944956 0.97773787 0.81113157 0.7791068 0.66003079 0.66953862
0.68207858], Average AUC 0.7541533980983843
n_min sample leaf: 6, n_trees: 81,n_depth: 3, CV AUC [ 0.69814955 0.98009992 0.81621036 0.78135817 0.66500392 0.6751592
0.67851192], Average AUC 0.7563561480653457
n_min sample leaf: 12, n_trees: 81,n_depth: 3, CV AUC [ 0.70302519 0.97859062 0.81179315 0.77981868 0.65771402 0.67826525
0.68078733], Average AUC 0.7557134626138547
n_min sample leaf: 18, n_trees: 81,n_depth: 3, CV AUC [ 0.70438259 0.97844975 0.81221575 0.77380413 0.65715306 0.67458783
0.68327418], Average AUC 0.7548381851992554
n_min sample leaf: 24, n_trees: 81,n_depth: 3, CV AUC [ 0.69497066 0.97770768 0.81065866 0.77823895 0.65803852 0.66852929
0.68809182], Average AUC 0.7537479385986531
n_min sample leaf: 30, n_trees: 81,n_depth: 3, CV AUC [ 0.70211195 0.97885223 0.80603518 0.78625332 0.65972641 0.6726497
0.67343502], Average AUC 0.7541519729518311
n_min sample leaf: 6, n_trees: 101,n_depth: 3, CV AUC [ 0.70528336 0.98013765 0.80912672 0.77985893 0.65815423 0.66742933
0.68281104], Average AUC 0.7546858948719379
n_min sample leaf: 12, n_trees: 101,n_depth: 3, CV AUC [ 0.70282807 0.97923207 0.80863117 0.78541063 0.65860953 0.67347781
0.68588437], Average AUC 0.7562962358030448
n_min sample leaf: 18, n_trees: 101,n_depth: 3, CV AUC [ 0.70571253 0.97854785 0.80930532 0.78293035 0.67025879 0.66923154
0.67398626], Average AUC 0.7557103790333172
n_min sample leaf: 24, n_trees: 101,n_depth: 3, CV AUC [ 0.69694187 0.97858559 0.8103568 0.782792 0.66632959 0.6703315
0.68057338], Average AUC 0.7551301037503835
n_min sample leaf: 30, n_trees: 101,n_depth: 3, CV AUC [ 0.70182999 0.97840447 0.80639741 0.78695263 0.66346696 0.66807622
0.6768557 ], Average AUC 0.7545690520515824
n_min sample leaf: 6, n_trees: 121,n_depth: 3, CV AUC [ 0.70351425 0.97859565 0.81290751 0.78255303 0.66145204 0.67484205
0.68083516], Average AUC 0.7563856709221343
n_min sample leaf: 12, n_trees: 121,n_depth: 3, CV AUC [ 0.69741347 0.97810764 0.80626912 0.78400447 0.65741467 0.66924413
0.67230235], Average AUC 0.7521079780414698
n_min sample leaf: 18, n_trees: 121,n_depth: 3, CV AUC [ 0.70095916 0.98011249 0.81317415 0.77695605 0.6533597 0.67547132
0.67105389], Average AUC 0.7530123939184082
n_min sample leaf: 24, n_trees: 121,n_depth: 3, CV AUC [ 0.70135091 0.97985088 0.80950153 0.77961493 0.66412099 0.67147172
0.68620655], Average AUC 0.7560167858024922
n_min sample leaf: 30, n_trees: 121,n_depth: 3, CV AUC [ 0.70028296 0.97925471 0.81209752 0.77808299 0.6558601 0.67017292
0.68177402], Average AUC 0.7539321737730275
n_min sample leaf: 6, n_trees: 141,n_depth: 3, CV AUC [ 0.69916261 0.97796678 0.80886259 0.7778239 0.66147972 0.66791261
0.6819477 ], Average AUC 0.7535936986430295
n_min sample leaf: 12, n_trees: 141,n_depth: 3, CV AUC [ 0.70355667 0.97827115 0.8140093 0.78320454 0.66368832 0.66712477
0.6708047 ], Average AUC 0.7543799215628502
n_min sample leaf: 18, n_trees: 141,n_depth: 3, CV AUC [ 0.703906 0.97860571 0.8097883 0.78138835 0.65558088 0.67051524
0.68292935], Average AUC 0.7546734035856852
n_min sample leaf: 24, n_trees: 141,n_depth: 3, CV AUC [ 0.7041131 0.97885978 0.81220569 0.78830093 0.65587268 0.67063354
0.68279091], Average AUC 0.7561109465976392
n_min sample leaf: 30, n_trees: 141,n_depth: 3, CV AUC [ 0.69799235 0.97986849 0.81264338 0.77731577 0.65751278 0.67391075
0.67937275], Average AUC 0.7540880381901138
n_min sample leaf: 6, n_trees: 1,n_depth: 4, CV AUC [ 0.65731795 0.95910811 0.80514721 0.72000523 0.57069548 0.6434242
0.63169221], Average AUC 0.7124843417684606
n_min sample leaf: 12, n_trees: 1,n_depth: 4, CV AUC [ 0.67332222 0.96185754 0.76635324 0.7622882 0.60488358 0.64315739
0.58868081], Average AUC 0.7143632831020471
n_min sample leaf: 18, n_trees: 1,n_depth: 4, CV AUC [ 0.59551661 0.9663955 0.75563471 0.7588797 0.58724241 0.64100028
0.65558408], Average AUC 0.7086076122036395
n_min sample leaf: 24, n_trees: 1,n_depth: 4, CV AUC [ 0.68362993 0.96012185 0.76405659 0.74269249 0.60054435 0.62367288
0.66150167], Average AUC 0.7194599666106403
n_min sample leaf: 30, n_trees: 1,n_depth: 4, CV AUC [ 0.67962761 0.92506591 0.77862634 0.72284523 0.69670168 0.59045282
0.63666088], Average AUC 0.7185686376354872
n_min sample leaf: 6, n_trees: 21,n_depth: 4, CV AUC [ 0.70030042 0.97776302 0.81168749 0.78378562 0.66013644 0.67086259
0.69337008], Average AUC 0.7568436678046223
n_min sample leaf: 12, n_trees: 21,n_depth: 4, CV AUC [ 0.70101655 0.97905598 0.82404864 0.78161223 0.67786314 0.67786
0.67663923], Average AUC 0.7597279676116573
n_min sample leaf: 18, n_trees: 21,n_depth: 4, CV AUC [ 0.69307929 0.98039171 0.82163628 0.78739787 0.6761526 0.67351809
0.68209117], Average AUC 0.7591810005412256
n_min sample leaf: 24, n_trees: 21,n_depth: 4, CV AUC [ 0.70128853 0.97958172 0.81662038 0.78267377 0.67269128 0.67555942
0.69148733], Average AUC 0.7599860601959223
n_min sample leaf: 30, n_trees: 21,n_depth: 4, CV AUC [ 0.6883858 0.9793176 0.81406715 0.79044917 0.68489646 0.68189987
0.69510181], Average AUC 0.7620168378323454
n_min sample leaf: 6, n_trees: 41,n_depth: 4, CV AUC [ 0.70319237 0.97971756 0.81648706 0.79302252 0.67345599 0.67950867
0.69032445], Average AUC 0.762244088300155
n_min sample leaf: 12, n_trees: 41,n_depth: 4, CV AUC [ 0.70155302 0.97798438 0.81829319 0.78116196 0.67996106 0.67982582
0.68184198], Average AUC 0.7600887721664737
n_min sample leaf: 18, n_trees: 41,n_depth: 4, CV AUC [ 0.69337123 0.97770516 0.80648545 0.79013725 0.68182504 0.67028367
0.69113494], Average AUC 0.7587061057441974
n_min sample leaf: 24, n_trees: 41,n_depth: 4, CV AUC [ 0.69636298 0.97833404 0.82069548 0.77890304 0.67262839 0.67606786
0.69030683], Average AUC 0.7590426610104258
n_min sample leaf: 30, n_trees: 41,n_depth: 4, CV AUC [ 0.69433188 0.97741337 0.81278425 0.78957377 0.67457035 0.67532533
0.69340532], Average AUC 0.7596291825842824
n_min sample leaf: 6, n_trees: 61,n_depth: 4, CV AUC [ 0.69725128 0.97890506 0.81251006 0.79052715 0.67934728 0.67096579
0.68717813], Average AUC 0.7595263923183023
n_min sample leaf: 12, n_trees: 61,n_depth: 4, CV AUC [ 0.69527258 0.97928489 0.81751087 0.78821792 0.67591363 0.67879886
0.68179919], Average AUC 0.7595425629567425
n_min sample leaf: 18, n_trees: 61,n_depth: 4, CV AUC [ 0.7022941 0.97827115 0.81176548 0.78355168 0.67755122 0.67619623
0.69004002], Average AUC 0.7599528379106432
n_min sample leaf: 24, n_trees: 61,n_depth: 4, CV AUC [ 0.69966414 0.98005464 0.80877707 0.79202135 0.67051286 0.67126784
0.6878074 ], Average AUC 0.7585864705125038
n_min sample leaf: 30, n_trees: 61,n_depth: 4, CV AUC [ 0.69928487 0.97793659 0.80971786 0.79351556 0.68380977 0.67915125
0.68307785], Average AUC 0.7609276786610721
n_min sample leaf: 6, n_trees: 81,n_depth: 4, CV AUC [ 0.69674974 0.98009488 0.81348356 0.79028063 0.6797246 0.67356843
0.67949609], Average AUC 0.7590568468583182
n_min sample leaf: 12, n_trees: 81,n_depth: 4, CV AUC [ 0.70051751 0.97711905 0.81820766 0.78927191 0.68016733 0.68380528
0.68028644], Average AUC 0.7613393123004932
n_min sample leaf: 18, n_trees: 81,n_depth: 4, CV AUC [ 0.69790253 0.97915409 0.81892709 0.78592379 0.6744647 0.67297944
0.6895643 ], Average AUC 0.7598451334473112
n_min sample leaf: 24, n_trees: 81,n_depth: 4, CV AUC [ 0.69707661 0.97880695 0.81541294 0.78678912 0.68200364 0.68064135
0.68776209], Average AUC 0.7612132434480969
n_min sample leaf: 30, n_trees: 81,n_depth: 4, CV AUC [ 0.70247375 0.97867866 0.81754608 0.79176226 0.67325475 0.67436633
0.68621662], Average AUC 0.7606140641509852
n_min sample leaf: 6, n_trees: 101,n_depth: 4, CV AUC [ 0.70274573 0.97865854 0.81832337 0.77607311 0.67819518 0.6716907
0.68608321], Average AUC 0.7588242639625639
n_min sample leaf: 12, n_trees: 101,n_depth: 4, CV AUC [ 0.70148565 0.98033889 0.8184416 0.78031927 0.66931045 0.67761585
0.6863475 ], Average AUC 0.7591227437341225
n_min sample leaf: 18, n_trees: 101,n_depth: 4, CV AUC [ 0.69689945 0.97997414 0.81352632 0.78695766 0.67786062 0.67641773
0.68220444], Average AUC 0.7591200516684108
n_min sample leaf: 24, n_trees: 101,n_depth: 4, CV AUC [ 0.70348431 0.97897549 0.81753351 0.78194176 0.67315916 0.68028644
0.68807672], Average AUC 0.7604939125179458
n_min sample leaf: 30, n_trees: 101,n_depth: 4, CV AUC [ 0.69617834 0.9784095 0.81578524 0.78958132 0.66648555 0.67700672
0.6866571 ], Average AUC 0.7585862530736526
n_min sample leaf: 6, n_trees: 121,n_depth: 4, CV AUC [ 0.70542059 0.97967731 0.81197175 0.78022368 0.67631862 0.67583126
0.69103677], Average AUC 0.7600685688774153
n_min sample leaf: 12, n_trees: 121,n_depth: 4, CV AUC [ 0.7003653 0.97890254 0.81739767 0.79383251 0.67769208 0.67657379
0.68966498], Average AUC 0.7620612667214617
n_min sample leaf: 18, n_trees: 121,n_depth: 4, CV AUC [ 0.69684456 0.9796748 0.8183611 0.78945555 0.67165993 0.67805633
0.6913212 ], Average AUC 0.7607676376695307
n_min sample leaf: 24, n_trees: 121,n_depth: 4, CV AUC [ 0.70029044 0.97995402 0.81452497 0.78541063 0.67726193 0.68282111
0.69002744], Average AUC 0.761470077834229
n_min sample leaf: 30, n_trees: 121,n_depth: 4, CV AUC [ 0.69660751 0.97935784 0.81890445 0.78787582 0.67690976 0.67184424
0.68634247], Average AUC 0.7596917290682094
n_min sample leaf: 6, n_trees: 141,n_depth: 4, CV AUC [ 0.69487085 0.97891763 0.81749829 0.79086171 0.67646704 0.67129804
0.68906592], Average AUC 0.7598542115700424
n_min sample leaf: 12, n_trees: 141,n_depth: 4, CV AUC [ 0.70211444 0.97900064 0.81280689 0.794318 0.67723426 0.67824259
0.69017846], Average AUC 0.7619850413216771
n_min sample leaf: 18, n_trees: 141,n_depth: 4, CV AUC [ 0.70095666 0.97946601 0.81676125 0.79072084 0.67742041 0.67067633
0.68909109], Average AUC 0.7607275139325723
n_min sample leaf: 24, n_trees: 141,n_depth: 4, CV AUC [ 0.69986376 0.97895285 0.8146105 0.79239364 0.6733604 0.67672984
0.6887085 ], Average AUC 0.7606599287882635
n_min sample leaf: 30, n_trees: 141,n_depth: 4, CV AUC [ 0.69970906 0.97903838 0.81621036 0.79468023 0.67836875 0.68040223
0.68267261], Average AUC 0.7615830863588452
((6, 41, 4), 0.762244088300155)
In [70]:
model = RandomForestClassifier(n_estimators=81, max_depth=4, min_samples_leaf=6, min_samples_split=10)
model.fit(X, y)
# print (model.score(X, y))
scores=cross_val_score(model, X, y, scoring='roc_auc', cv=7)
scores_acc=cross_val_score(model, X, y, scoring='accuracy', cv=7)
print (scores.mean())
print (scores_acc.mean())
0.757576490633
0.942161176283
In [71]:
features = X.columns
feature_importances = model.feature_importances_
features_df = pd.DataFrame({'Features': features, 'Importance Score': feature_importances})
features_df.sort('Importance Score', inplace=True, ascending=False)
features_df
/Users/annakudryashova/anaconda/lib/python3.5/site-packages/ipykernel/__main__.py:5: FutureWarning: sort(columns=....) is deprecated, use sort_values(by=.....)
Out[71]:
Features
Importance Score
1
Week
0.538725
2
TimePer
0.175699
3
Region_West
0.168867
0
Duration_Sec
0.116708
Content source: voyagenius/ds
Similar notebooks: