In [72]:
# Part4. Hoax Prediction using Logistic Regression and Random Forest
%matplotlib inline import matplotlib.pyplot as plt import seaborn as sns import statsmodels.api as sm import pandas as pd import numpy as np import seaborn as sns import statsmodels.formula.api as smf import sklearn.linear_model

In [56]:
df = pd.read_csv("ufo_reports.csv", sep='\t')
df=df[(~df.Long.isnull()) & (~df.Lat.isnull())]

In [57]:
shapes = pd.get_dummies(df.Shape,prefix='Shape').iloc[:,0:]

In [58]:
# df=pd.concat([df,shapes],axis=1)
cols_to_keep=['Date', 'Year', 'Month', 'Day', 'Time', 'City', 'State', 'Lat', 'Long', 'Shape',
       'Duration_Sec', 'Summary', 'WeekDay', 'Week', 'Quarter', 'TimePer', 'state', 'state abbr', 'Region',
       'Hoax', 'ASTR','HOL', 'Pop', 'Milit_Share']
df = df[cols_to_keep].join(shapes.ix[:, 'Shape_Chevron':])

In [59]:
regions = pd.get_dummies(df.Region,prefix='Region').iloc[:,0:]

In [60]:
cols_to_keep=['Date', 'Year', 'Month', 'Day', 'Time', 'City', 'State', 'Lat', 'Long','Shape',
       'Duration_Sec', 'Summary', 'WeekDay', 'Week', 'Quarter', 'TimePer',
       'state', 'state abbr', 'Region', 'Hoax', 'ASTR','HOL', 'Pop', 'Milit_Share',
       'Shape_Chevron','Shape_Cigar', 'Shape_Circle', 'Shape_Cone', 'Shape_Cross',
       'Shape_Cylinder', 'Shape_Diamond', 'Shape_Disk', 'Shape_Egg',
       'Shape_Fireball', 'Shape_Flash', 'Shape_Formation', 'Shape_Light',
       'Shape_Other', 'Shape_Oval', 'Shape_Rectangle', 'Shape_Sphere',
       'Shape_Teardrop', 'Shape_Triangle']
df = df[cols_to_keep].join(regions.ix[:, 'Region_Midwest':])

In [61]:
df.columns
df = df.reset_index()

In [62]:
df_copy = pd.DataFrame(df)
df_copy.set_index('Date',inplace=True)
df_copy.index = pd.to_datetime(df_copy.index, unit='s')
df_copy.head()


Out[62]:
index Year Month Day Time City State Lat Long Shape ... Shape_Other Shape_Oval Shape_Rectangle Shape_Sphere Shape_Teardrop Shape_Triangle Region_Midwest Region_Northeast Region_South Region_West
Date
2015-12-31 0 2015 12 31 23:59 Eugene OR 44.052069 -123.086746 Fireball ... 0 0 0 0 0 0 0 0 0 1
2015-12-31 1 2015 12 31 15:00 Monmouth OR 44.848449 -123.233987 Egg ... 0 0 0 0 0 0 0 0 0 1
2015-12-30 2 2015 12 30 22:00 Portland OR 45.523447 -122.676207 Light ... 0 0 0 0 0 0 0 0 0 1
2015-12-30 3 2015 12 30 19:30 Springfield OR 44.046237 -123.022028 Changing ... 0 0 0 0 0 0 0 0 0 1
2015-12-30 4 2015 12 30 16:50 Orient OR 45.467337 -122.352587 Cylinder ... 0 0 0 0 0 0 0 0 0 1

5 rows × 47 columns


In [63]:
#astronomical events
ASTR=[
'2014-05-06','2014-01-01','2014-01-02','2014-01-03','2014-01-05','2014-01-16','2014-01-30',
'2014-02-14',
'2014-03-01','2014-03-16','2014-03-20','2014-03-30',
'2014-04-08','2014-04-15','2014-04-23','2014-04-22','2014-04-29',
'2014-05-05','2014-05-10','2014-05-14','2014-05-24','2014-05-28',
'2014-06-07','2014-06-13','2014-06-21','2014-06-27',
'2014-07-12','2014-07-26','2014-07-29','2014-07-28',
'2014-08-10','2014-08-12','2014-08-18','2014-08-25','2014-08-29','2014-08-13',
'2014-09-09','2014-09-23','2014-09-24',
'2014-10-04','2014-10-07','2014-10-08','2014-10-09','2014-10-21','2014-10-20','2014-10-23',
'2014-11-05','2014-11-06','2014-11-17','2014-11-18','2014-11-22',
'2014-12-06','2014-12-13','2014-12-14','2014-12-21','2014-12-22','2014-12-23','2014-12-22',
'2015-01-03','2015-01-04','2015-01-05','2015-01-20',
'2015-02-03','2015-02-06','2015-02-18','2015-02-22','2015-2-024',
'2015-03-05','2015-03-06','2015-03-20',
'2015-04-04','2015-04-13','2015-04-18','2015-04-22','2015-04-23','2015-04-25',
'2015-05-04','2015-05-05','2015-05-06','2015-05-07','2015-05-18','2015-05-23',
'2015-06-02','2015-06-06','2015-06-16','2015-06-21','2015-06-24',
'2015-07-01','2015-07-02','2015-07-14','2015-07-16','2015-07-28','2015-07-29','2015-07-31',
'2015-08-12','2015-08-13','2015-08-14','2015-08-29',
'2015-09-01','2015-09-04','2015-09-13','2015-09-23','2015-09-28',
'2015-10-01','2015-10-08','2015-10-11','2015-10-16','2015-10-13','2015-10-22','2015-10-21','2015-10-26','2015-10-27','2015-10-28',
'2015-11-06','2015-11-05','2015-11-11','2015-11-18','2015-11-17','2015-11-25',
'2015-12-07','2015-12-11','2015-12-13','2015-12-14','2015-12-22','2015-12-21','2015-12-25','2015-12-29'
]

In [64]:
holiday=[
'2014-01-01','2014-01-20','2014-02-17','2014-03-04','2014-03-17','2014-03-09','2014-05-26',
'2014-07-04','2014-09-01','2014-10-13','2014-11-11','2014-11-27','2014-12-25'    
'2015-01-01','2015-01-19','2015-02-16','2015-05-25','2015-07-04','2015-09-07',
'2015-10-12','2015-11-11','2015-11-26','2015-12-25'
]

In [65]:
def MATCH(a, LIST):
    if a in LIST:
        return 1
    else:
        return 0

In [66]:
df=df.reset_index()
df.head(1)
df.columns


Out[66]:
Index(['Date', 'index', 'Year', 'Month', 'Day', 'Time', 'City', 'State', 'Lat',
       'Long', 'Shape', 'Duration_Sec', 'Summary', 'WeekDay', 'Week',
       'Quarter', 'TimePer', 'state', 'state abbr', 'Region', 'Hoax', 'ASTR',
       'HOL', 'Pop', 'Milit_Share', 'Shape_Chevron', 'Shape_Cigar',
       'Shape_Circle', 'Shape_Cone', 'Shape_Cross', 'Shape_Cylinder',
       'Shape_Diamond', 'Shape_Disk', 'Shape_Egg', 'Shape_Fireball',
       'Shape_Flash', 'Shape_Formation', 'Shape_Light', 'Shape_Other',
       'Shape_Oval', 'Shape_Rectangle', 'Shape_Sphere', 'Shape_Teardrop',
       'Shape_Triangle', 'Region_Midwest', 'Region_Northeast', 'Region_South',
       'Region_West'],
      dtype='object')

In [26]:
# HOAX Prediction

# 1. Logistic Regression

In [27]:
# All features
import statsmodels.api as sm
from sklearn.linear_model import LogisticRegression
import statsmodels.formula.api as smf
sm.Logit(df['Hoax'],
sm.add_constant(df[['Year', 'Month', 'Day', 'Lat', 'Long',
       'Duration_Sec', 'WeekDay', 'Week', 'Quarter', 'TimePer', 'ASTR', 'Pop','HOL', 'Milit_Share',
       'Shape_Chevron','Shape_Cigar', 'Shape_Circle', 'Shape_Cone', 'Shape_Cross',
       'Shape_Cylinder', 'Shape_Diamond', 'Shape_Disk', 'Shape_Egg',
       'Shape_Fireball', 'Shape_Flash', 'Shape_Formation', 'Shape_Light',
       'Shape_Other', 'Shape_Oval', 'Shape_Rectangle', 'Shape_Sphere',
       'Shape_Teardrop', 'Shape_Triangle', 'Region_Midwest','Region_Northeast', 'Region_South']])).fit().summary()


Optimization terminated successfully.
         Current function value: 0.227311
         Iterations 12
Out[27]:
Logit Regression Results
Dep. Variable: Hoax No. Observations: 12172
Model: Logit Df Residuals: 12135
Method: MLE Df Model: 36
Date: Wed, 08 Jun 2016 Pseudo R-squ.: 0.1112
Time: 19:03:03 Log-Likelihood: -2766.8
converged: True LL-Null: -3113.0
LLR p-value: 1.993e-122
coef std err z P>|z| [95.0% Conf. Int.]
const -1988.9681 162.625 -12.230 0.000 -2307.707 -1670.229
Year 0.9865 0.081 12.220 0.000 0.828 1.145
Month 0.1725 0.078 2.198 0.028 0.019 0.326
Day -0.0146 0.005 -3.139 0.002 -0.024 -0.005
Lat -0.0174 0.008 -2.174 0.030 -0.033 -0.002
Long 0.0018 0.004 0.411 0.681 -0.007 0.010
Duration_Sec 1.565e-05 5.26e-06 2.973 0.003 5.33e-06 2.6e-05
WeekDay 0.0042 0.019 0.217 0.828 -0.034 0.042
Week 0.0116 0.015 0.781 0.435 -0.018 0.041
Quarter -0.3344 0.145 -2.302 0.021 -0.619 -0.050
TimePer -0.0185 0.010 -1.867 0.062 -0.038 0.001
ASTR -0.2170 0.105 -2.060 0.039 -0.423 -0.011
Pop 1.629e-08 3.23e-09 5.043 0.000 9.96e-09 2.26e-08
HOL -0.5963 0.222 -2.682 0.007 -1.032 -0.161
Milit_Share 11.3181 9.657 1.172 0.241 -7.609 30.245
Shape_Chevron -0.5858 0.366 -1.601 0.109 -1.303 0.131
Shape_Cigar -0.3963 0.301 -1.317 0.188 -0.986 0.193
Shape_Circle -0.4180 0.145 -2.891 0.004 -0.701 -0.135
Shape_Cone 1.0318 0.353 2.919 0.004 0.339 1.725
Shape_Cross 1.2484 0.418 2.983 0.003 0.428 2.069
Shape_Cylinder -0.4175 0.349 -1.195 0.232 -1.102 0.267
Shape_Diamond -0.2500 0.301 -0.830 0.406 -0.840 0.340
Shape_Disk -0.4771 0.243 -1.960 0.050 -0.954 7.77e-05
Shape_Egg 0.8685 0.351 2.471 0.013 0.180 1.557
Shape_Fireball -0.2888 0.151 -1.911 0.056 -0.585 0.007
Shape_Flash -0.3078 0.263 -1.168 0.243 -0.824 0.209
Shape_Formation -0.9248 0.278 -3.330 0.001 -1.469 -0.380
Shape_Light -0.1021 0.119 -0.855 0.393 -0.336 0.132
Shape_Other -0.1418 0.165 -0.857 0.392 -0.466 0.183
Shape_Oval -0.8490 0.253 -3.361 0.001 -1.344 -0.354
Shape_Rectangle -1.6501 0.518 -3.187 0.001 -2.665 -0.635
Shape_Sphere -0.6276 0.191 -3.281 0.001 -1.002 -0.253
Shape_Teardrop -0.2266 0.417 -0.544 0.587 -1.043 0.590
Shape_Triangle -0.7574 0.187 -4.041 0.000 -1.125 -0.390
Region_Midwest -0.6139 0.166 -3.701 0.000 -0.939 -0.289
Region_Northeast -0.8098 0.215 -3.775 0.000 -1.230 -0.389
Region_South -0.7986 0.167 -4.778 0.000 -1.126 -0.471

In [28]:
from sklearn.metrics import roc_auc_score
#ROC curve
log_reg = LogisticRegression()
log_reg.fit(df[['Year', 'Month', 'Day', 'Lat', 'Long',
       'Duration_Sec', 'WeekDay', 'Week', 'Quarter', 'TimePer', 'ASTR', 'Pop','HOL', 'Milit_Share',
       'Shape_Chevron','Shape_Cigar', 'Shape_Circle', 'Shape_Cone', 'Shape_Cross',
       'Shape_Cylinder', 'Shape_Diamond', 'Shape_Disk', 'Shape_Egg',
       'Shape_Fireball', 'Shape_Flash', 'Shape_Formation', 'Shape_Light',
       'Shape_Other', 'Shape_Oval', 'Shape_Rectangle', 'Shape_Sphere',
       'Shape_Teardrop', 'Shape_Triangle', 'Region_Midwest','Region_Northeast', 'Region_South']], df['Hoax'])

roc_auc_score(df['Hoax'], log_reg.predict(df[[ 'Year', 'Month', 'Day', 'Lat', 'Long',
       'Duration_Sec', 'WeekDay', 'Week', 'Quarter', 'TimePer', 'ASTR', 'Pop','HOL', 'Milit_Share',
       'Shape_Chevron','Shape_Cigar', 'Shape_Circle', 'Shape_Cone', 'Shape_Cross',
       'Shape_Cylinder', 'Shape_Diamond', 'Shape_Disk', 'Shape_Egg',
       'Shape_Fireball', 'Shape_Flash', 'Shape_Formation', 'Shape_Light',
       'Shape_Other', 'Shape_Oval', 'Shape_Rectangle', 'Shape_Sphere',
       'Shape_Teardrop', 'Shape_Triangle', 'Region_Midwest','Region_Northeast', 'Region_South']]))


Out[28]:
0.5

In [29]:
from sklearn.metrics import roc_curve, roc_auc_score

actuals = log_reg.predict(df[['Year', 'Month', 'Day', 'Lat', 'Long',
       'Duration_Sec', 'WeekDay', 'Week', 'Quarter', 'TimePer', 'ASTR', 'Pop','HOL', 'Milit_Share',
       'Shape_Chevron','Shape_Cigar', 'Shape_Circle', 'Shape_Cone', 'Shape_Cross',
       'Shape_Cylinder', 'Shape_Diamond', 'Shape_Disk', 'Shape_Egg',
       'Shape_Fireball', 'Shape_Flash', 'Shape_Formation', 'Shape_Light',
       'Shape_Other', 'Shape_Oval', 'Shape_Rectangle', 'Shape_Sphere',
       'Shape_Teardrop', 'Shape_Triangle', 'Region_Midwest','Region_Northeast', 'Region_South'
        ]]) 

probas = log_reg.predict_proba(df[['Year', 'Month', 'Day', 'Lat', 'Long',
       'Duration_Sec', 'WeekDay', 'Week', 'Quarter', 'TimePer', 'ASTR', 'Pop','HOL', 'Milit_Share',
       'Shape_Chevron','Shape_Cigar', 'Shape_Circle', 'Shape_Cone', 'Shape_Cross',
       'Shape_Cylinder', 'Shape_Diamond', 'Shape_Disk', 'Shape_Egg',
       'Shape_Fireball', 'Shape_Flash', 'Shape_Formation', 'Shape_Light',
       'Shape_Other', 'Shape_Oval', 'Shape_Rectangle', 'Shape_Sphere',
       'Shape_Teardrop', 'Shape_Triangle', 'Region_Midwest','Region_Northeast', 'Region_South' ]])
plt.plot(roc_curve(df[['Hoax']], probas[:,1])[0], roc_curve(df[['Hoax']], probas[:,1])[1])


Out[29]:
[<matplotlib.lines.Line2D at 0x12fd913c8>]

In [30]:
#reduced to more siginificant features
sm.Logit(df['Hoax'], sm.add_constant(df[['Month', 'Duration_Sec', 'ASTR','HOL']])).fit().summary()


Optimization terminated successfully.
         Current function value: 0.248330
         Iterations 7
Out[30]:
Logit Regression Results
Dep. Variable: Hoax No. Observations: 12172
Model: Logit Df Residuals: 12167
Method: MLE Df Model: 4
Date: Wed, 08 Jun 2016 Pseudo R-squ.: 0.02901
Time: 19:03:07 Log-Likelihood: -3022.7
converged: True LL-Null: -3113.0
LLR p-value: 5.436e-38
coef std err z P>|z| [95.0% Conf. Int.]
const -3.5861 0.105 -34.226 0.000 -3.792 -3.381
Month 0.1407 0.012 11.762 0.000 0.117 0.164
Duration_Sec 1.587e-05 5.74e-06 2.765 0.006 4.62e-06 2.71e-05
ASTR -0.2318 0.101 -2.292 0.022 -0.430 -0.034
HOL -0.7448 0.216 -3.450 0.001 -1.168 -0.322

In [31]:
from sklearn.metrics import roc_auc_score
#ROC curve
log_reg = LogisticRegression()
log_reg.fit(df[['Month', 'Duration_Sec', 'ASTR','HOL']], df['Hoax'])

roc_auc_score(df['Hoax'], log_reg.predict(df[[ 'Month', 'Duration_Sec', 'ASTR','HOL']]))


Out[31]:
0.49991158267020336

In [32]:
from sklearn.metrics import roc_curve, roc_auc_score

actuals = log_reg.predict(df[['Month', 'Duration_Sec', 'ASTR','HOL'
        ]]) 

probas = log_reg.predict_proba(df[['Month', 'Duration_Sec', 'ASTR','HOL' ]])
plt.plot(roc_curve(df[['Hoax']], probas[:,1])[0], roc_curve(df[['Hoax']], probas[:,1])[1])


Out[32]:
[<matplotlib.lines.Line2D at 0x12fe321d0>]

- Random Forest


In [33]:
df = df[(~df.Shape.isnull()) & (~df.Summary.isnull()) ]

In [34]:
df.columns


Out[34]:
Index(['Date', 'index', 'Year', 'Month', 'Day', 'Time', 'City', 'State', 'Lat',
       'Long', 'Shape', 'Duration_Sec', 'Summary', 'WeekDay', 'Week',
       'Quarter', 'TimePer', 'state', 'state abbr', 'Region', 'Hoax', 'ASTR',
       'HOL', 'Pop', 'Milit_Share', 'Shape_Chevron', 'Shape_Cigar',
       'Shape_Circle', 'Shape_Cone', 'Shape_Cross', 'Shape_Cylinder',
       'Shape_Diamond', 'Shape_Disk', 'Shape_Egg', 'Shape_Fireball',
       'Shape_Flash', 'Shape_Formation', 'Shape_Light', 'Shape_Other',
       'Shape_Oval', 'Shape_Rectangle', 'Shape_Sphere', 'Shape_Teardrop',
       'Shape_Triangle', 'Region_Midwest', 'Region_Northeast', 'Region_South',
       'Region_West'],
      dtype='object')

In [35]:
# All Features
from sklearn.ensemble import RandomForestClassifier
from sklearn.cross_validation import cross_val_score

X = df[['Year', 'Month', 'Day',  'Lat',
       'Long', 'Duration_Sec', 'WeekDay', 'Week',
       'Quarter', 'TimePer', 'ASTR',
       'HOL', 'Pop', 'Milit_Share', 'Shape_Chevron', 'Shape_Cigar',
       'Shape_Circle', 'Shape_Cone', 'Shape_Cross', 'Shape_Cylinder',
       'Shape_Diamond', 'Shape_Disk', 'Shape_Egg', 'Shape_Fireball',
       'Shape_Flash', 'Shape_Formation', 'Shape_Light', 'Shape_Other',
       'Shape_Oval', 'Shape_Rectangle', 'Shape_Sphere', 'Shape_Teardrop',
       'Shape_Triangle', 'Region_Midwest', 'Region_Northeast','Region_West', 'Region_South']]
y = df['Hoax']

score_model={}   
for n_depth in range(1, 5, 1):
    for n_trees in range(1, 150, 20):
        for n_min in range(6, 36, 6):
            model = RandomForestClassifier(n_estimators=n_trees, max_depth=n_depth, min_samples_leaf=n_min)
            scores = cross_val_score(model, X, y, scoring='roc_auc', cv=7, )
            score_model[n_min,n_trees,n_depth]=scores.mean()
            print('n_min sample leaf: {}, n_trees: {},n_depth: {}, CV AUC {}, Average AUC {}'.format(n_min,n_trees,n_depth,scores,scores.mean()))
best=max(score_model.items(), key=lambda x: x[1])
print (best)


n_min sample leaf: 6, n_trees: 1,n_depth: 1, CV AUC [ 0.57348485  0.83320707  0.52765152  0.65997475  0.5         0.57422879
  0.5       ], Average AUC 0.5955067095465074
n_min sample leaf: 12, n_trees: 1,n_depth: 1, CV AUC [ 0.58541667  0.77954545  0.54419192  0.65056818  0.47487374  0.56488208
  0.5353626 ], Average AUC 0.590691519625519
n_min sample leaf: 18, n_trees: 1,n_depth: 1, CV AUC [ 0.5885101   0.77954545  0.3709596   0.59930556  0.56546717  0.55670141
  0.5353626 ], Average AUC 0.5708359838508423
n_min sample leaf: 24, n_trees: 1,n_depth: 1, CV AUC [ 0.55359848  0.83320707  0.74835859  0.67714646  0.5         0.56017583
  0.53451058], Average AUC 0.6295710021503577
n_min sample leaf: 30, n_trees: 1,n_depth: 1, CV AUC [ 0.57348485  0.90833333  0.5         0.5157197   0.46767677  0.55721204
  0.4530569 ], Average AUC 0.5679262275652427
n_min sample leaf: 6, n_trees: 21,n_depth: 1, CV AUC [ 0.63028988  0.96435448  0.89397885  0.7565604   0.56590383  0.62786113
  0.64169723], Average AUC 0.7258065429039684
n_min sample leaf: 12, n_trees: 21,n_depth: 1, CV AUC [ 0.61851063  0.96704809  0.92865636  0.80903041  0.6142177   0.60186881
  0.57806155], Average AUC 0.7310562192399628
n_min sample leaf: 18, n_trees: 21,n_depth: 1, CV AUC [ 0.6071391   0.96442024  0.80155198  0.81200284  0.58698443  0.59956307
  0.61315341], Average AUC 0.7121164380605928
n_min sample leaf: 24, n_trees: 21,n_depth: 1, CV AUC [ 0.60240162  0.96889205  0.90779935  0.84701968  0.58261521  0.61873552
  0.59120275], Average AUC 0.7312380249985674
n_min sample leaf: 30, n_trees: 21,n_depth: 1, CV AUC [ 0.60304082  0.97010732  0.87487637  0.79548611  0.5721512   0.64603601
  0.60477925], Average AUC 0.7237824400076907
n_min sample leaf: 6, n_trees: 41,n_depth: 1, CV AUC [ 0.63609007  0.96897622  0.89418929  0.80729956  0.55639468  0.60137134
  0.58650472], Average AUC 0.7215465534078732
n_min sample leaf: 12, n_trees: 41,n_depth: 1, CV AUC [ 0.59360006  0.96369423  0.89872422  0.77617056  0.6117056   0.62700569
  0.58354789], Average AUC 0.7220640352415619
n_min sample leaf: 18, n_trees: 41,n_depth: 1, CV AUC [ 0.61241846  0.96590646  0.89944234  0.78039773  0.59651726  0.62568699
  0.60670358], Average AUC 0.7267246863219468
n_min sample leaf: 24, n_trees: 41,n_depth: 1, CV AUC [ 0.62999527  0.97025726  0.88473801  0.79762731  0.59815867  0.61492683
  0.55787331], Average AUC 0.7219395213003941
n_min sample leaf: 30, n_trees: 41,n_depth: 1, CV AUC [ 0.62090173  0.96312342  0.89920297  0.77712279  0.59321338  0.64862866
  0.58905811], Average AUC 0.7273215799393482
n_min sample leaf: 6, n_trees: 61,n_depth: 1, CV AUC [ 0.62668087  0.97058607  0.91334175  0.80270676  0.55632365  0.62511845
  0.593358  ], Average AUC 0.7268736493565312
n_min sample leaf: 12, n_trees: 61,n_depth: 1, CV AUC [ 0.61268413  0.96864741  0.85261469  0.80599747  0.58434606  0.62923773
  0.60551447], Average AUC 0.7227202829460618
n_min sample leaf: 18, n_trees: 61,n_depth: 1, CV AUC [ 0.66043508  0.96897622  0.87697285  0.82811185  0.61100589  0.63252264
  0.58341783], Average AUC 0.7373489081858552
n_min sample leaf: 24, n_trees: 61,n_depth: 1, CV AUC [ 0.64836122  0.96949442  0.89240583  0.8005866   0.58130261  0.61773005
  0.5931085 ], Average AUC 0.7289984603341262
n_min sample leaf: 30, n_trees: 61,n_depth: 1, CV AUC [ 0.60298032  0.9706492   0.9038589   0.81220013  0.58041351  0.62181512
  0.61244207], Average AUC 0.7291941788879196
n_min sample leaf: 6, n_trees: 81,n_depth: 1, CV AUC [ 0.62212226  0.96513047  0.84683291  0.7935685   0.59391572  0.64453569
  0.58581196], Average AUC 0.7217025027466655
n_min sample leaf: 12, n_trees: 81,n_depth: 1, CV AUC [ 0.62050715  0.96484638  0.8965383   0.8004314   0.59913194  0.62523163
  0.60225505], Average AUC 0.7298488366924518
n_min sample leaf: 18, n_trees: 81,n_depth: 1, CV AUC [ 0.63007944  0.97173822  0.88900989  0.79553083  0.57644676  0.65003948
  0.61141488], Average AUC 0.7320370703093415
n_min sample leaf: 24, n_trees: 81,n_depth: 1, CV AUC [ 0.60429819  0.97297191  0.88524832  0.80073127  0.57659407  0.64351179
  0.59369774], Average AUC 0.7252933266247118
n_min sample leaf: 30, n_trees: 81,n_depth: 1, CV AUC [ 0.59968697  0.97018624  0.8724274   0.81669297  0.59836911  0.64112971
  0.59377472], Average AUC 0.7274667312332099
n_min sample leaf: 6, n_trees: 101,n_depth: 1, CV AUC [ 0.62320602  0.96714804  0.89085385  0.81401515  0.58354903  0.62239419
  0.59808522], Average AUC 0.728464500989718
n_min sample leaf: 12, n_trees: 101,n_depth: 1, CV AUC [ 0.6121896   0.96728483  0.88290457  0.8256392   0.59805871  0.63317014
  0.59115232], Average AUC 0.7300570532802764
n_min sample leaf: 18, n_trees: 101,n_depth: 1, CV AUC [ 0.63671086  0.96341803  0.88977536  0.81274989  0.59151673  0.62941935
  0.60816607], Average AUC 0.733108042584684
n_min sample leaf: 24, n_trees: 101,n_depth: 1, CV AUC [ 0.63060553  0.9659275   0.84559659  0.80101273  0.58043718  0.62694778
  0.60601347], Average AUC 0.7223629709880852
n_min sample leaf: 30, n_trees: 101,n_depth: 1, CV AUC [ 0.6243608   0.97047296  0.88619792  0.7963831   0.5743529   0.62034639
  0.60289207], Average AUC 0.7250008768989324
n_min sample leaf: 6, n_trees: 121,n_depth: 1, CV AUC [ 0.6189052   0.97184606  0.89505471  0.79017519  0.60685238  0.61584544
  0.6278792 ], Average AUC 0.73236545494379
n_min sample leaf: 12, n_trees: 121,n_depth: 1, CV AUC [ 0.62334017  0.96708228  0.89346328  0.82088857  0.59223485  0.63842914
  0.61345069], Average AUC 0.7355555690961179
n_min sample leaf: 18, n_trees: 121,n_depth: 1, CV AUC [ 0.62652304  0.96772675  0.88076862  0.82694392  0.5960464   0.63440988
  0.60033868], Average AUC 0.7332510416376936
n_min sample leaf: 24, n_trees: 121,n_depth: 1, CV AUC [ 0.62765678  0.97069392  0.87968487  0.82473169  0.60850431  0.62468414
  0.61142549], Average AUC 0.7353401723417247
n_min sample leaf: 30, n_trees: 121,n_depth: 1, CV AUC [ 0.61910248  0.96833439  0.89169034  0.82341909  0.59207439  0.63150927
  0.6108681 ], Average AUC 0.7338568643920204
n_min sample leaf: 6, n_trees: 141,n_depth: 1, CV AUC [ 0.62262995  0.97069129  0.89883207  0.79267677  0.59339752  0.63292535
  0.58666663], Average AUC 0.7282599389094733
n_min sample leaf: 12, n_trees: 141,n_depth: 1, CV AUC [ 0.62554714  0.97085175  0.88693182  0.79422875  0.59076705  0.63104864
  0.60968961], Average AUC 0.7298663925308856
n_min sample leaf: 18, n_trees: 141,n_depth: 1, CV AUC [ 0.61981534  0.9688105   0.88633207  0.81704809  0.59979219  0.61450569
  0.58469983], Average AUC 0.727286243757411
n_min sample leaf: 24, n_trees: 141,n_depth: 1, CV AUC [ 0.61771886  0.97092803  0.88878367  0.82199863  0.5946391   0.62810328
  0.60904728], Average AUC 0.7330312649687867
n_min sample leaf: 30, n_trees: 141,n_depth: 1, CV AUC [ 0.60251999  0.97284827  0.87755419  0.81374421  0.59388941  0.6473784
  0.61134852], Average AUC 0.7313261423277716
n_min sample leaf: 6, n_trees: 1,n_depth: 2, CV AUC [ 0.54009891  0.79165088  0.61919981  0.53062395  0.49251894  0.4928985
  0.58172176], Average AUC 0.5783875360585188
n_min sample leaf: 12, n_trees: 1,n_depth: 2, CV AUC [ 0.5875      0.59850589  0.74452336  0.6701573   0.57473169  0.5787271
  0.52927109], Average AUC 0.6119166335356766
n_min sample leaf: 18, n_trees: 1,n_depth: 2, CV AUC [ 0.62788563  0.90236742  0.75104167  0.50675505  0.53962542  0.52281007
  0.69109552], Average AUC 0.6487972529396816
n_min sample leaf: 24, n_trees: 1,n_depth: 2, CV AUC [ 0.54663826  0.896754    0.70006839  0.65460859  0.56899463  0.60953885
  0.55673994], Average AUC 0.647620379977431
n_min sample leaf: 30, n_trees: 1,n_depth: 2, CV AUC [ 0.54880051  0.91106902  0.75967224  0.72825126  0.60242529  0.60324805
  0.56754275], Average AUC 0.6744298754388686
n_min sample leaf: 6, n_trees: 21,n_depth: 2, CV AUC [ 0.64482586  0.97267729  0.82685185  0.74975274  0.60001841  0.66515582
  0.61433986], Average AUC 0.724803120101308
n_min sample leaf: 12, n_trees: 21,n_depth: 2, CV AUC [ 0.63358323  0.96882365  0.71971275  0.78791561  0.60856218  0.64664666
  0.6300663 ], Average AUC 0.7136157711894714
n_min sample leaf: 18, n_trees: 21,n_depth: 2, CV AUC [ 0.62216435  0.95352483  0.86424137  0.71327336  0.59740372  0.66181565
  0.64199982], Average AUC 0.7220604433998278
n_min sample leaf: 24, n_trees: 21,n_depth: 2, CV AUC [ 0.63530093  0.9705282   0.82642835  0.69346328  0.60801768  0.62334702
  0.60716542], Average AUC 0.7091786949478325
n_min sample leaf: 30, n_trees: 21,n_depth: 2, CV AUC [ 0.60167824  0.96404146  0.80283302  0.78322285  0.59487847  0.6486892
  0.61645795], Average AUC 0.7159715988751979
n_min sample leaf: 6, n_trees: 41,n_depth: 2, CV AUC [ 0.62376631  0.97342172  0.80575021  0.77088068  0.57096749  0.65124237
  0.63624009], Average AUC 0.7188955522204225
n_min sample leaf: 12, n_trees: 41,n_depth: 2, CV AUC [ 0.63151042  0.96699021  0.79587016  0.75394571  0.61017203  0.66364498
  0.63090505], Average AUC 0.7218626508327605
n_min sample leaf: 18, n_trees: 41,n_depth: 2, CV AUC [ 0.64996054  0.9665746   0.80081019  0.74131944  0.6010101   0.66615603
  0.65162945], Average AUC 0.7253514788278107
n_min sample leaf: 24, n_trees: 41,n_depth: 2, CV AUC [ 0.6560948   0.96864741  0.82857481  0.79339489  0.60565288  0.67747684
  0.64974758], Average AUC 0.739941315949911
n_min sample leaf: 30, n_trees: 41,n_depth: 2, CV AUC [ 0.63662142  0.9690262   0.7534354   0.74745107  0.57365846  0.65454833
  0.63377695], Average AUC 0.7097882605371826
n_min sample leaf: 6, n_trees: 61,n_depth: 2, CV AUC [ 0.65515572  0.96656671  0.77732534  0.77446075  0.58358586  0.66227627
  0.65695122], Average AUC 0.7251888385944071
n_min sample leaf: 12, n_trees: 61,n_depth: 2, CV AUC [ 0.64458386  0.96976536  0.80246212  0.7608165   0.57275621  0.66449779
  0.64950604], Average AUC 0.7234839830801736
n_min sample leaf: 18, n_trees: 61,n_depth: 2, CV AUC [ 0.6296533   0.97422927  0.85083386  0.77697022  0.57869055  0.66410823
  0.6448611 ], Average AUC 0.7313352208441619
n_min sample leaf: 24, n_trees: 61,n_depth: 2, CV AUC [ 0.64513626  0.97497106  0.78254156  0.76776357  0.56707176  0.66186829
  0.64271116], Average AUC 0.7202948091768532
n_min sample leaf: 30, n_trees: 61,n_depth: 2, CV AUC [ 0.64050926  0.97183028  0.82243792  0.76441235  0.58298085  0.64913403
  0.66410974], Average AUC 0.7279163472268504
n_min sample leaf: 6, n_trees: 81,n_depth: 2, CV AUC [ 0.64717487  0.97516046  0.79735638  0.73356481  0.60893834  0.66011266
  0.64500974], Average AUC 0.7239024659599987
n_min sample leaf: 12, n_trees: 81,n_depth: 2, CV AUC [ 0.63570602  0.97617582  0.8201573   0.7816472   0.59678293  0.6547431
  0.64575559], Average AUC 0.7301382808255165
n_min sample leaf: 18, n_trees: 81,n_depth: 2, CV AUC [ 0.66009838  0.97418718  0.80458228  0.78050295  0.60209649  0.66880922
  0.65178074], Average AUC 0.7345796053562635
n_min sample leaf: 24, n_trees: 81,n_depth: 2, CV AUC [ 0.63171033  0.97213016  0.84660406  0.77423453  0.59878735  0.66907507
  0.65069249], Average AUC 0.7347477139896108
n_min sample leaf: 30, n_trees: 81,n_depth: 2, CV AUC [ 0.64473906  0.97558923  0.83618476  0.79320812  0.55318813  0.65283744
  0.63119701], Average AUC 0.7267062507578947
n_min sample leaf: 6, n_trees: 101,n_depth: 2, CV AUC [ 0.62567603  0.97025463  0.78373053  0.76050873  0.59433396  0.66282902
  0.66332673], Average AUC 0.7229513775466382
n_min sample leaf: 12, n_trees: 101,n_depth: 2, CV AUC [ 0.65160196  0.9692419   0.83776305  0.76512784  0.58175768  0.66126027
  0.64647754], Average AUC 0.7304614617052242
n_min sample leaf: 18, n_trees: 101,n_depth: 2, CV AUC [ 0.6362479   0.97267992  0.83070286  0.77003104  0.60373527  0.65692251
  0.66053711], Average AUC 0.732979516449487
n_min sample leaf: 24, n_trees: 101,n_depth: 2, CV AUC [ 0.64436553  0.97399779  0.80724169  0.77793824  0.59648306  0.66898294
  0.63763092], Average AUC 0.7295200242091068
n_min sample leaf: 30, n_trees: 101,n_depth: 2, CV AUC [ 0.63156303  0.97433975  0.80737058  0.78180766  0.60837279  0.65947041
  0.6529141 ], Average AUC 0.7308340466926416
n_min sample leaf: 6, n_trees: 121,n_depth: 2, CV AUC [ 0.65093382  0.9729456   0.82653356  0.79218224  0.62156197  0.66115761
  0.63344251], Average AUC 0.7369653315225476
n_min sample leaf: 12, n_trees: 121,n_depth: 2, CV AUC [ 0.63403304  0.96258944  0.83465909  0.72682029  0.60356166  0.67110708
  0.67204595], Average AUC 0.7292595050975271
n_min sample leaf: 18, n_trees: 121,n_depth: 2, CV AUC [ 0.64904777  0.97529198  0.81208176  0.77611795  0.60601326  0.66890398
  0.65149142], Average AUC 0.7341354455062812
n_min sample leaf: 24, n_trees: 121,n_depth: 2, CV AUC [ 0.64944234  0.97254577  0.80852273  0.77436606  0.59070128  0.66229469
  0.64777813], Average AUC 0.7293787138584032
n_min sample leaf: 30, n_trees: 121,n_depth: 2, CV AUC [ 0.62925084  0.97335596  0.80786511  0.77336911  0.57211963  0.66914087
  0.63879083], Average AUC 0.7234131927056637
n_min sample leaf: 6, n_trees: 141,n_depth: 2, CV AUC [ 0.6509496   0.97592856  0.80270676  0.77143308  0.60970381  0.66237366
  0.6540342 ], Average AUC 0.7324470937628164
n_min sample leaf: 12, n_trees: 141,n_depth: 2, CV AUC [ 0.654627    0.97635995  0.80908828  0.76241846  0.59305819  0.67057802
  0.63894743], Average AUC 0.7292967598868673
n_min sample leaf: 18, n_trees: 141,n_depth: 2, CV AUC [ 0.62087016  0.97455808  0.78861006  0.77628893  0.60742845  0.65725679
  0.62311216], Average AUC 0.7211606622994342
n_min sample leaf: 24, n_trees: 141,n_depth: 2, CV AUC [ 0.65768887  0.9728588   0.78296507  0.75313815  0.60333281  0.65940724
  0.6500077 ], Average AUC 0.7256283760054332
n_min sample leaf: 30, n_trees: 141,n_depth: 2, CV AUC [ 0.65549505  0.97357428  0.82674137  0.77430029  0.6025384   0.67623973
  0.64389761], Average AUC 0.7361123935765976
n_min sample leaf: 6, n_trees: 1,n_depth: 3, CV AUC [ 0.55263836  0.83441972  0.50463226  0.66463068  0.6295665   0.60520636
  0.57285656], Average AUC 0.6234214913141471
n_min sample leaf: 12, n_trees: 1,n_depth: 3, CV AUC [ 0.52863794  0.77332702  0.77033617  0.64473117  0.5794718   0.561255
  0.56098674], Average AUC 0.6312494070363553
n_min sample leaf: 18, n_trees: 1,n_depth: 3, CV AUC [ 0.62841961  0.95251999  0.73274937  0.68579809  0.46687447  0.56618235
  0.69342595], Average AUC 0.6751385478848724
n_min sample leaf: 24, n_trees: 1,n_depth: 3, CV AUC [ 0.61306292  0.92574179  0.6727904   0.7023911   0.57022306  0.63676037
  0.51658907], Average AUC 0.6625083892778669
n_min sample leaf: 30, n_trees: 1,n_depth: 3, CV AUC [ 0.37390309  0.77704914  0.82506839  0.53658723  0.54666193  0.56401348
  0.57489502], Average AUC 0.5997397545722347
n_min sample leaf: 6, n_trees: 21,n_depth: 3, CV AUC [ 0.64833491  0.97764362  0.75792824  0.77776463  0.58432239  0.66868288
  0.64546893], Average AUC 0.7228779427668035
n_min sample leaf: 12, n_trees: 21,n_depth: 3, CV AUC [ 0.64051452  0.97606008  0.73693971  0.66810553  0.52757786  0.67210729
  0.65118619], Average AUC 0.6960701681604736
n_min sample leaf: 18, n_trees: 21,n_depth: 3, CV AUC [ 0.65495844  0.97690709  0.7686553   0.78837332  0.53802872  0.65142925
  0.63084665], Average AUC 0.7155998251565377
n_min sample leaf: 24, n_trees: 21,n_depth: 3, CV AUC [ 0.65558449  0.97369003  0.69247159  0.69001473  0.61677452  0.67180985
  0.62927799], Average AUC 0.7042318855220643
n_min sample leaf: 30, n_trees: 21,n_depth: 3, CV AUC [ 0.65117319  0.97070444  0.70960911  0.6521412   0.57467119  0.65865445
  0.66770625], Average AUC 0.6978085484877509
n_min sample leaf: 6, n_trees: 41,n_depth: 3, CV AUC [ 0.654385    0.97232481  0.76893939  0.73787616  0.55566077  0.66374237
  0.67485415], Average AUC 0.7182546639757155
n_min sample leaf: 12, n_trees: 41,n_depth: 3, CV AUC [ 0.65950652  0.94583596  0.72841172  0.73954388  0.57196707  0.67907454
  0.63439539], Average AUC 0.7083907259279982
n_min sample leaf: 18, n_trees: 41,n_depth: 3, CV AUC [ 0.67341383  0.97557081  0.70672875  0.71377578  0.5698548   0.66482944
  0.64067269], Average AUC 0.7064065845431348
n_min sample leaf: 24, n_trees: 41,n_depth: 3, CV AUC [ 0.64535459  0.97284564  0.69715646  0.71888678  0.60133628  0.67555801
  0.66060082], Average AUC 0.7102483692218888
n_min sample leaf: 30, n_trees: 41,n_depth: 3, CV AUC [ 0.67789089  0.97085964  0.77571812  0.66290246  0.5694839   0.67635292
  0.65485967], Average AUC 0.712581084932266
n_min sample leaf: 6, n_trees: 61,n_depth: 3, CV AUC [ 0.65538721  0.97592593  0.71052452  0.74271359  0.56805029  0.68364656
  0.6658005 ], Average AUC 0.7145783698827801
n_min sample leaf: 12, n_trees: 61,n_depth: 3, CV AUC [ 0.65425084  0.97452652  0.73592435  0.74152199  0.57220907  0.68136713
  0.65927634], Average AUC 0.7170108914071859
n_min sample leaf: 18, n_trees: 61,n_depth: 3, CV AUC [ 0.6739294   0.97306397  0.74603851  0.70572391  0.5584254   0.67456307
  0.64653063], Average AUC 0.7111821257413358
n_min sample leaf: 24, n_trees: 61,n_depth: 3, CV AUC [ 0.660635    0.97267203  0.73394886  0.73337016  0.57663878  0.68013792
  0.6555365 ], Average AUC 0.7161341805082356
n_min sample leaf: 30, n_trees: 61,n_depth: 3, CV AUC [ 0.66929451  0.97532355  0.72878788  0.69412879  0.5806634   0.68468625
  0.66854234], Average AUC 0.7144895306874801
n_min sample leaf: 6, n_trees: 81,n_depth: 3, CV AUC [ 0.65296454  0.97686237  0.73677662  0.68961753  0.58801557  0.67795325
  0.65511182], Average AUC 0.7110431020217354
n_min sample leaf: 12, n_trees: 81,n_depth: 3, CV AUC [ 0.67865372  0.97417666  0.72020202  0.70434291  0.57051768  0.6741893
  0.66530946], Average AUC 0.7124845363797331
n_min sample leaf: 18, n_trees: 81,n_depth: 3, CV AUC [ 0.65048401  0.97423716  0.72020465  0.6819234   0.53846012  0.67431565
  0.66263663], Average AUC 0.7003230880638783
n_min sample leaf: 24, n_trees: 81,n_depth: 3, CV AUC [ 0.66583544  0.97271412  0.72696759  0.75179398  0.60976694  0.68526269
  0.66126173], Average AUC 0.7248003549185073
n_min sample leaf: 30, n_trees: 81,n_depth: 3, CV AUC [ 0.67446075  0.97315341  0.72290878  0.70196759  0.61024832  0.68196462
  0.66498564], Average AUC 0.718527015918608
n_min sample leaf: 6, n_trees: 101,n_depth: 3, CV AUC [ 0.66663773  0.97453441  0.68459596  0.69294508  0.56491214  0.68459939
  0.65108532], Average AUC 0.7027585755116476
n_min sample leaf: 12, n_trees: 101,n_depth: 3, CV AUC [ 0.65722064  0.97758049  0.71740846  0.69404461  0.56463594  0.68280427
  0.65955504], Average AUC 0.7076070667599721
n_min sample leaf: 18, n_trees: 101,n_depth: 3, CV AUC [ 0.65103641  0.97314552  0.73439078  0.73582965  0.57070444  0.67432354
  0.66371425], Average AUC 0.7147349418285119
n_min sample leaf: 24, n_trees: 101,n_depth: 3, CV AUC [ 0.65394834  0.97251157  0.69085122  0.73435922  0.56777673  0.66615866
  0.66511039], Average AUC 0.7072451614617432
n_min sample leaf: 30, n_trees: 101,n_depth: 3, CV AUC [ 0.63681082  0.97289825  0.74816393  0.70817551  0.59111953  0.67717941
  0.6547535 ], Average AUC 0.7127287057619457
n_min sample leaf: 6, n_trees: 121,n_depth: 3, CV AUC [ 0.64308975  0.96725063  0.71125842  0.66490951  0.57281934  0.68171194
  0.65833143], Average AUC 0.6999101459986166
n_min sample leaf: 12, n_trees: 121,n_depth: 3, CV AUC [ 0.65096801  0.97627578  0.6996896   0.66980745  0.58465383  0.67758212
  0.66423714], Average AUC 0.7033162771364248
n_min sample leaf: 18, n_trees: 121,n_depth: 3, CV AUC [ 0.65736795  0.97509733  0.73086069  0.7374737   0.57216435  0.68148821
  0.66283835], Average AUC 0.7167557962784838
n_min sample leaf: 24, n_trees: 121,n_depth: 3, CV AUC [ 0.65788352  0.97496317  0.7543771   0.6394939   0.58176031  0.68178827
  0.66212701], Average AUC 0.7074847559619412
n_min sample leaf: 30, n_trees: 121,n_depth: 3, CV AUC [ 0.66462542  0.97463173  0.76002736  0.70828072  0.58724221  0.68138555
  0.66314624], Average AUC 0.7199056069024021
n_min sample leaf: 6, n_trees: 141,n_depth: 3, CV AUC [ 0.65458228  0.97565236  0.73637416  0.7223669   0.57825126  0.6843546
  0.67001014], Average AUC 0.7173702424687692
n_min sample leaf: 12, n_trees: 141,n_depth: 3, CV AUC [ 0.65135469  0.97682555  0.69820076  0.71298401  0.57743582  0.68075121
  0.66042033], Average AUC 0.708281765544375
n_min sample leaf: 18, n_trees: 141,n_depth: 3, CV AUC [ 0.66178714  0.97532355  0.70226221  0.70785985  0.59051189  0.66964361
  0.65516491], Average AUC 0.708936164526637
n_min sample leaf: 24, n_trees: 141,n_depth: 3, CV AUC [ 0.64925558  0.9749737   0.73485901  0.72157513  0.59316604  0.67292325
  0.66328692], Average AUC 0.7157199437930158
n_min sample leaf: 30, n_trees: 141,n_depth: 3, CV AUC [ 0.66031408  0.97446601  0.72472643  0.70967487  0.58825758  0.68208833
  0.66319933], Average AUC 0.7146752337793275
n_min sample leaf: 6, n_trees: 1,n_depth: 4, CV AUC [ 0.61674821  0.8455282   0.57505261  0.75975116  0.59886101  0.61435039
  0.52956837], Average AUC 0.6485514196829182
n_min sample leaf: 12, n_trees: 1,n_depth: 4, CV AUC [ 0.65097327  0.83401726  0.72545507  0.60422717  0.56292088  0.5999263
  0.60241165], Average AUC 0.6542759426117417
n_min sample leaf: 18, n_trees: 1,n_depth: 4, CV AUC [ 0.56281566  0.93666877  0.8067077   0.69787195  0.57274569  0.61864077
  0.56967942], Average AUC 0.6807328500355648
n_min sample leaf: 24, n_trees: 1,n_depth: 4, CV AUC [ 0.60562395  0.89917929  0.68988847  0.57542088  0.51927083  0.54853917
  0.5388317 ], Average AUC 0.6252506125937298
n_min sample leaf: 30, n_trees: 1,n_depth: 4, CV AUC [ 0.59711963  0.84238479  0.69423401  0.3484375   0.44429714  0.61629554
  0.53441503], Average AUC 0.5824548041872875
n_min sample leaf: 6, n_trees: 21,n_depth: 4, CV AUC [ 0.66273148  0.94165088  0.66576705  0.62904566  0.61181871  0.65279796
  0.67545401], Average AUC 0.691323678701861
n_min sample leaf: 12, n_trees: 21,n_depth: 4, CV AUC [ 0.66551978  0.97498685  0.64291614  0.71703756  0.5529093   0.67947989
  0.69272788], Average AUC 0.7036539150191271
n_min sample leaf: 18, n_trees: 21,n_depth: 4, CV AUC [ 0.64586753  0.97643098  0.66336806  0.67051242  0.5553951   0.66212624
  0.67695632], Average AUC 0.692950946785677
n_min sample leaf: 24, n_trees: 21,n_depth: 4, CV AUC [ 0.63158933  0.97124106  0.66226326  0.66546717  0.60073653  0.66544009
  0.65328039], Average AUC 0.6928596901417778
n_min sample leaf: 30, n_trees: 21,n_depth: 4, CV AUC [ 0.6548085   0.97623106  0.6715383   0.68731061  0.54047506  0.65320331
  0.68338226], Average AUC 0.6952784424494702
n_min sample leaf: 6, n_trees: 41,n_depth: 4, CV AUC [ 0.66947601  0.97136206  0.68610848  0.60649726  0.5600142   0.68506791
  0.68524289], Average AUC 0.6948241167566683
n_min sample leaf: 12, n_trees: 41,n_depth: 4, CV AUC [ 0.66499369  0.94360006  0.68721591  0.68080282  0.55239899  0.69039008
  0.65906931], Average AUC 0.6969244091586397
n_min sample leaf: 18, n_trees: 41,n_depth: 4, CV AUC [ 0.64525463  0.97345591  0.64472854  0.71331019  0.57239583  0.66078122
  0.67090728], Average AUC 0.6972619414665278
n_min sample leaf: 24, n_trees: 41,n_depth: 4, CV AUC [ 0.6559475   0.97318497  0.72830913  0.66214226  0.5262258   0.67319436
  0.64679871], Average AUC 0.6951146746637266
n_min sample leaf: 30, n_trees: 41,n_depth: 4, CV AUC [ 0.6419113   0.97296402  0.69813237  0.65803872  0.57594697  0.68286218
  0.66689139], Average AUC 0.6995352780067118
n_min sample leaf: 6, n_trees: 61,n_depth: 4, CV AUC [ 0.65899095  0.96985743  0.63398569  0.71889731  0.53936763  0.67806117
  0.67020125], Average AUC 0.6956230610129316
n_min sample leaf: 12, n_trees: 61,n_depth: 4, CV AUC [ 0.66315236  0.97357428  0.64614899  0.62573653  0.58272043  0.67456043
  0.68575251], Average AUC 0.6930922196453925
n_min sample leaf: 18, n_trees: 61,n_depth: 4, CV AUC [ 0.642248    0.97512363  0.67836963  0.64569655  0.55799926  0.67957465
  0.63666477], Average AUC 0.6879537856741565
n_min sample leaf: 24, n_trees: 61,n_depth: 4, CV AUC [ 0.66559343  0.97312447  0.70289878  0.67686237  0.55093645  0.65539587
  0.65676011], Average AUC 0.6973673565858926
n_min sample leaf: 30, n_trees: 61,n_depth: 4, CV AUC [ 0.64110638  0.97233796  0.6889678   0.66642992  0.57150936  0.67438408
  0.66069637], Average AUC 0.6964902687642691
n_min sample leaf: 6, n_trees: 81,n_depth: 4, CV AUC [ 0.64771149  0.97797769  0.6387258   0.60797033  0.55032881  0.68519162
  0.66826364], Average AUC 0.6823099115980006
n_min sample leaf: 12, n_trees: 81,n_depth: 4, CV AUC [ 0.64914773  0.97298243  0.63721591  0.68885732  0.58152357  0.68738945
  0.6748223 ], Average AUC 0.6988483864743182
n_min sample leaf: 18, n_trees: 81,n_depth: 4, CV AUC [ 0.66129787  0.9740241   0.64410248  0.59813237  0.5597722   0.67647399
  0.65924184], Average AUC 0.6818635504840189
n_min sample leaf: 24, n_trees: 81,n_depth: 4, CV AUC [ 0.65685764  0.9739557   0.67777252  0.64489162  0.58138152  0.67929564
  0.66719929], Average AUC 0.697336276351247
n_min sample leaf: 30, n_trees: 81,n_depth: 4, CV AUC [ 0.65328283  0.9743871   0.67897727  0.67430029  0.57306397  0.67813224
  0.65764133], Average AUC 0.6985407189801046
n_min sample leaf: 6, n_trees: 101,n_depth: 4, CV AUC [ 0.6525463   0.97496843  0.63980955  0.67636785  0.51845802  0.68265161
  0.68051301], Average AUC 0.6893306817098063
n_min sample leaf: 12, n_trees: 101,n_depth: 4, CV AUC [ 0.65388521  0.97161984  0.63980429  0.62767782  0.5093487   0.68181986
  0.67247594], Average AUC 0.6795188078006907
n_min sample leaf: 18, n_trees: 101,n_depth: 4, CV AUC [ 0.64475221  0.97265099  0.66684817  0.65382471  0.57458439  0.66525848
  0.66554834], Average AUC 0.6919238965873715
n_min sample leaf: 24, n_trees: 101,n_depth: 4, CV AUC [ 0.67616004  0.97417403  0.6513205   0.64056713  0.57876157  0.67575542
  0.64984844], Average AUC 0.6923695906593939
n_min sample leaf: 30, n_trees: 101,n_depth: 4, CV AUC [ 0.64389994  0.97390572  0.70099958  0.66031671  0.57497896  0.67582649
  0.66990397], Average AUC 0.6999759091196698
n_min sample leaf: 6, n_trees: 121,n_depth: 4, CV AUC [ 0.66539089  0.97491319  0.66376789  0.62882207  0.54526778  0.69067698
  0.66484231], Average AUC 0.6905258745893565
n_min sample leaf: 12, n_trees: 121,n_depth: 4, CV AUC [ 0.6584596   0.96876578  0.65530303  0.61936027  0.54447338  0.67868762
  0.66968632], Average AUC 0.6849622852919474
n_min sample leaf: 18, n_trees: 121,n_depth: 4, CV AUC [ 0.65447706  0.97477378  0.65351431  0.63839436  0.56949968  0.67655822
  0.66096179], Average AUC 0.6897398876586598
n_min sample leaf: 24, n_trees: 121,n_depth: 4, CV AUC [ 0.66179766  0.97150936  0.65014731  0.63794192  0.56493845  0.67974574
  0.66199164], Average AUC 0.689724583076864
n_min sample leaf: 30, n_trees: 121,n_depth: 4, CV AUC [ 0.64836122  0.97405303  0.70105219  0.6703388   0.57353746  0.68473889
  0.66362666], Average AUC 0.7022440362517416
n_min sample leaf: 6, n_trees: 141,n_depth: 4, CV AUC [ 0.66997317  0.96444655  0.65826494  0.61417298  0.56702441  0.68397031
  0.66595179], Average AUC 0.6891148781959455
n_min sample leaf: 12, n_trees: 141,n_depth: 4, CV AUC [ 0.65527146  0.97598117  0.64760101  0.63527462  0.53752367  0.68545746
  0.673888  ], Average AUC 0.6872853431350654
n_min sample leaf: 18, n_trees: 141,n_depth: 4, CV AUC [ 0.65343013  0.97339015  0.64078283  0.63468013  0.57095434  0.67608707
  0.66179258], Average AUC 0.687302461523447
n_min sample leaf: 24, n_trees: 141,n_depth: 4, CV AUC [ 0.64649621  0.97489478  0.65356692  0.64010417  0.56871843  0.67842441
  0.6708887 ], Average AUC 0.6904419450298608
n_min sample leaf: 30, n_trees: 141,n_depth: 4, CV AUC [ 0.65320918  0.97409249  0.68586385  0.64705387  0.58934922  0.67708728
  0.66413893], Average AUC 0.6986849740550267
((24, 41, 2), 0.73994131594991097)

In [137]:
print (best)


((30, 21, 2), 0.7402365054023613)

In [36]:
model = RandomForestClassifier(n_estimators=21, max_depth=2, min_samples_leaf=30, min_samples_split=6)
model.fit(X, y)
print (model.score(X, y))
scores=cross_val_score(model, X, y, scoring='roc_auc', cv=7)
print (scores.mean())


0.92964360587
0.721820840019

In [140]:
features = X.columns
feature_importances = model.feature_importances_

features_df = pd.DataFrame({'Features': features, 'Importance Score': feature_importances})
features_df.sort('Importance Score', inplace=True, ascending=False)

features_df


/Users/annakudryashova/anaconda/lib/python3.5/site-packages/ipykernel/__main__.py:5: FutureWarning: sort(columns=....) is deprecated, use sort_values(by=.....)
Out[140]:
Features Importance Score
0 Year 0.130991
1 Month 0.121567
12 Pop 0.120769
7 Week 0.118995
35 Region_West 0.096590
8 Quarter 0.089630
2 Day 0.080987
5 Duration_Sec 0.063953
4 Long 0.050639
9 TimePer 0.040954
3 Lat 0.029090
6 WeekDay 0.018264
33 Region_Midwest 0.014872
13 Milit_Share 0.014493
22 Shape_Egg 0.005622
26 Shape_Light 0.001783
11 HOL 0.000474
20 Shape_Diamond 0.000192
25 Shape_Formation 0.000135
34 Region_Northeast 0.000000
32 Shape_Triangle 0.000000
31 Shape_Teardrop 0.000000
30 Shape_Sphere 0.000000
29 Shape_Rectangle 0.000000
28 Shape_Oval 0.000000
27 Shape_Other 0.000000
18 Shape_Cross 0.000000
24 Shape_Flash 0.000000
23 Shape_Fireball 0.000000
21 Shape_Disk 0.000000
19 Shape_Cylinder 0.000000
17 Shape_Cone 0.000000
16 Shape_Circle 0.000000
15 Shape_Cigar 0.000000
14 Shape_Chevron 0.000000
10 ASTR 0.000000
36 Region_South 0.000000

In [37]:
# 2nd revision: remove the 5 least important features
from sklearn.ensemble import RandomForestClassifier
from sklearn.cross_validation import cross_val_score

X = df[['Year', 'Month', 'Lat', 'Long', 'Duration_Sec', 'Week',
       'Quarter', 'TimePer', 'Pop', 'Milit_Share', 'Shape_Chevron', 'Shape_Cigar',
       'Shape_Circle', 'Shape_Cone', 'Shape_Cross', 'Shape_Cylinder',
       'Shape_Diamond', 'Shape_Disk', 'Shape_Egg', 'Shape_Fireball',
       'Shape_Flash', 'Shape_Formation', 'Shape_Light', 'Shape_Other',
       'Shape_Oval', 'Shape_Rectangle', 'Shape_Sphere', 'Shape_Teardrop',
       'Shape_Triangle', 'Region_Midwest', 'Region_Northeast','Region_West']]
y = df['Hoax']

score_model={}   
for n_depth in range(1, 5, 1):
    for n_trees in range(1, 150, 20):
        for n_min in range(6, 36, 6):
            model = RandomForestClassifier(n_estimators=n_trees, max_depth=n_depth, min_samples_leaf=n_min)
            scores = cross_val_score(model, X, y, scoring='roc_auc', cv=7)
            score_model[n_min,n_trees,n_depth]=scores.mean()
            print('n_min sample leaf: {}, n_trees: {},n_depth: {}, CV AUC {}, Average AUC {}'.format(n_min,n_trees,n_depth,scores, scores.mean()))
best=max(score_model.items(), key=lambda x: x[1])
print (best)


n_min sample leaf: 6, n_trees: 1,n_depth: 1, CV AUC [ 0.56717172  0.59880051  0.5032197   0.49810606  0.49968434  0.6105259
  0.5353626 ], Average AUC 0.5446958315380748
n_min sample leaf: 12, n_trees: 1,n_depth: 1, CV AUC [ 0.50164141  0.77954545  0.9125      0.65056818  0.55069444  0.5
  0.53584833], Average AUC 0.6329711171974293
n_min sample leaf: 18, n_trees: 1,n_depth: 1, CV AUC [ 0.5         0.54905303  0.5032197   0.65751263  0.51565657  0.6105259
  0.53451058], Average AUC 0.5529254859833632
n_min sample leaf: 24, n_trees: 1,n_depth: 1, CV AUC [ 0.58541667  0.91401515  0.74488636  0.65056818  0.55498737  0.5
  0.58429108], Average AUC 0.6477378304640258
n_min sample leaf: 30, n_trees: 1,n_depth: 1, CV AUC [ 0.56717172  0.60915404  0.37064394  0.65751263  0.50991162  0.56017583
  0.53451058], Average AUC 0.544154335483691
n_min sample leaf: 6, n_trees: 21,n_depth: 1, CV AUC [ 0.64037774  0.95833596  0.85390888  0.82712805  0.59408933  0.64849442
  0.57352012], Average AUC 0.7279792148403323
n_min sample leaf: 12, n_trees: 21,n_depth: 1, CV AUC [ 0.57314026  0.96291824  0.90542666  0.81364689  0.61184764  0.6165219
  0.56509287], Average AUC 0.7212277806257523
n_min sample leaf: 18, n_trees: 21,n_depth: 1, CV AUC [ 0.65885943  0.9615846   0.84119581  0.82299032  0.61196075  0.62429722
  0.62015533], Average AUC 0.7344347794823437
n_min sample leaf: 24, n_trees: 21,n_depth: 1, CV AUC [ 0.58540614  0.96023779  0.90136258  0.7654698   0.63495633  0.65763582
  0.63289043], Average AUC 0.7339941293107018
n_min sample leaf: 30, n_trees: 21,n_depth: 1, CV AUC [ 0.55930661  0.96082702  0.89076968  0.78248895  0.59786406  0.66427932
  0.56219708], Average AUC 0.716818959800272
n_min sample leaf: 6, n_trees: 41,n_depth: 1, CV AUC [ 0.60507155  0.96330492  0.88141572  0.81638258  0.61006681  0.63520741
  0.62171337], Average AUC 0.7333089096042841
n_min sample leaf: 12, n_trees: 41,n_depth: 1, CV AUC [ 0.62658354  0.95882786  0.860877    0.78493266  0.57609954  0.6433407
  0.60794577], Average AUC 0.7226581533548703
n_min sample leaf: 18, n_trees: 41,n_depth: 1, CV AUC [ 0.58920981  0.96344171  0.90842277  0.78344118  0.60739426  0.65395873
  0.60224444], Average AUC 0.7297304122480747
n_min sample leaf: 24, n_trees: 41,n_depth: 1, CV AUC [ 0.64181134  0.96167666  0.89194024  0.80190972  0.59871107  0.64147189
  0.61967225], Average AUC 0.7367418819696117
n_min sample leaf: 30, n_trees: 41,n_depth: 1, CV AUC [ 0.60582123  0.96322075  0.87579177  0.80871212  0.58762889  0.66122605
  0.61273138], Average AUC 0.7307331715017374
n_min sample leaf: 6, n_trees: 61,n_depth: 1, CV AUC [ 0.64910301  0.96455703  0.89147201  0.8131471   0.5931108   0.65389556
  0.62044199], Average AUC 0.7408182120077432
n_min sample leaf: 12, n_trees: 61,n_depth: 1, CV AUC [ 0.6468408   0.96580124  0.89656197  0.82608375  0.59444971  0.67057012
  0.59308992], Average AUC 0.7419139313893625
n_min sample leaf: 18, n_trees: 61,n_depth: 1, CV AUC [ 0.6360322   0.9633549   0.88996738  0.82107271  0.5947338   0.66187882
  0.61803193], Average AUC 0.7407245323975223
n_min sample leaf: 24, n_trees: 61,n_depth: 1, CV AUC [ 0.59244792  0.96445444  0.88549295  0.81054819  0.61693497  0.64434618
  0.59820732], Average AUC 0.7303474240954542
n_min sample leaf: 30, n_trees: 61,n_depth: 1, CV AUC [ 0.63167614  0.96532513  0.85536616  0.79937658  0.59841909  0.65842809
  0.60599755], Average AUC 0.7306555324030172
n_min sample leaf: 6, n_trees: 81,n_depth: 1, CV AUC [ 0.60872527  0.96610375  0.84042771  0.81059817  0.62130156  0.64878395
  0.63546505], Average AUC 0.733057923774342
n_min sample leaf: 12, n_trees: 81,n_depth: 1, CV AUC [ 0.63197075  0.96578809  0.9010101   0.79579914  0.60508733  0.65239261
  0.62712008], Average AUC 0.7398811572687837
n_min sample leaf: 18, n_trees: 81,n_depth: 1, CV AUC [ 0.64115899  0.9666693   0.87313763  0.77689131  0.62445023  0.64679669
  0.61527681], Average AUC 0.7349115650658319
n_min sample leaf: 24, n_trees: 81,n_depth: 1, CV AUC [ 0.6119213   0.9670323   0.88984901  0.83634259  0.63046086  0.667759
  0.59867712], Average AUC 0.7431488833401619
n_min sample leaf: 30, n_trees: 81,n_depth: 1, CV AUC [ 0.61099011  0.96495949  0.89124053  0.82989268  0.62243529  0.63386766
  0.61385679], Average AUC 0.7381775058827426
n_min sample leaf: 6, n_trees: 101,n_depth: 1, CV AUC [ 0.63208912  0.96566446  0.85920665  0.81743476  0.62164089  0.65437987
  0.60375736], Average AUC 0.7363104439984387
n_min sample leaf: 12, n_trees: 101,n_depth: 1, CV AUC [ 0.60123369  0.96665878  0.89225063  0.79858218  0.59659091  0.66128659
  0.61207048], Average AUC 0.7326676064292528
n_min sample leaf: 18, n_trees: 101,n_depth: 1, CV AUC [ 0.60930661  0.96600116  0.88476957  0.84592803  0.62195391  0.63842914
  0.60755294], Average AUC 0.7391344803333669
n_min sample leaf: 24, n_trees: 101,n_depth: 1, CV AUC [ 0.64538615  0.96598801  0.88073969  0.79078283  0.59945286  0.65125816
  0.61383024], Average AUC 0.7353482773312174
n_min sample leaf: 30, n_trees: 101,n_depth: 1, CV AUC [ 0.61300505  0.96415457  0.86242109  0.83659775  0.61563026  0.64908665
  0.60782102], Average AUC 0.7355309114902494
n_min sample leaf: 6, n_trees: 121,n_depth: 1, CV AUC [ 0.64434712  0.96379156  0.87664668  0.81560659  0.61085069  0.64332491
  0.61142284], Average AUC 0.7379986262045992
n_min sample leaf: 12, n_trees: 121,n_depth: 1, CV AUC [ 0.60944602  0.96689815  0.89538352  0.82087805  0.61422033  0.66506107
  0.61835309], Average AUC 0.7414628897906242
n_min sample leaf: 18, n_trees: 121,n_depth: 1, CV AUC [ 0.62107797  0.96543035  0.89324758  0.84287405  0.61209754  0.65727522
  0.61764175], Average AUC 0.7442349213313905
n_min sample leaf: 24, n_trees: 121,n_depth: 1, CV AUC [ 0.60981429  0.9645807   0.86615109  0.83343855  0.60455598  0.63895031
  0.6322826 ], Average AUC 0.7356819319011576
n_min sample leaf: 30, n_trees: 121,n_depth: 1, CV AUC [ 0.63002157  0.96710859  0.83206545  0.80264625  0.61018255  0.6543904
  0.62352623], Average AUC 0.731420147916489
n_min sample leaf: 6, n_trees: 141,n_depth: 1, CV AUC [ 0.62155934  0.96790036  0.88172085  0.8047822   0.59818234  0.65287955
  0.60749455], Average AUC 0.7335027422720046
n_min sample leaf: 12, n_trees: 141,n_depth: 1, CV AUC [ 0.64282934  0.96433344  0.89308712  0.80921454  0.61352851  0.64458833
  0.61578377], Average AUC 0.7404807226786432
n_min sample leaf: 18, n_trees: 141,n_depth: 1, CV AUC [ 0.62512626  0.96623001  0.8787642   0.81617477  0.61867372  0.66398979
  0.61459998], Average AUC 0.7405083893522345
n_min sample leaf: 24, n_trees: 141,n_depth: 1, CV AUC [ 0.62820128  0.96694287  0.88308607  0.80962226  0.60658933  0.65908349
  0.62047384], Average AUC 0.7391427346105622
n_min sample leaf: 30, n_trees: 141,n_depth: 1, CV AUC [ 0.59809028  0.96529619  0.88015835  0.81173716  0.60679188  0.65725942
  0.61976781], Average AUC 0.734157298960186
n_min sample leaf: 6, n_trees: 1,n_depth: 2, CV AUC [ 0.50903304  0.83396991  0.75684712  0.4342803   0.55069444  0.56661139
  0.51912388], Average AUC 0.5957942981469565
n_min sample leaf: 12, n_trees: 1,n_depth: 2, CV AUC [ 0.67852483  0.96364162  0.63543245  0.53268624  0.56918666  0.59928932
  0.55051041], Average AUC 0.6470387911492883
n_min sample leaf: 18, n_trees: 1,n_depth: 2, CV AUC [ 0.63904409  0.83320707  0.59902146  0.56300505  0.54884259  0.54194567
  0.5433864 ], Average AUC 0.6097789055142817
n_min sample leaf: 24, n_trees: 1,n_depth: 2, CV AUC [ 0.60922243  0.83442761  0.55379314  0.64482586  0.53156566  0.5730996
  0.53399035], Average AUC 0.6115606643192157
n_min sample leaf: 30, n_trees: 1,n_depth: 2, CV AUC [ 0.58891519  0.95117319  0.66338384  0.72366372  0.53248369  0.60699095
  0.54110905], Average AUC 0.6582456616988475
n_min sample leaf: 6, n_trees: 21,n_depth: 2, CV AUC [ 0.64267414  0.96445181  0.82278251  0.76868161  0.65585806  0.67459992
  0.67920181], Average AUC 0.7440356937515684
n_min sample leaf: 12, n_trees: 21,n_depth: 2, CV AUC [ 0.65095749  0.94904514  0.86366793  0.70562658  0.58990162  0.65627764
  0.65535867], Average AUC 0.724405009266687
n_min sample leaf: 18, n_trees: 21,n_depth: 2, CV AUC [ 0.6471512   0.96503314  0.81353641  0.76677715  0.60070234  0.65726469
  0.63495544], Average AUC 0.7264886219850825
n_min sample leaf: 24, n_trees: 21,n_depth: 2, CV AUC [ 0.63438026  0.96762942  0.87737269  0.76828967  0.60050768  0.65390872
  0.63234365], Average AUC 0.7334902975320696
n_min sample leaf: 30, n_trees: 21,n_depth: 2, CV AUC [ 0.59618845  0.9730666   0.86317077  0.77491846  0.60032618  0.64409086
  0.6338088 ], Average AUC 0.7265100162990598
n_min sample leaf: 6, n_trees: 41,n_depth: 2, CV AUC [ 0.67176978  0.96944971  0.7492924   0.79651199  0.614002    0.6592151
  0.63729118], Average AUC 0.7282188797842497
n_min sample leaf: 12, n_trees: 41,n_depth: 2, CV AUC [ 0.63090015  0.97098853  0.83944129  0.77239846  0.5984638   0.67563698
  0.66038582], Average AUC 0.7354592904431916
n_min sample leaf: 18, n_trees: 41,n_depth: 2, CV AUC [ 0.65743897  0.97553662  0.81986795  0.7344092   0.60154146  0.65451674
  0.63816973], Average AUC 0.7259258094832336
n_min sample leaf: 24, n_trees: 41,n_depth: 2, CV AUC [ 0.61612479  0.967369    0.79694865  0.76341277  0.60884101  0.62701358
  0.63794412], Average AUC 0.7168077053802595
n_min sample leaf: 30, n_trees: 41,n_depth: 2, CV AUC [ 0.64506524  0.96694813  0.80946444  0.77921402  0.5984375   0.66373447
  0.65309725], Average AUC 0.7308515758381462
n_min sample leaf: 6, n_trees: 61,n_depth: 2, CV AUC [ 0.65240951  0.94272938  0.7851089   0.77479745  0.56297612  0.66113392
  0.64737999], Average AUC 0.718076467194505
n_min sample leaf: 12, n_trees: 61,n_depth: 2, CV AUC [ 0.64146412  0.97146728  0.82112005  0.77236953  0.61525673  0.67056222
  0.65365995], Average AUC 0.7351285551632836
n_min sample leaf: 18, n_trees: 61,n_depth: 2, CV AUC [ 0.63118687  0.96541456  0.8215988   0.79307397  0.57981113  0.65115551
  0.65021207], Average AUC 0.7274932733766407
n_min sample leaf: 24, n_trees: 61,n_depth: 2, CV AUC [ 0.65465593  0.96923927  0.76655093  0.78539299  0.57901936  0.64343546
  0.65057571], Average AUC 0.7212670925659996
n_min sample leaf: 30, n_trees: 61,n_depth: 2, CV AUC [ 0.62625737  0.96823706  0.8506471   0.78730008  0.59943182  0.64580964
  0.64900704], Average AUC 0.732384301462454
n_min sample leaf: 6, n_trees: 81,n_depth: 2, CV AUC [ 0.62819076  0.96640888  0.84096696  0.76916824  0.62223011  0.6619104
  0.62704311], Average AUC 0.7308454963813757
n_min sample leaf: 12, n_trees: 81,n_depth: 2, CV AUC [ 0.64084333  0.96940499  0.81658775  0.7717803   0.5692419   0.67743736
  0.63838207], Average AUC 0.726239671382776
n_min sample leaf: 18, n_trees: 81,n_depth: 2, CV AUC [ 0.64534407  0.97344802  0.84423401  0.76597485  0.6059738   0.67793746
  0.64017369], Average AUC 0.7361551288905078
n_min sample leaf: 24, n_trees: 81,n_depth: 2, CV AUC [ 0.63512205  0.97027304  0.80591856  0.78055819  0.59700652  0.63602074
  0.65090749], Average AUC 0.7251152281311716
n_min sample leaf: 30, n_trees: 81,n_depth: 2, CV AUC [ 0.61993108  0.97010469  0.79951073  0.77589436  0.60902515  0.66070489
  0.63234896], Average AUC 0.723931408478209
n_min sample leaf: 6, n_trees: 101,n_depth: 2, CV AUC [ 0.64470749  0.97574968  0.80109954  0.78430661  0.60113373  0.66953043
  0.64917692], Average AUC 0.7322434854047161
n_min sample leaf: 12, n_trees: 101,n_depth: 2, CV AUC [ 0.6314657   0.96957597  0.81632997  0.7672138   0.60660511  0.6801195
  0.64449482], Average AUC 0.7308292666335439
n_min sample leaf: 18, n_trees: 101,n_depth: 2, CV AUC [ 0.64790351  0.96877894  0.82934291  0.76541719  0.62034407  0.67256001
  0.63822547], Average AUC 0.7346531574545245
n_min sample leaf: 24, n_trees: 101,n_depth: 2, CV AUC [ 0.64875842  0.96946286  0.84110375  0.75994581  0.60287247  0.67776111
  0.64882656], Average AUC 0.7355329958487539
n_min sample leaf: 30, n_trees: 101,n_depth: 2, CV AUC [ 0.63623211  0.96587489  0.83792877  0.7790483   0.58484322  0.6659981
  0.64749943], Average AUC 0.7310606897101541
n_min sample leaf: 6, n_trees: 121,n_depth: 2, CV AUC [ 0.64384207  0.96211858  0.79582544  0.78147622  0.59631471  0.68182775
  0.637116  ], Average AUC 0.7283601099324856
n_min sample leaf: 12, n_trees: 121,n_depth: 2, CV AUC [ 0.63222327  0.96911037  0.82122527  0.76223169  0.56509628  0.66648242
  0.65065003], Average AUC 0.7238599047655363
n_min sample leaf: 18, n_trees: 121,n_depth: 2, CV AUC [ 0.65253314  0.96903409  0.82065972  0.76732955  0.5961411   0.66999368
  0.64775955], Average AUC 0.7319215471296389
n_min sample leaf: 24, n_trees: 121,n_depth: 2, CV AUC [ 0.62965593  0.97222748  0.81281566  0.77721486  0.60962226  0.66253685
  0.64532029], Average AUC 0.7299133333885123
n_min sample leaf: 30, n_trees: 121,n_depth: 2, CV AUC [ 0.64427083  0.97054661  0.80969592  0.76097696  0.59187973  0.67715308
  0.64908933], Average AUC 0.729087495109343
n_min sample leaf: 6, n_trees: 141,n_depth: 2, CV AUC [ 0.61968382  0.9699416   0.83333859  0.77848801  0.61121896  0.65268478
  0.66600222], Average AUC 0.7330511393337907
n_min sample leaf: 12, n_trees: 141,n_depth: 2, CV AUC [ 0.62326915  0.97152252  0.82033091  0.77339541  0.59562553  0.67550011
  0.64519819], Average AUC 0.7292631166841692
n_min sample leaf: 18, n_trees: 141,n_depth: 2, CV AUC [ 0.64736164  0.97114373  0.81509101  0.76887363  0.61014573  0.66356601
  0.65855704], Average AUC 0.7335341138806896
n_min sample leaf: 24, n_trees: 141,n_depth: 2, CV AUC [ 0.62598117  0.97060974  0.81804503  0.76667456  0.60920402  0.66610865
  0.64345966], Average AUC 0.7285832618116831
n_min sample leaf: 30, n_trees: 141,n_depth: 2, CV AUC [ 0.62958491  0.96339173  0.82854324  0.77295612  0.61524095  0.6714282
  0.64396927], Average AUC 0.7321592044385217
n_min sample leaf: 6, n_trees: 1,n_depth: 3, CV AUC [ 0.56775568  0.83230745  0.36332071  0.23578756  0.54866635  0.57971415
  0.54345541], Average AUC 0.524429616299419
n_min sample leaf: 12, n_trees: 1,n_depth: 3, CV AUC [ 0.56721907  0.66883944  0.7947601   0.39481797  0.4574153   0.51011529
  0.53736125], Average AUC 0.5615040584727577
n_min sample leaf: 18, n_trees: 1,n_depth: 3, CV AUC [ 0.47098064  0.93904672  0.80948022  0.57373737  0.47945602  0.6270399
  0.4504345 ], Average AUC 0.6214536245968809
n_min sample leaf: 24, n_trees: 1,n_depth: 3, CV AUC [ 0.53489057  0.8293771   0.7571049   0.5764415   0.46064289  0.6383028
  0.52419351], Average AUC 0.6172790388077709
n_min sample leaf: 30, n_trees: 1,n_depth: 3, CV AUC [ 0.56471223  0.88919402  0.79483902  0.65997475  0.47950863  0.57729522
  0.69242795], Average AUC 0.6654216872789079
n_min sample leaf: 6, n_trees: 21,n_depth: 3, CV AUC [ 0.62975063  0.97273779  0.73304398  0.64302662  0.61090593  0.66608497
  0.64305621], Average AUC 0.6998008770175215
n_min sample leaf: 12, n_trees: 21,n_depth: 3, CV AUC [ 0.64955019  0.94708018  0.72689657  0.69738531  0.58632681  0.6594283
  0.6375221 ], Average AUC 0.7005984935115084
n_min sample leaf: 18, n_trees: 21,n_depth: 3, CV AUC [ 0.67279303  0.96871054  0.68671086  0.6938447   0.55730745  0.66489787
  0.66809908], Average AUC 0.701766219091566
n_min sample leaf: 24, n_trees: 21,n_depth: 3, CV AUC [ 0.63489846  0.97217487  0.742006    0.70514257  0.60145465  0.62959307
  0.66966509], Average AUC 0.7078478165666938
n_min sample leaf: 30, n_trees: 21,n_depth: 3, CV AUC [ 0.67859585  0.97267729  0.78581124  0.67965593  0.59991319  0.65891503
  0.65296984], Average AUC 0.7183626273492065
n_min sample leaf: 6, n_trees: 41,n_depth: 3, CV AUC [ 0.65517677  0.94077757  0.74688815  0.71977588  0.58045823  0.68693146
  0.66139178], Average AUC 0.7130571202970236
n_min sample leaf: 12, n_trees: 41,n_depth: 3, CV AUC [ 0.63959649  0.97369003  0.70524253  0.72084122  0.59012258  0.66752737
  0.66573149], Average AUC 0.7089645292240186
n_min sample leaf: 18, n_trees: 41,n_depth: 3, CV AUC [ 0.63581913  0.97180398  0.72226168  0.77105955  0.59877157  0.66610076
  0.66039909], Average AUC 0.7180308229218293
n_min sample leaf: 24, n_trees: 41,n_depth: 3, CV AUC [ 0.64659354  0.97054661  0.68594539  0.66578809  0.56313394  0.66587703
  0.66341167], Average AUC 0.6944708959504091
n_min sample leaf: 30, n_trees: 41,n_depth: 3, CV AUC [ 0.64618056  0.97272727  0.72482639  0.67991898  0.57608112  0.66611918
  0.65291676], Average AUC 0.7026814661042955
n_min sample leaf: 6, n_trees: 61,n_depth: 3, CV AUC [ 0.6558791   0.96763731  0.72921665  0.68983586  0.57860375  0.67077279
  0.68108102], Average AUC 0.710432354586535
n_min sample leaf: 12, n_trees: 61,n_depth: 3, CV AUC [ 0.64744844  0.97206965  0.76436237  0.66899989  0.59950284  0.68531796
  0.64332695], Average AUC 0.7115754448829629
n_min sample leaf: 18, n_trees: 61,n_depth: 3, CV AUC [ 0.64648832  0.9750947   0.73691604  0.70613426  0.58815499  0.66743788
  0.66394783], Average AUC 0.71202485847833
n_min sample leaf: 24, n_trees: 61,n_depth: 3, CV AUC [ 0.65018676  0.96999158  0.67528672  0.71488058  0.60949863  0.67038061
  0.67149387], Average AUC 0.7088169640802461
n_min sample leaf: 30, n_trees: 61,n_depth: 3, CV AUC [ 0.64423138  0.97265099  0.75452178  0.71089541  0.57905619  0.65947568
  0.6539413 ], Average AUC 0.7106818175151431
n_min sample leaf: 6, n_trees: 81,n_depth: 3, CV AUC [ 0.64577283  0.97673611  0.72604693  0.70235427  0.56155829  0.65780691
  0.6698615 ], Average AUC 0.7057338345999017
n_min sample leaf: 12, n_trees: 81,n_depth: 3, CV AUC [ 0.63802346  0.96752946  0.73069497  0.70244108  0.58475379  0.66719046
  0.67312889], Average AUC 0.7091088724648024
n_min sample leaf: 18, n_trees: 81,n_depth: 3, CV AUC [ 0.65689447  0.97004945  0.72626526  0.69309238  0.59133786  0.66415561
  0.65738386], Average AUC 0.7084541271287671
n_min sample leaf: 24, n_trees: 81,n_depth: 3, CV AUC [ 0.64615688  0.9732823   0.75193866  0.71791088  0.56952862  0.66179196
  0.67431003], Average AUC 0.7135599033573242
n_min sample leaf: 30, n_trees: 81,n_depth: 3, CV AUC [ 0.6455282   0.96867898  0.75700758  0.71708228  0.55868845  0.68216467
  0.65738917], Average AUC 0.7123627596796676
n_min sample leaf: 6, n_trees: 101,n_depth: 3, CV AUC [ 0.66085332  0.97286932  0.74768782  0.73573495  0.58892835  0.68382554
  0.64473104], Average AUC 0.7192329064710522
n_min sample leaf: 12, n_trees: 101,n_depth: 3, CV AUC [ 0.65069707  0.97558923  0.70026568  0.74520465  0.570494    0.6763845
  0.66528026], Average AUC 0.7119879136808536
n_min sample leaf: 18, n_trees: 101,n_depth: 3, CV AUC [ 0.65454019  0.97274306  0.70341961  0.67369792  0.58364373  0.67635292
  0.64250147], Average AUC 0.700985556726998
n_min sample leaf: 24, n_trees: 101,n_depth: 3, CV AUC [ 0.66274726  0.97031513  0.75918561  0.70183081  0.54487058  0.68303853
  0.6628702 ], Average AUC 0.7121225894193094
n_min sample leaf: 30, n_trees: 101,n_depth: 3, CV AUC [ 0.64455492  0.97319287  0.7515362   0.72536564  0.58960701  0.6622052
  0.66180585], Average AUC 0.7154668109526876
n_min sample leaf: 6, n_trees: 121,n_depth: 3, CV AUC [ 0.65087332  0.9723064   0.74645676  0.68275989  0.5599616   0.66950411
  0.65685832], Average AUC 0.7055314831454318
n_min sample leaf: 12, n_trees: 121,n_depth: 3, CV AUC [ 0.64473906  0.97455545  0.7025463   0.7115767   0.57654935  0.67044115
  0.65720337], Average AUC 0.705373053664267
n_min sample leaf: 18, n_trees: 121,n_depth: 3, CV AUC [ 0.65586069  0.97540509  0.70245686  0.72957439  0.58239952  0.67307591
  0.65360686], Average AUC 0.7103399031961669
n_min sample leaf: 24, n_trees: 121,n_depth: 3, CV AUC [ 0.65193603  0.97091488  0.71953914  0.7061553   0.57703335  0.66802748
  0.65540645], Average AUC 0.7070018041083213
n_min sample leaf: 30, n_trees: 121,n_depth: 3, CV AUC [ 0.64287142  0.97361374  0.74876368  0.70601326  0.59085911  0.68005633
  0.65372365], Average AUC 0.7137001699548481
n_min sample leaf: 6, n_trees: 141,n_depth: 3, CV AUC [ 0.64509154  0.97303767  0.73279146  0.66973643  0.58718171  0.67142556
  0.67252637], Average AUC 0.707398676854625
n_min sample leaf: 12, n_trees: 141,n_depth: 3, CV AUC [ 0.65259101  0.97179346  0.69942656  0.73473801  0.57286669  0.67809276
  0.67949909], Average AUC 0.7127153665226212
n_min sample leaf: 18, n_trees: 141,n_depth: 3, CV AUC [ 0.6470907   0.97373211  0.68815236  0.70598432  0.56958912  0.67203359
  0.66032477], Average AUC 0.702415281626443
n_min sample leaf: 24, n_trees: 141,n_depth: 3, CV AUC [ 0.64183765  0.97291141  0.77195654  0.67844329  0.56339962  0.67428932
  0.65629827], Average AUC 0.7084480148808473
n_min sample leaf: 30, n_trees: 141,n_depth: 3, CV AUC [ 0.64557555  0.97238005  0.75388258  0.69724064  0.58593224  0.66277374
  0.65075885], Average AUC 0.7097919486318524
n_min sample leaf: 6, n_trees: 1,n_depth: 4, CV AUC [ 0.60881734  0.79375263  0.77094381  0.6226089   0.57724116  0.61575068
  0.61333125], Average AUC 0.657492253782547
n_min sample leaf: 12, n_trees: 1,n_depth: 4, CV AUC [ 0.58637942  0.93146044  0.51494108  0.70093119  0.60944865  0.61062592
  0.57759174], Average AUC 0.6473397771246302
n_min sample leaf: 18, n_trees: 1,n_depth: 4, CV AUC [ 0.5905987   0.93338068  0.73087384  0.69344223  0.59707492  0.54634397
  0.54635651], Average AUC 0.6625815494303288
n_min sample leaf: 24, n_trees: 1,n_depth: 4, CV AUC [ 0.4884233   0.83352799  0.50073653  0.60885154  0.59073548  0.57642662
  0.5955743 ], Average AUC 0.5991822497502098
n_min sample leaf: 30, n_trees: 1,n_depth: 4, CV AUC [ 0.63074232  0.93159722  0.73456965  0.6362137   0.55212542  0.47570278
  0.49093042], Average AUC 0.6359830739247083
n_min sample leaf: 6, n_trees: 21,n_depth: 4, CV AUC [ 0.63737111  0.96779514  0.67171454  0.64324495  0.5430424   0.60078701
  0.66566247], Average AUC 0.6756596603200007
n_min sample leaf: 12, n_trees: 21,n_depth: 4, CV AUC [ 0.6479456   0.97234059  0.72426084  0.62173032  0.54141151  0.6747789
  0.6687069 ], Average AUC 0.6930249528931418
n_min sample leaf: 18, n_trees: 21,n_depth: 4, CV AUC [ 0.62161195  0.97018098  0.68715541  0.62546296  0.52791982  0.66244736
  0.67417201], Average AUC 0.6812786410303415
n_min sample leaf: 24, n_trees: 21,n_depth: 4, CV AUC [ 0.63812079  0.96690867  0.73513784  0.65592645  0.55173611  0.67801906
  0.64185649], Average AUC 0.6953864868094005
n_min sample leaf: 30, n_trees: 21,n_depth: 4, CV AUC [ 0.65873053  0.97467119  0.76228956  0.65137574  0.61210017  0.62557381
  0.63942254], Average AUC 0.703451934105843
n_min sample leaf: 6, n_trees: 41,n_depth: 4, CV AUC [ 0.63941761  0.97566551  0.67241425  0.57685711  0.57849064  0.67248631
  0.67857541], Average AUC 0.6848438343860422
n_min sample leaf: 12, n_trees: 41,n_depth: 4, CV AUC [ 0.64565972  0.97219329  0.66966803  0.57632313  0.5881734   0.66075226
  0.63133238], Average AUC 0.6777288880749917
n_min sample leaf: 18, n_trees: 41,n_depth: 4, CV AUC [ 0.62748843  0.96968119  0.69039615  0.63503525  0.55177557  0.66534797
  0.67462323], Average AUC 0.6877639678895066
n_min sample leaf: 24, n_trees: 41,n_depth: 4, CV AUC [ 0.63482744  0.97413457  0.69229535  0.67088331  0.58027936  0.65994683
  0.66064594], Average AUC 0.6961446861872936
n_min sample leaf: 30, n_trees: 41,n_depth: 4, CV AUC [ 0.63310974  0.97683344  0.72746475  0.60981429  0.56540404  0.6566514
  0.65420672], Average AUC 0.6890691980959768
n_min sample leaf: 6, n_trees: 61,n_depth: 4, CV AUC [ 0.65976168  0.9674979   0.65261732  0.64768782  0.52553662  0.66920668
  0.67193978], Average AUC 0.6848925401687407
n_min sample leaf: 12, n_trees: 61,n_depth: 4, CV AUC [ 0.6328388   0.97187237  0.66148464  0.64713542  0.59315025  0.6740577
  0.65829427], Average AUC 0.6912619213474679
n_min sample leaf: 18, n_trees: 61,n_depth: 4, CV AUC [ 0.63552978  0.97418455  0.64536774  0.65971696  0.56217119  0.66081543
  0.65742633], Average AUC 0.6850302841617674
n_min sample leaf: 24, n_trees: 61,n_depth: 4, CV AUC [ 0.66201862  0.97047296  0.69085122  0.61035354  0.55425347  0.64853127
  0.6605265 ], Average AUC 0.6852867968204626
n_min sample leaf: 30, n_trees: 61,n_depth: 4, CV AUC [ 0.62770412  0.97334017  0.67942182  0.66089278  0.5787379   0.66638503
  0.66452911], Average AUC 0.6930015627076906
n_min sample leaf: 6, n_trees: 81,n_depth: 4, CV AUC [ 0.66000368  0.97293771  0.65994581  0.59082492  0.54972117  0.67051485
  0.67406053], Average AUC 0.6825726662907804
n_min sample leaf: 12, n_trees: 81,n_depth: 4, CV AUC [ 0.64963699  0.97522622  0.65087332  0.58137363  0.5526673   0.67092809
  0.65914098], Average AUC 0.6771209328825714
n_min sample leaf: 18, n_trees: 81,n_depth: 4, CV AUC [ 0.64558081  0.97496843  0.67825126  0.67293508  0.58167877  0.66265793
  0.66907053], Average AUC 0.6978775448464638
n_min sample leaf: 24, n_trees: 81,n_depth: 4, CV AUC [ 0.64597275  0.97453441  0.70443234  0.63754209  0.55757839  0.66817225
  0.65683974], Average AUC 0.6921531375883303
n_min sample leaf: 30, n_trees: 81,n_depth: 4, CV AUC [ 0.6302662   0.97023359  0.71454388  0.62862216  0.55381944  0.66383712
  0.66465917], Average AUC 0.6894259372122414
n_min sample leaf: 6, n_trees: 101,n_depth: 4, CV AUC [ 0.64694076  0.97372422  0.68405671  0.6009233   0.54404987  0.68290956
  0.67400479], Average AUC 0.6866584590710777
n_min sample leaf: 12, n_trees: 101,n_depth: 4, CV AUC [ 0.65219907  0.97616004  0.65578178  0.65375631  0.58259154  0.66816435
  0.67036846], Average AUC 0.6941459364590449
n_min sample leaf: 18, n_trees: 101,n_depth: 4, CV AUC [ 0.64504945  0.97472906  0.67776463  0.6429556   0.54771412  0.6564224
  0.65686628], Average AUC 0.685928792365913
n_min sample leaf: 24, n_trees: 101,n_depth: 4, CV AUC [ 0.65568971  0.97244055  0.68675821  0.67320865  0.54038037  0.65915719
  0.66316482], Average AUC 0.6929713568405278
n_min sample leaf: 30, n_trees: 101,n_depth: 4, CV AUC [ 0.64162984  0.97084122  0.69351589  0.60107323  0.57823022  0.67057012
  0.66094056], Average AUC 0.6881144406785474
n_min sample leaf: 6, n_trees: 121,n_depth: 4, CV AUC [ 0.64636206  0.97495791  0.6837621   0.64818497  0.4923427   0.67075437
  0.6819118 ], Average AUC 0.6854679881609381
n_min sample leaf: 12, n_trees: 121,n_depth: 4, CV AUC [ 0.64800084  0.97607586  0.69083018  0.64115899  0.55035774  0.66955675
  0.67942477], Average AUC 0.6936293043486084
n_min sample leaf: 18, n_trees: 121,n_depth: 4, CV AUC [ 0.65380892  0.96976273  0.67670455  0.62317445  0.56880524  0.65791219
  0.66984292], Average AUC 0.6885730008102963
n_min sample leaf: 24, n_trees: 121,n_depth: 4, CV AUC [ 0.64012521  0.97171191  0.71272359  0.64341593  0.55303293  0.66140503
  0.66076803], Average AUC 0.6918832345526347
n_min sample leaf: 30, n_trees: 121,n_depth: 4, CV AUC [ 0.65193077  0.97325337  0.70616319  0.64062763  0.57069918  0.65392451
  0.66376999], Average AUC 0.6943383772543806
n_min sample leaf: 6, n_trees: 141,n_depth: 4, CV AUC [ 0.65213331  0.97639678  0.66233954  0.58727904  0.53947022  0.66463466
  0.67642281], Average AUC 0.679810909848124
n_min sample leaf: 12, n_trees: 141,n_depth: 4, CV AUC [ 0.64952388  0.9761653   0.65403514  0.61916035  0.53984112  0.66239471
  0.66964651], Average AUC 0.68153814586846
n_min sample leaf: 18, n_trees: 141,n_depth: 4, CV AUC [ 0.64941077  0.9739899   0.70041035  0.63560869  0.57018887  0.67240208
  0.66250922], Average AUC 0.6949314134333072
n_min sample leaf: 24, n_trees: 141,n_depth: 4, CV AUC [ 0.65900147  0.97397412  0.72094644  0.63276778  0.56932607  0.66338966
  0.65935863], Average AUC 0.6969663108017602
n_min sample leaf: 30, n_trees: 141,n_depth: 4, CV AUC [ 0.63317287  0.97175926  0.69953441  0.6503025   0.55920665  0.6645557
  0.66749125], Average AUC 0.6922889492315141
((18, 121, 1), 0.74423492133139046)

In [38]:
model = RandomForestClassifier(n_estimators=81, max_depth=1, min_samples_leaf=6, min_samples_split=6)
model.fit(X, y)
print (model.score(X, y))
scores=cross_val_score(model, X, y, scoring='roc_auc', cv=7)
print (scores.mean())


0.92964360587
0.740138453703

In [143]:
features = X.columns
feature_importances = model.feature_importances_

features_df = pd.DataFrame({'Features': features, 'Importance Score': feature_importances})
features_df.sort('Importance Score', inplace=True, ascending=False)

features_df


/Users/annakudryashova/anaconda/lib/python3.5/site-packages/ipykernel/__main__.py:5: FutureWarning: sort(columns=....) is deprecated, use sort_values(by=.....)
Out[143]:
Features Importance Score
5 Week 0.172840
8 Pop 0.172840
0 Year 0.098765
6 Quarter 0.086420
1 Month 0.074074
31 Region_West 0.074074
4 Duration_Sec 0.061728
3 Long 0.061728
9 Milit_Share 0.061728
7 TimePer 0.061728
13 Shape_Cone 0.024691
22 Shape_Light 0.012346
30 Region_Northeast 0.012346
29 Region_Midwest 0.012346
18 Shape_Egg 0.012346
28 Shape_Triangle 0.000000
27 Shape_Teardrop 0.000000
26 Shape_Sphere 0.000000
25 Shape_Rectangle 0.000000
24 Shape_Oval 0.000000
23 Shape_Other 0.000000
19 Shape_Fireball 0.000000
21 Shape_Formation 0.000000
20 Shape_Flash 0.000000
17 Shape_Disk 0.000000
2 Lat 0.000000
15 Shape_Cylinder 0.000000
14 Shape_Cross 0.000000
12 Shape_Circle 0.000000
11 Shape_Cigar 0.000000
10 Shape_Chevron 0.000000
16 Shape_Diamond 0.000000

In [39]:
# 3rd revision: remove the 5 least important features

X = df[['Year', 'Month', 'Lat', 'Duration_Sec', 'Week','Quarter', 'TimePer', 'Pop', 
      'Shape_Egg',  'Shape_Light','Shape_Cone', 'Region_Midwest', 'Region_Northeast','Region_West']]
y = df['Hoax']

score_model={}   
for n_depth in range(1, 5, 1):
    for n_trees in range(1, 150, 20):
        for n_min in range(6, 36, 6):
            model = RandomForestClassifier(n_estimators=n_trees, max_depth=n_depth, min_samples_leaf=n_min)
            scores = cross_val_score(model, X, y, scoring='roc_auc', cv=7)
            score_model[n_min,n_trees,n_depth]=scores.mean()
            print('n_min sample leaf: {}, n_trees: {},n_depth: {}, CV AUC {}, Average AUC {}'.format(n_min,n_trees,n_depth,scores, scores.mean()))
best=max(score_model.items(), key=lambda x: x[1])
print (best)


n_min sample leaf: 6, n_trees: 1,n_depth: 1, CV AUC [ 0.5         0.49905303  0.49905303  0.56218434  0.53566919  0.56488208
  0.5       ], Average AUC 0.5229773823425118
n_min sample leaf: 12, n_trees: 1,n_depth: 1, CV AUC [ 0.5         0.91401515  0.73598485  0.5         0.50991162  0.58154875
  0.52493139], Average AUC 0.6094845358365786
n_min sample leaf: 18, n_trees: 1,n_depth: 1, CV AUC [ 0.58541667  0.83320707  0.49905303  0.5         0.5         0.56017583
  0.5353626 ], Average AUC 0.5733164559301773
n_min sample leaf: 24, n_trees: 1,n_depth: 1, CV AUC [ 0.58661616  0.46262626  0.74242424  0.5         0.55498737  0.5
  0.5353626 ], Average AUC 0.5545738053926753
n_min sample leaf: 30, n_trees: 1,n_depth: 1, CV AUC [ 0.58541667  0.46262626  0.74835859  0.67714646  0.53156566  0.57422879
  0.5       ], Average AUC 0.5827632030530009
n_min sample leaf: 6, n_trees: 21,n_depth: 1, CV AUC [ 0.65409301  0.96136627  0.87688868  0.75584491  0.57416614  0.65166351
  0.64822139], Average AUC 0.7317491286684215
n_min sample leaf: 12, n_trees: 21,n_depth: 1, CV AUC [ 0.56323916  0.95832544  0.87519202  0.77778567  0.57942182  0.65543799
  0.59696247], Average AUC 0.7151949402146274
n_min sample leaf: 18, n_trees: 21,n_depth: 1, CV AUC [ 0.65070497  0.9663326   0.88124211  0.70661564  0.59681713  0.64851811
  0.58366202], Average AUC 0.7191275091616023
n_min sample leaf: 24, n_trees: 21,n_depth: 1, CV AUC [ 0.62672822  0.9664194   0.89812973  0.73673453  0.65482691  0.62620552
  0.56737818], Average AUC 0.7252032149680384
n_min sample leaf: 30, n_trees: 21,n_depth: 1, CV AUC [ 0.65636311  0.96261048  0.83925715  0.72749369  0.56185816  0.63271215
  0.58982519], Average AUC 0.7100171339572954
n_min sample leaf: 6, n_trees: 41,n_depth: 1, CV AUC [ 0.65605271  0.96432029  0.88345697  0.76452546  0.59967119  0.65531954
  0.62362974], Average AUC 0.7352822719729781
n_min sample leaf: 12, n_trees: 41,n_depth: 1, CV AUC [ 0.65012889  0.96225537  0.81678767  0.81054293  0.59445234  0.65885186
  0.61265176], Average AUC 0.7293815447372086
n_min sample leaf: 18, n_trees: 41,n_depth: 1, CV AUC [ 0.63895728  0.96401515  0.89879524  0.79544928  0.60833596  0.66531112
  0.61234652], Average AUC 0.7404586515859014
n_min sample leaf: 24, n_trees: 41,n_depth: 1, CV AUC [ 0.63945444  0.96619581  0.88420402  0.77462647  0.60252788  0.62025163
  0.6175462 ], Average AUC 0.7292580653031455
n_min sample leaf: 30, n_trees: 41,n_depth: 1, CV AUC [ 0.65487689  0.96337595  0.83938868  0.78760785  0.60278567  0.63323331
  0.62819506], Average AUC 0.7299233436878166
n_min sample leaf: 6, n_trees: 61,n_depth: 1, CV AUC [ 0.66434396  0.96509628  0.88560869  0.78687921  0.58458544  0.6444462
  0.62992297], Average AUC 0.7372689637205119
n_min sample leaf: 12, n_trees: 61,n_depth: 1, CV AUC [ 0.61285511  0.96581439  0.87539457  0.77665983  0.61964699  0.63735786
  0.61067965], Average AUC 0.7283440584294266
n_min sample leaf: 18, n_trees: 61,n_depth: 1, CV AUC [ 0.65656303  0.96332071  0.83424348  0.80978272  0.59932923  0.61040219
  0.59538054], Average AUC 0.7241459846399715
n_min sample leaf: 24, n_trees: 61,n_depth: 1, CV AUC [ 0.63963331  0.96478851  0.88109217  0.7884154   0.60584754  0.64763108
  0.61137772], Average AUC 0.7341122474596505
n_min sample leaf: 30, n_trees: 61,n_depth: 1, CV AUC [ 0.66171349  0.96379156  0.88194181  0.78493529  0.62472643  0.63402822
  0.59745882], Average AUC 0.7355136602296884
n_min sample leaf: 6, n_trees: 81,n_depth: 1, CV AUC [ 0.64755366  0.9641572   0.8764073   0.79417877  0.59193761  0.64886292
  0.59943889], Average AUC 0.7317909060710274
n_min sample leaf: 12, n_trees: 81,n_depth: 1, CV AUC [ 0.65016572  0.96377578  0.87969802  0.79370265  0.61210017  0.63726574
  0.61781162], Average AUC 0.7363599575230209
n_min sample leaf: 18, n_trees: 81,n_depth: 1, CV AUC [ 0.65005787  0.96314184  0.8559133   0.78033986  0.60561606  0.62703727
  0.60583564], Average AUC 0.7268488324947538
n_min sample leaf: 24, n_trees: 81,n_depth: 1, CV AUC [ 0.64510995  0.9667298   0.87843803  0.78861532  0.61835806  0.66433986
  0.61272077], Average AUC 0.7391873979754393
n_min sample leaf: 30, n_trees: 81,n_depth: 1, CV AUC [ 0.65271465  0.965504    0.88393571  0.76981008  0.63645833  0.63449937
  0.62083747], Average AUC 0.7376799438843985
n_min sample leaf: 6, n_trees: 101,n_depth: 1, CV AUC [ 0.63984112  0.96428609  0.8742661   0.77058607  0.60462963  0.65745157
  0.61663844], Average AUC 0.732528431229623
n_min sample leaf: 12, n_trees: 101,n_depth: 1, CV AUC [ 0.62096749  0.96338121  0.89055135  0.79398674  0.59135364  0.66035744
  0.6240093 ], Average AUC 0.7349438813221422
n_min sample leaf: 18, n_trees: 101,n_depth: 1, CV AUC [ 0.64287405  0.96376263  0.87514205  0.79468645  0.60432713  0.63549695
  0.61028947], Average AUC 0.7323683882016789
n_min sample leaf: 24, n_trees: 101,n_depth: 1, CV AUC [ 0.65782565  0.96351273  0.88336227  0.79285038  0.59933712  0.63250158
  0.61544668], Average AUC 0.7349766308114172
n_min sample leaf: 30, n_trees: 101,n_depth: 1, CV AUC [ 0.65014731  0.96276042  0.89067235  0.78414615  0.60430082  0.6456096
  0.60274609], Average AUC 0.7343403903181678
n_min sample leaf: 6, n_trees: 121,n_depth: 1, CV AUC [ 0.64275568  0.96377841  0.88727641  0.77516835  0.59453914  0.64133765
  0.59799232], Average AUC 0.7289782809084985
n_min sample leaf: 12, n_trees: 121,n_depth: 1, CV AUC [ 0.64011469  0.96434659  0.88231797  0.77488952  0.59142466  0.6436355
  0.60624174], Average AUC 0.7289958107422222
n_min sample leaf: 18, n_trees: 121,n_depth: 1, CV AUC [ 0.64916088  0.965625    0.87783039  0.78293613  0.61944707  0.66171826
  0.61431066], Average AUC 0.7387183420905484
n_min sample leaf: 24, n_trees: 121,n_depth: 1, CV AUC [ 0.65080755  0.96316025  0.89242424  0.77857481  0.62152515  0.64645715
  0.61741879], Average AUC 0.7386239922007459
n_min sample leaf: 30, n_trees: 121,n_depth: 1, CV AUC [ 0.65294613  0.96558291  0.87798032  0.77628104  0.60712332  0.64645188
  0.61306317], Average AUC 0.7342041101473483
n_min sample leaf: 6, n_trees: 141,n_depth: 1, CV AUC [ 0.64750631  0.96342066  0.86479377  0.77316656  0.59374474  0.6437908
  0.61451239], Average AUC 0.7287050335884879
n_min sample leaf: 12, n_trees: 141,n_depth: 1, CV AUC [ 0.64765362  0.96362321  0.88147359  0.78485901  0.60781776  0.64272215
  0.61218992], Average AUC 0.7343341796588974
n_min sample leaf: 18, n_trees: 141,n_depth: 1, CV AUC [ 0.64520728  0.96406513  0.87970854  0.77799611  0.59503893  0.64571752
  0.61859197], Average AUC 0.7323322121749277
n_min sample leaf: 24, n_trees: 141,n_depth: 1, CV AUC [ 0.65195181  0.96448338  0.87187763  0.7928609   0.59996054  0.64042693
  0.61508305], Average AUC 0.7338063203978862
n_min sample leaf: 30, n_trees: 141,n_depth: 1, CV AUC [ 0.65765993  0.96548032  0.87645728  0.79056187  0.6208044   0.63676827
  0.60550917], Average AUC 0.7361773195498583
n_min sample leaf: 6, n_trees: 1,n_depth: 2, CV AUC [ 0.57368213  0.9219697   0.79738268  0.65230166  0.51377052  0.63492841
  0.48696762], Average AUC 0.6544289601907687
n_min sample leaf: 12, n_trees: 1,n_depth: 2, CV AUC [ 0.59337384  0.66853956  0.7872238   0.72457912  0.53156566  0.60611181
  0.64971573], Average AUC 0.6515870755960774
n_min sample leaf: 18, n_trees: 1,n_depth: 2, CV AUC [ 0.5885101   0.62834333  0.85112847  0.7264678   0.53815499  0.63171984
  0.5353626 ], Average AUC 0.6428124465515036
n_min sample leaf: 24, n_trees: 1,n_depth: 2, CV AUC [ 0.56717172  0.86832386  0.6418508   0.74229535  0.58893098  0.54430406
  0.55622502], Average AUC 0.6441573981567046
n_min sample leaf: 30, n_trees: 1,n_depth: 2, CV AUC [ 0.61858691  0.83485375  0.67726484  0.69726957  0.43750789  0.54883133
  0.56277571], Average AUC 0.6252985711240218
n_min sample leaf: 6, n_trees: 21,n_depth: 2, CV AUC [ 0.68723432  0.959617    0.81626684  0.72672822  0.61544876  0.6789561
  0.65872957], Average AUC 0.7347115434007885
n_min sample leaf: 12, n_trees: 21,n_depth: 2, CV AUC [ 0.64648043  0.96101904  0.79835859  0.71786616  0.63049505  0.66804854
  0.64666599], Average AUC 0.7241334009834973
n_min sample leaf: 18, n_trees: 21,n_depth: 2, CV AUC [ 0.68203914  0.96546191  0.7733428   0.75920139  0.62793298  0.69507528
  0.65751392], Average AUC 0.737223917179144
n_min sample leaf: 24, n_trees: 21,n_depth: 2, CV AUC [ 0.66967593  0.96515941  0.77855114  0.72473169  0.58393045  0.68030112
  0.58198453], Average AUC 0.7120477511651112
n_min sample leaf: 30, n_trees: 21,n_depth: 2, CV AUC [ 0.63264941  0.96775042  0.84752473  0.74648043  0.59047769  0.66317119
  0.65638321], Average AUC 0.7292052978823599
n_min sample leaf: 6, n_trees: 41,n_depth: 2, CV AUC [ 0.66909196  0.97005997  0.76065867  0.75492161  0.59717487  0.68161981
  0.64123274], Average AUC 0.7249656638822921
n_min sample leaf: 12, n_trees: 41,n_depth: 2, CV AUC [ 0.68656881  0.96664562  0.83149463  0.75328283  0.62738584  0.67637134
  0.63429453], Average AUC 0.7394348004895852
n_min sample leaf: 18, n_trees: 41,n_depth: 2, CV AUC [ 0.66315762  0.96840804  0.7935343   0.76134785  0.61677715  0.68163298
  0.64467796], Average AUC 0.7327908418427448
n_min sample leaf: 24, n_trees: 41,n_depth: 2, CV AUC [ 0.68244423  0.97051505  0.76443077  0.74742477  0.62544192  0.67311013
  0.6491026 ], Average AUC 0.7303527799865265
n_min sample leaf: 30, n_trees: 41,n_depth: 2, CV AUC [ 0.66214489  0.9666088   0.77135943  0.75193866  0.63195497  0.66020478
  0.63991092], Average AUC 0.7263032053240651
n_min sample leaf: 6, n_trees: 61,n_depth: 2, CV AUC [ 0.68693708  0.7223327   0.79943708  0.75192551  0.61266572  0.68258581
  0.64153798], Average AUC 0.6996316960349899
n_min sample leaf: 12, n_trees: 61,n_depth: 2, CV AUC [ 0.6881734   0.96991793  0.81082965  0.75544245  0.59726168  0.67102285
  0.65874284], Average AUC 0.7359129703270917
n_min sample leaf: 18, n_trees: 61,n_depth: 2, CV AUC [ 0.66461753  0.96842119  0.81591698  0.75354324  0.62339278  0.65206096
  0.64426655], Average AUC 0.7317456057982162
n_min sample leaf: 24, n_trees: 61,n_depth: 2, CV AUC [ 0.66731376  0.9725826   0.79450495  0.76434922  0.60056292  0.68047484
  0.64634483], Average AUC 0.7323047305086108
n_min sample leaf: 30, n_trees: 61,n_depth: 2, CV AUC [ 0.67731745  0.971633    0.806371    0.75586595  0.60166509  0.67835597
  0.64069127], Average AUC 0.73312853231337
n_min sample leaf: 6, n_trees: 81,n_depth: 2, CV AUC [ 0.66967593  0.97608638  0.81414141  0.76166877  0.60747317  0.67762424
  0.64615107], Average AUC 0.7361172815803771
n_min sample leaf: 12, n_trees: 81,n_depth: 2, CV AUC [ 0.67808291  0.96920507  0.77131734  0.75488215  0.62053872  0.66197094
  0.64672704], Average AUC 0.7289605975420567
n_min sample leaf: 18, n_trees: 81,n_depth: 2, CV AUC [ 0.67330861  0.97491056  0.767127    0.75327231  0.61692971  0.66790377
  0.63399194], Average AUC 0.726777700163094
n_min sample leaf: 24, n_trees: 81,n_depth: 2, CV AUC [ 0.67103062  0.96726904  0.78242845  0.75704966  0.62374527  0.67039903
  0.64628113], Average AUC 0.7311718860061205
n_min sample leaf: 30, n_trees: 81,n_depth: 2, CV AUC [ 0.66042719  0.96777936  0.80057607  0.75842014  0.61806871  0.67608707
  0.6360251 ], Average AUC 0.731054804887747
n_min sample leaf: 6, n_trees: 101,n_depth: 2, CV AUC [ 0.69057765  0.94537037  0.79826915  0.76818708  0.615625    0.68147242
  0.63441928], Average AUC 0.7334172774750959
n_min sample leaf: 12, n_trees: 101,n_depth: 2, CV AUC [ 0.68842856  0.97309028  0.81421507  0.76065341  0.60795455  0.68336492
  0.64921408], Average AUC 0.7395601215741853
n_min sample leaf: 18, n_trees: 101,n_depth: 2, CV AUC [ 0.6777541   0.96922085  0.81423348  0.76596433  0.60198074  0.67999052
  0.64121151], Average AUC 0.7357650779061824
n_min sample leaf: 24, n_trees: 101,n_depth: 2, CV AUC [ 0.67888521  0.97181713  0.78352799  0.76574337  0.61455703  0.67410244
  0.63763888], Average AUC 0.7323245786075808
n_min sample leaf: 30, n_trees: 101,n_depth: 2, CV AUC [ 0.67822759  0.97149095  0.84651199  0.74244266  0.60746528  0.67545799
  0.64649878], Average AUC 0.7382993193656285
n_min sample leaf: 6, n_trees: 121,n_depth: 2, CV AUC [ 0.67783302  0.97376631  0.7618266   0.76413878  0.61847643  0.67219678
  0.64398785], Average AUC 0.7303179675692932
n_min sample leaf: 12, n_trees: 121,n_depth: 2, CV AUC [ 0.66484638  0.97261942  0.77036248  0.74918192  0.60206229  0.67351284
  0.63634626], Average AUC 0.7241330862399293
n_min sample leaf: 18, n_trees: 121,n_depth: 2, CV AUC [ 0.67907197  0.96912616  0.77275884  0.76389941  0.61863163  0.66984892
  0.65292207], Average AUC 0.7323227123527103
n_min sample leaf: 24, n_trees: 121,n_depth: 2, CV AUC [ 0.66541982  0.96812658  0.7539536   0.75520833  0.62315867  0.67349442
  0.6409567 ], Average AUC 0.7257597316828939
n_min sample leaf: 30, n_trees: 121,n_depth: 2, CV AUC [ 0.6696996   0.9740846   0.82524463  0.77392414  0.61495949  0.67782954
  0.65289818], Average AUC 0.7412343112235481
n_min sample leaf: 6, n_trees: 141,n_depth: 2, CV AUC [ 0.67658617  0.9712963   0.78554556  0.76104535  0.61766362  0.67885871
  0.65663536], Average AUC 0.7353758666050888
n_min sample leaf: 12, n_trees: 141,n_depth: 2, CV AUC [ 0.66151357  0.973619    0.79602799  0.74537826  0.61203178  0.67539745
  0.65532682], Average AUC 0.731327838853833
n_min sample leaf: 18, n_trees: 141,n_depth: 2, CV AUC [ 0.67542351  0.9730403   0.75111006  0.760998    0.60368529  0.68303853
  0.63471921], Average AUC 0.7260021280010948
n_min sample leaf: 24, n_trees: 141,n_depth: 2, CV AUC [ 0.68283617  0.97138047  0.82150673  0.75579493  0.62473695  0.67439987
  0.64770912], Average AUC 0.7397663214045804
n_min sample leaf: 30, n_trees: 141,n_depth: 2, CV AUC [ 0.67586279  0.9748448   0.81030093  0.76832912  0.61506471  0.67416825
  0.65043238], Average AUC 0.7384289972106558
n_min sample leaf: 6, n_trees: 1,n_depth: 3, CV AUC [ 0.56629314  0.74646991  0.71814499  0.67021254  0.57789615  0.59767583
  0.55329738], Average AUC 0.6328557047651563
n_min sample leaf: 12, n_trees: 1,n_depth: 3, CV AUC [ 0.61463331  0.90310659  0.7921875   0.68858638  0.61435974  0.57378922
  0.69187852], Average AUC 0.6969344665600629
n_min sample leaf: 18, n_trees: 1,n_depth: 3, CV AUC [ 0.64800873  0.91982323  0.77660196  0.61199495  0.55778093  0.60907033
  0.57161968], Average AUC 0.6706999735078154
n_min sample leaf: 24, n_trees: 1,n_depth: 3, CV AUC [ 0.61380471  0.83382786  0.77201705  0.68637153  0.4472722   0.58619183
  0.62887189], Average AUC 0.6526224385582383
n_min sample leaf: 30, n_trees: 1,n_depth: 3, CV AUC [ 0.61566183  0.84608586  0.77129367  0.67874053  0.63410932  0.55983628
  0.55605249], Average AUC 0.665968567930085
n_min sample leaf: 6, n_trees: 21,n_depth: 3, CV AUC [ 0.68959122  0.76843697  0.69162195  0.74257418  0.63647412  0.67298115
  0.62856134], Average AUC 0.6900344198717855
n_min sample leaf: 12, n_trees: 21,n_depth: 3, CV AUC [ 0.67693077  0.97158039  0.68273885  0.75996423  0.61802399  0.67276005
  0.64214846], Average AUC 0.7177352468358337
n_min sample leaf: 18, n_trees: 21,n_depth: 3, CV AUC [ 0.67811185  0.96854482  0.70330387  0.73439867  0.6275726   0.68325174
  0.66285428], Average AUC 0.7225768330935429
n_min sample leaf: 24, n_trees: 21,n_depth: 3, CV AUC [ 0.65054977  0.97031513  0.68763415  0.64968171  0.61486479  0.68138555
  0.66663393], Average AUC 0.7030092922159803
n_min sample leaf: 30, n_trees: 21,n_depth: 3, CV AUC [ 0.67027567  0.97196444  0.70760995  0.73779987  0.64342908  0.67261266
  0.64642711], Average AUC 0.7214455408923112
n_min sample leaf: 6, n_trees: 41,n_depth: 3, CV AUC [ 0.66917877  0.96178451  0.73035564  0.72710175  0.63313605  0.67776637
  0.64580336], Average AUC 0.7207323496052804
n_min sample leaf: 12, n_trees: 41,n_depth: 3, CV AUC [ 0.67443182  0.9712016   0.68927294  0.74207439  0.61806345  0.67206254
  0.64959629], Average AUC 0.7166718599564866
n_min sample leaf: 18, n_trees: 41,n_depth: 3, CV AUC [ 0.65967224  0.97130682  0.69127473  0.70600274  0.60798874  0.68348863
  0.65902154], Average AUC 0.7112507758480122
n_min sample leaf: 24, n_trees: 41,n_depth: 3, CV AUC [ 0.67402146  0.97133312  0.71571444  0.73836279  0.61195023  0.68221468
  0.66515286], Average AUC 0.7226785121144529
n_min sample leaf: 30, n_trees: 41,n_depth: 3, CV AUC [ 0.68558502  0.96744529  0.7281855   0.72043613  0.60807029  0.68639977
  0.68075986], Average AUC 0.7252688356875705
n_min sample leaf: 6, n_trees: 61,n_depth: 3, CV AUC [ 0.68064499  0.96811869  0.74315288  0.70795191  0.62392414  0.67336281
  0.65417753], Average AUC 0.72161899340581
n_min sample leaf: 12, n_trees: 61,n_depth: 3, CV AUC [ 0.6853509   0.96891572  0.71691656  0.72629682  0.58695023  0.66970415
  0.65427308], Average AUC 0.7154867811696646
n_min sample leaf: 18, n_trees: 61,n_depth: 3, CV AUC [ 0.67989531  0.97288247  0.71632471  0.74303188  0.62368739  0.67694778
  0.66251188], Average AUC 0.7250402022165642
n_min sample leaf: 24, n_trees: 61,n_depth: 3, CV AUC [ 0.67908512  0.97382418  0.75504787  0.74146675  0.6102904   0.67877711
  0.66735058], Average AUC 0.7294060029530199
n_min sample leaf: 30, n_trees: 61,n_depth: 3, CV AUC [ 0.67923506  0.97269834  0.71716646  0.7472722   0.61380471  0.67854285
  0.66107327], Average AUC 0.724256127439969
n_min sample leaf: 6, n_trees: 81,n_depth: 3, CV AUC [ 0.69410774  0.96357586  0.73960175  0.72770149  0.62433449  0.67996684
  0.66223318], Average AUC 0.7273601935485999
n_min sample leaf: 12, n_trees: 81,n_depth: 3, CV AUC [ 0.67356639  0.97479745  0.72187763  0.73855745  0.62290878  0.68691303
  0.64670581], Average AUC 0.7236180777681581
n_min sample leaf: 18, n_trees: 81,n_depth: 3, CV AUC [ 0.67194865  0.9741714   0.69162721  0.73751578  0.61208439  0.68565224
  0.64964141], Average AUC 0.7175201551161312
n_min sample leaf: 24, n_trees: 81,n_depth: 3, CV AUC [ 0.67055187  0.97286932  0.75408249  0.75575284  0.60556345  0.67727153
  0.65488356], Average AUC 0.7272821513369764
n_min sample leaf: 30, n_trees: 81,n_depth: 3, CV AUC [ 0.66303662  0.97198548  0.72662826  0.74429977  0.61999947  0.67682407
  0.65825711], Average AUC 0.723004397208723
n_min sample leaf: 6, n_trees: 101,n_depth: 3, CV AUC [ 0.68212595  0.97123053  0.75207281  0.73653988  0.61941025  0.67375763
  0.65158698], Average AUC 0.726674861365228
n_min sample leaf: 12, n_trees: 101,n_depth: 3, CV AUC [ 0.68389888  0.97275621  0.70424558  0.72442919  0.61254209  0.67107286
  0.64346231], Average AUC 0.7160581597678443
n_min sample leaf: 18, n_trees: 101,n_depth: 3, CV AUC [ 0.68345434  0.97389257  0.68402515  0.72783302  0.62784617  0.68648136
  0.66430881], Average AUC 0.7211202017652936
n_min sample leaf: 24, n_trees: 101,n_depth: 3, CV AUC [ 0.68755524  0.97170139  0.73082386  0.73082649  0.62677031  0.67661876
  0.66075211], Average AUC 0.7264354520598192
n_min sample leaf: 30, n_trees: 101,n_depth: 3, CV AUC [ 0.68189447  0.9705545   0.7380524   0.71506208  0.61625368  0.6738945
  0.64224401], Average AUC 0.7197079492023523
n_min sample leaf: 6, n_trees: 121,n_depth: 3, CV AUC [ 0.67952704  0.9738347   0.72035196  0.72966909  0.62595749  0.68236997
  0.67153899], Average AUC 0.7261784626727085
n_min sample leaf: 12, n_trees: 121,n_depth: 3, CV AUC [ 0.68966751  0.97384785  0.72063868  0.7310343   0.62399779  0.68458886
  0.65294861], Average AUC 0.7252462287633489
n_min sample leaf: 18, n_trees: 121,n_depth: 3, CV AUC [ 0.68113426  0.97227746  0.70490583  0.72485006  0.62099905  0.68906875
  0.65626377], Average AUC 0.7213570267020264
n_min sample leaf: 24, n_trees: 121,n_depth: 3, CV AUC [ 0.68653725  0.97153304  0.73411721  0.72626     0.61122159  0.6777769
  0.65668314], Average AUC 0.7234470182605064
n_min sample leaf: 30, n_trees: 121,n_depth: 3, CV AUC [ 0.6711069   0.97240109  0.74316604  0.73854956  0.61861059  0.67867709
  0.66653042], Average AUC 0.7270059542956779
n_min sample leaf: 6, n_trees: 141,n_depth: 3, CV AUC [ 0.69027778  0.97568392  0.69612005  0.74404198  0.604506    0.67152295
  0.64580867], Average AUC 0.7182801939941257
n_min sample leaf: 12, n_trees: 141,n_depth: 3, CV AUC [ 0.68010838  0.97364531  0.69412616  0.73526147  0.60711806  0.68825542
  0.65515429], Average AUC 0.7190955826188672
n_min sample leaf: 18, n_trees: 141,n_depth: 3, CV AUC [ 0.68319129  0.97610217  0.7162642   0.73659775  0.60967224  0.68113813
  0.66983496], Average AUC 0.7246858206399196
n_min sample leaf: 24, n_trees: 141,n_depth: 3, CV AUC [ 0.67746475  0.97406881  0.71156355  0.73594013  0.61480166  0.68211729
  0.66005139], Average AUC 0.7222867977283532
n_min sample leaf: 30, n_trees: 141,n_depth: 3, CV AUC [ 0.67353483  0.97294297  0.74220328  0.72588384  0.62167508  0.67634502
  0.66498829], Average AUC 0.7253676169946178
n_min sample leaf: 6, n_trees: 1,n_depth: 4, CV AUC [ 0.57195128  0.80360375  0.72486585  0.74748264  0.6264073   0.62980101
  0.61340291], Average AUC 0.673930676765592
n_min sample leaf: 12, n_trees: 1,n_depth: 4, CV AUC [ 0.65614215  0.95354588  0.65062605  0.7078125   0.58220223  0.43620499
  0.57417041], Average AUC 0.6515291732903432
n_min sample leaf: 18, n_trees: 1,n_depth: 4, CV AUC [ 0.52492635  0.69453914  0.72347433  0.67277199  0.61022201  0.60242419
  0.66069372], Average AUC 0.6412931038814034
n_min sample leaf: 24, n_trees: 1,n_depth: 4, CV AUC [ 0.57160669  0.93420402  0.81267361  0.41962595  0.61661669  0.61331859
  0.59319078], Average AUC 0.6516051903961397
n_min sample leaf: 30, n_trees: 1,n_depth: 4, CV AUC [ 0.55516098  0.91633523  0.81502262  0.70278041  0.59836122  0.5309302
  0.58067068], Average AUC 0.6713230472773379
n_min sample leaf: 6, n_trees: 21,n_depth: 4, CV AUC [ 0.68755524  0.97602325  0.63889941  0.71480429  0.60148622  0.67674774
  0.66552711], Average AUC 0.7087204653627596
n_min sample leaf: 12, n_trees: 21,n_depth: 4, CV AUC [ 0.68067919  0.95890941  0.64167193  0.62639941  0.60813342  0.67252053
  0.68000074], Average AUC 0.6954735177138422
n_min sample leaf: 18, n_trees: 21,n_depth: 4, CV AUC [ 0.6556108   0.97087279  0.68920455  0.68012153  0.63659249  0.67460255
  0.66000361], Average AUC 0.7095726148788541
n_min sample leaf: 24, n_trees: 21,n_depth: 4, CV AUC [ 0.6743529   0.97119634  0.70710227  0.68641625  0.62983481  0.6676932
  0.69466548], Average AUC 0.7187516072211474
n_min sample leaf: 30, n_trees: 21,n_depth: 4, CV AUC [ 0.68707649  0.97217224  0.72197759  0.69202441  0.59350537  0.66217625
  0.6702968 ], Average AUC 0.7141755926808724
n_min sample leaf: 6, n_trees: 41,n_depth: 4, CV AUC [ 0.68247843  0.97529724  0.6562842   0.71517519  0.63624263  0.68441777
  0.67951767], Average AUC 0.7184875907313989
n_min sample leaf: 12, n_trees: 41,n_depth: 4, CV AUC [ 0.66849221  0.97309817  0.65508733  0.69842435  0.64005419  0.68093809
  0.66744082], Average AUC 0.7119335950978621
n_min sample leaf: 18, n_trees: 41,n_depth: 4, CV AUC [ 0.69306871  0.96722433  0.68824705  0.69872685  0.60484007  0.6736734
  0.6679584 ], Average AUC 0.7133912592954542
n_min sample leaf: 24, n_trees: 41,n_depth: 4, CV AUC [ 0.68555082  0.97469223  0.654627    0.69075652  0.62370844  0.689761
  0.68375651], Average AUC 0.7146932183862781
n_min sample leaf: 30, n_trees: 41,n_depth: 4, CV AUC [ 0.65821759  0.9740767   0.69293192  0.67988742  0.61377578  0.67097284
  0.67106388], Average AUC 0.7087037326563855
n_min sample leaf: 6, n_trees: 61,n_depth: 4, CV AUC [ 0.6746028   0.97117266  0.67547612  0.71180293  0.6190604   0.67826385
  0.6662411 ], Average AUC 0.7138028350900842
n_min sample leaf: 12, n_trees: 61,n_depth: 4, CV AUC [ 0.68958596  0.93552452  0.63478535  0.69288984  0.5835201   0.69365393
  0.67910361], Average AUC 0.7012947570289114
n_min sample leaf: 18, n_trees: 61,n_depth: 4, CV AUC [ 0.68367793  0.97482376  0.66124263  0.69375789  0.62053609  0.67278638
  0.67105857], Average AUC 0.7111261777805298
n_min sample leaf: 24, n_trees: 61,n_depth: 4, CV AUC [ 0.68063973  0.97327704  0.68874947  0.68742372  0.60323285  0.68737892
  0.66612962], Average AUC 0.7124044794813221
n_min sample leaf: 30, n_trees: 61,n_depth: 4, CV AUC [ 0.68701599  0.97396622  0.69302662  0.71122685  0.59982113  0.67900084
  0.66925899], Average AUC 0.7161880923477185
n_min sample leaf: 6, n_trees: 81,n_depth: 4, CV AUC [ 0.68532723  0.92542614  0.67751736  0.6820181   0.62504209  0.66000737
  0.67509303], Average AUC 0.7043473306856417
n_min sample leaf: 12, n_trees: 81,n_depth: 4, CV AUC [ 0.67815394  0.9745423   0.62726484  0.70255156  0.56687973  0.67640029
  0.65696449], Average AUC 0.6975367353326886
n_min sample leaf: 18, n_trees: 81,n_depth: 4, CV AUC [ 0.68062395  0.97311658  0.6572259   0.7104456   0.61927083  0.68197778
  0.66590401], Average AUC 0.7126520952576298
n_min sample leaf: 24, n_trees: 81,n_depth: 4, CV AUC [ 0.67514205  0.97053872  0.64281881  0.68573232  0.60368266  0.68313329
  0.65882512], Average AUC 0.7028389964959484
n_min sample leaf: 30, n_trees: 81,n_depth: 4, CV AUC [ 0.66004577  0.97349011  0.67728588  0.69467066  0.60854903  0.67293378
  0.66390005], Average AUC 0.7072678976075905
n_min sample leaf: 6, n_trees: 101,n_depth: 4, CV AUC [ 0.68176557  0.97082544  0.6720907   0.68744213  0.5962358   0.68623921
  0.67475063], Average AUC 0.7099070684772247
n_min sample leaf: 12, n_trees: 101,n_depth: 4, CV AUC [ 0.68244949  0.97390835  0.66598537  0.69583333  0.58441183  0.676753
  0.66508916], Average AUC 0.7063472201128006
n_min sample leaf: 18, n_trees: 101,n_depth: 4, CV AUC [ 0.67875368  0.97148832  0.67159354  0.66862374  0.6187842   0.67396031
  0.67302537], Average AUC 0.7080327361718949
n_min sample leaf: 24, n_trees: 101,n_depth: 4, CV AUC [ 0.67378998  0.97406618  0.68103167  0.68733954  0.62068866  0.67626342
  0.66602611], Average AUC 0.711315080921915
n_min sample leaf: 30, n_trees: 101,n_depth: 4, CV AUC [ 0.65976694  0.97278514  0.71171875  0.71983112  0.60371949  0.6834044
  0.66177665], Average AUC 0.7161432135064091
n_min sample leaf: 6, n_trees: 121,n_depth: 4, CV AUC [ 0.68628998  0.97318497  0.64172717  0.70477168  0.61275779  0.6865419
  0.67113554], Average AUC 0.7109155760405358
n_min sample leaf: 12, n_trees: 121,n_depth: 4, CV AUC [ 0.68352799  0.97434501  0.67226168  0.71863689  0.59970802  0.6905638
  0.65646284], Average AUC 0.7136437468954094
n_min sample leaf: 18, n_trees: 121,n_depth: 4, CV AUC [ 0.68439867  0.97273253  0.66034564  0.69156408  0.62753577  0.68150927
  0.66198899], Average AUC 0.7114392799738916
n_min sample leaf: 24, n_trees: 121,n_depth: 4, CV AUC [ 0.68880471  0.97440814  0.67264573  0.72035196  0.61422559  0.68017741
  0.67125498], Average AUC 0.7174097887546227
n_min sample leaf: 30, n_trees: 121,n_depth: 4, CV AUC [ 0.68117635  0.97307976  0.71041404  0.70941709  0.62106745  0.67856391
  0.66169437], Average AUC 0.7193447068738291
n_min sample leaf: 6, n_trees: 141,n_depth: 4, CV AUC [ 0.68997264  0.93938868  0.66579598  0.68456702  0.62658617  0.65909665
  0.67094974], Average AUC 0.7051938424520027
n_min sample leaf: 12, n_trees: 141,n_depth: 4, CV AUC [ 0.69852431  0.9727299   0.65949337  0.71756103  0.63030566  0.69029006
  0.66684627], Average AUC 0.7193929428262623
n_min sample leaf: 18, n_trees: 141,n_depth: 4, CV AUC [ 0.6749395   0.97413721  0.64014099  0.71000894  0.62225905  0.67697147
  0.66961996], Average AUC 0.7097253031619688
n_min sample leaf: 24, n_trees: 141,n_depth: 4, CV AUC [ 0.67339015  0.97381366  0.70408512  0.70542403  0.60875421  0.6748526
  0.67125498], Average AUC 0.7159392508032136
n_min sample leaf: 30, n_trees: 141,n_depth: 4, CV AUC [ 0.67580492  0.9737137   0.70764152  0.69435238  0.61116898  0.68227258
  0.67189731], Average AUC 0.716693056933095
((30, 121, 2), 0.7412343112235481)

In [40]:
model = RandomForestClassifier(n_estimators=21, max_depth=1, min_samples_leaf=18, min_samples_split=10)
model.fit(X, y)
print (model.score(X, y))
scores=cross_val_score(model, X, y, scoring='roc_auc', cv=7)
print (scores.mean())


0.92964360587
0.727705233125

In [146]:
features = X.columns
feature_importances = model.feature_importances_

features_df = pd.DataFrame({'Features': features, 'Importance Score': feature_importances})
features_df.sort('Importance Score', inplace=True, ascending=False)

features_df.head


/Users/annakudryashova/anaconda/lib/python3.5/site-packages/ipykernel/__main__.py:5: FutureWarning: sort(columns=....) is deprecated, use sort_values(by=.....)
Out[146]:
<bound method NDFrame.head of             Features  Importance Score
1              Month          0.285714
7                Pop          0.238095
5            Quarter          0.190476
4               Week          0.095238
0               Year          0.047619
3       Duration_Sec          0.047619
6            TimePer          0.047619
13       Region_West          0.047619
2                Lat          0.000000
8          Shape_Egg          0.000000
9        Shape_Light          0.000000
10        Shape_Cone          0.000000
11    Region_Midwest          0.000000
12  Region_Northeast          0.000000>

In [41]:
# 4th revision: remove the least important/correlated features

X = df[['Year', 'Month','Duration_Sec', 'Week', 'TimePer', 'Pop','Region_West']]
y = df['Hoax']

score_model={}   
for n_depth in range(1, 5, 1):
    for n_trees in range(1, 150, 20):
        for n_min in range(6, 36, 6):
            model = RandomForestClassifier(n_estimators=n_trees, max_depth=n_depth, min_samples_leaf=n_min)
            scores = cross_val_score(model, X, y, scoring='roc_auc', cv=7)
            score_model[n_min,n_trees,n_depth]=scores.mean()
            print('n_min sample leaf: {}, n_trees: {},n_depth: {}, CV AUC {}, Average AUC {}'.format(n_min,n_trees,n_depth,scores, scores.mean()))
best=max(score_model.items(), key=lambda x: x[1])
print (best)


n_min sample leaf: 6, n_trees: 1,n_depth: 1, CV AUC [ 0.57329545  0.90833333  0.74835859  0.65751263  0.5         0.55721204
  0.54110905], Average AUC 0.6408315853189033
n_min sample leaf: 12, n_trees: 1,n_depth: 1, CV AUC [ 0.58541667  0.83320707  0.74488636  0.57929293  0.54583333  0.5
  0.52041651], Average AUC 0.6155789812622919
n_min sample leaf: 18, n_trees: 1,n_depth: 1, CV AUC [ 0.58541667  0.90833333  0.74488636  0.65997475  0.55498737  0.56017583
  0.54721118], Average AUC 0.6515693555605209
n_min sample leaf: 24, n_trees: 1,n_depth: 1, CV AUC [ 0.57348485  0.90833333  0.74835859  0.65997475  0.55069444  0.55670141
  0.53451058], Average AUC 0.6474368504173429
n_min sample leaf: 30, n_trees: 1,n_depth: 1, CV AUC [ 0.5         0.90833333  0.74242424  0.65751263  0.55069444  0.56488208
  0.52493139], Average AUC 0.6355397306417734
n_min sample leaf: 6, n_trees: 21,n_depth: 1, CV AUC [ 0.65897254  0.95926978  0.89020412  0.7726694   0.57723327  0.61926195
  0.59599102], Average AUC 0.7248002977225079
n_min sample leaf: 12, n_trees: 21,n_depth: 1, CV AUC [ 0.65807818  0.95922243  0.88576389  0.76861585  0.60715225  0.62224152
  0.59599102], Average AUC 0.7281521627635115
n_min sample leaf: 18, n_trees: 21,n_depth: 1, CV AUC [ 0.65589489  0.95888573  0.83426452  0.75863847  0.60956702  0.61344757
  0.57289903], Average AUC 0.7147996037390626
n_min sample leaf: 24, n_trees: 21,n_depth: 1, CV AUC [ 0.66077178  0.95921454  0.88786301  0.77114373  0.57723327  0.61729838
  0.59198841], Average AUC 0.7236447300845049
n_min sample leaf: 30, n_trees: 21,n_depth: 1, CV AUC [ 0.66112426  0.95852799  0.89185869  0.75433239  0.60589489  0.61296326
  0.59558757], Average AUC 0.7257555775202237
n_min sample leaf: 6, n_trees: 41,n_depth: 1, CV AUC [ 0.67272201  0.95932765  0.88932029  0.76782407  0.61213699  0.63653664
  0.59770036], Average AUC 0.7336525734302283
n_min sample leaf: 12, n_trees: 41,n_depth: 1, CV AUC [ 0.65560027  0.95924085  0.89044087  0.8030303   0.60258049  0.62043062
  0.59599102], Average AUC 0.7324734881386112
n_min sample leaf: 18, n_trees: 41,n_depth: 1, CV AUC [ 0.65807818  0.95900936  0.8928346   0.76893676  0.59306082  0.63918983
  0.59469043], Average AUC 0.7293999974369203
n_min sample leaf: 24, n_trees: 41,n_depth: 1, CV AUC [ 0.67746212  0.95809659  0.89211648  0.76922348  0.59194024  0.62241525
  0.59599102], Average AUC 0.7296064533227451
n_min sample leaf: 30, n_trees: 41,n_depth: 1, CV AUC [ 0.65948811  0.95834386  0.8822338   0.80081545  0.60258049  0.61383449
  0.60174013], Average AUC 0.7312909027598353
n_min sample leaf: 6, n_trees: 61,n_depth: 1, CV AUC [ 0.67213805  0.95900147  0.89211911  0.76594066  0.60093645  0.62484734
  0.59599102], Average AUC 0.7301391552309011
n_min sample leaf: 12, n_trees: 61,n_depth: 1, CV AUC [ 0.675       0.95862795  0.89599642  0.76802136  0.60258049  0.62024637
  0.59492932], Average AUC 0.7307717007812015
n_min sample leaf: 18, n_trees: 61,n_depth: 1, CV AUC [ 0.64503893  0.95844907  0.88600852  0.77405566  0.60784144  0.6220836
  0.59877267], Average AUC 0.7274642706043444
n_min sample leaf: 24, n_trees: 61,n_depth: 1, CV AUC [ 0.65560027  0.95918824  0.87994529  0.76847643  0.60128367  0.62271004
  0.6055304 ], Average AUC 0.7275334772389463
n_min sample leaf: 30, n_trees: 61,n_depth: 1, CV AUC [ 0.65807818  0.95922243  0.89890309  0.79594381  0.60106797  0.63308854
  0.59499302], Average AUC 0.7344710074985664
n_min sample leaf: 6, n_trees: 81,n_depth: 1, CV AUC [ 0.67647043  0.95904619  0.88832071  0.7702862   0.61712963  0.62008318
  0.59599102], Average AUC 0.7324753357118226
n_min sample leaf: 12, n_trees: 81,n_depth: 1, CV AUC [ 0.65560027  0.95900673  0.8946049   0.76901305  0.60258049  0.61828806
  0.59756234], Average AUC 0.72809369229795
n_min sample leaf: 18, n_trees: 81,n_depth: 1, CV AUC [ 0.6587016   0.96341277  0.88813657  0.75742056  0.61421244  0.62597126
  0.60174013], Average AUC 0.7299421897456732
n_min sample leaf: 24, n_trees: 81,n_depth: 1, CV AUC [ 0.65560027  0.95896991  0.88939394  0.77253788  0.60119423  0.62008318
  0.59599102], Average AUC 0.7276814895117264
n_min sample leaf: 30, n_trees: 81,n_depth: 1, CV AUC [ 0.65560027  0.95926452  0.89406829  0.78978325  0.60135732  0.62424721
  0.62809419], Average AUC 0.7360592938895858
n_min sample leaf: 6, n_trees: 101,n_depth: 1, CV AUC [ 0.65560027  0.96338647  0.89107218  0.77223274  0.60256208  0.62377606
  0.59784369], Average AUC 0.7294962128155753
n_min sample leaf: 12, n_trees: 101,n_depth: 1, CV AUC [ 0.65560027  0.95895676  0.89624632  0.77105166  0.60119423  0.62017267
  0.59796578], Average AUC 0.7287410988199335
n_min sample leaf: 18, n_trees: 101,n_depth: 1, CV AUC [ 0.67037826  0.95885943  0.89374474  0.77139362  0.59282407  0.63801853
  0.59770036], Average AUC 0.731845573240016
n_min sample leaf: 24, n_trees: 101,n_depth: 1, CV AUC [ 0.6558186   0.96339173  0.89308186  0.79545981  0.60093645  0.63003527
  0.60326898], Average AUC 0.7345703848752215
n_min sample leaf: 30, n_trees: 101,n_depth: 1, CV AUC [ 0.65634733  0.95892519  0.89488899  0.77554977  0.60119423  0.63805801
  0.60174013], Average AUC 0.7323862361701956
n_min sample leaf: 6, n_trees: 121,n_depth: 1, CV AUC [ 0.67020202  0.95899621  0.88481955  0.77241688  0.60256208  0.62017267
  0.63105634], Average AUC 0.7343179636136089
n_min sample leaf: 12, n_trees: 121,n_depth: 1, CV AUC [ 0.65560027  0.9630524   0.89122738  0.78762626  0.60119423  0.62024637
  0.60357687], Average AUC 0.7317891120831584
n_min sample leaf: 18, n_trees: 121,n_depth: 1, CV AUC [ 0.66693234  0.95888047  0.89479693  0.76884207  0.6044455   0.61968836
  0.59599102], Average AUC 0.7299395256931872
n_min sample leaf: 24, n_trees: 121,n_depth: 1, CV AUC [ 0.67176189  0.95889888  0.89810343  0.78462753  0.61716909  0.6342888
  0.59782776], Average AUC 0.7375253392041617
n_min sample leaf: 30, n_trees: 121,n_depth: 1, CV AUC [ 0.65652094  0.96362058  0.89141677  0.77433186  0.58514047  0.6247789
  0.59782776], Average AUC 0.7276624685424716
n_min sample leaf: 6, n_trees: 141,n_depth: 1, CV AUC [ 0.66619318  0.95899884  0.88906776  0.77356639  0.59282407  0.63907138
  0.59599102], Average AUC 0.7308160934265933
n_min sample leaf: 12, n_trees: 141,n_depth: 1, CV AUC [ 0.6558186   0.95894623  0.8927741   0.76913142  0.60061816  0.62423142
  0.59782776], Average AUC 0.7284782410758097
n_min sample leaf: 18, n_trees: 141,n_depth: 1, CV AUC [ 0.66735848  0.96369949  0.89367635  0.77721223  0.61183712  0.63073542
  0.59770036], Average AUC 0.7346027777452858
n_min sample leaf: 24, n_trees: 141,n_depth: 1, CV AUC [ 0.65862795  0.95892519  0.88408302  0.76942603  0.61717435  0.63788166
  0.60077929], Average AUC 0.7324139256607458
n_min sample leaf: 30, n_trees: 141,n_depth: 1, CV AUC [ 0.6558186   0.95891993  0.89511785  0.77436606  0.59802189  0.62024637
  0.59796578], Average AUC 0.7286366381747668
n_min sample leaf: 6, n_trees: 1,n_depth: 2, CV AUC [ 0.63887311  0.83485375  0.62899306  0.6775726   0.5648306   0.64635713
  0.52722732], Average AUC 0.6455296499019408
n_min sample leaf: 12, n_trees: 1,n_depth: 2, CV AUC [ 0.6222722   0.83233375  0.83116056  0.67714646  0.55069444  0.55225837
  0.54997691], Average AUC 0.659406100949045
n_min sample leaf: 18, n_trees: 1,n_depth: 2, CV AUC [ 0.61141625  0.9169718   0.78473011  0.67714646  0.51080072  0.56756686
  0.66347803], Average AUC 0.676015746076901
n_min sample leaf: 24, n_trees: 1,n_depth: 2, CV AUC [ 0.66940236  0.94820602  0.78357008  0.69726957  0.52180661  0.60324805
  0.57227528], Average AUC 0.6851111370883721
n_min sample leaf: 30, n_trees: 1,n_depth: 2, CV AUC [ 0.615383    0.94820602  0.78174979  0.68347538  0.57835911  0.59618341
  0.54500549], Average AUC 0.6783374566841324
n_min sample leaf: 6, n_trees: 21,n_depth: 2, CV AUC [ 0.69453651  0.95781776  0.75669455  0.7654698   0.62606534  0.65456675
  0.63686915], Average AUC 0.7274314094619834
n_min sample leaf: 12, n_trees: 21,n_depth: 2, CV AUC [ 0.69069339  0.96869213  0.78718434  0.74428662  0.5849537   0.68401506
  0.63195878], Average AUC 0.7273977179662487
n_min sample leaf: 18, n_trees: 21,n_depth: 2, CV AUC [ 0.70278567  0.9643045   0.80931187  0.75772569  0.6093145   0.6682749
  0.63753802], Average AUC 0.7356078795693032
n_min sample leaf: 24, n_trees: 21,n_depth: 2, CV AUC [ 0.70326968  0.9683186   0.72114636  0.76553556  0.62443708  0.67842967
  0.64702166], Average AUC 0.7297369449360619
n_min sample leaf: 30, n_trees: 21,n_depth: 2, CV AUC [ 0.7089173   0.96689289  0.83337542  0.73354377  0.63910985  0.66973837
  0.65842433], Average AUC 0.7442859887850186
n_min sample leaf: 6, n_trees: 41,n_depth: 2, CV AUC [ 0.69948706  0.96897096  0.75886732  0.77781987  0.63238373  0.6706175
  0.65448277], Average AUC 0.7375184570783814
n_min sample leaf: 12, n_trees: 41,n_depth: 2, CV AUC [ 0.690504    0.9352904   0.77148569  0.76566972  0.63102641  0.67120973
  0.65692202], Average AUC 0.7317297103274077
n_min sample leaf: 18, n_trees: 41,n_depth: 2, CV AUC [ 0.70687605  0.96814499  0.7162379   0.75010522  0.64551768  0.65118183
  0.65976738], Average AUC 0.7282615783352877
n_min sample leaf: 24, n_trees: 41,n_depth: 2, CV AUC [ 0.70265415  0.96732428  0.82441867  0.7628709   0.64370791  0.67691093
  0.66634727], Average AUC 0.7491763007418905
n_min sample leaf: 30, n_trees: 41,n_depth: 2, CV AUC [ 0.69418666  0.96034564  0.80751263  0.76848169  0.62609165  0.6807091
  0.65928431], Average AUC 0.7423730957396326
n_min sample leaf: 6, n_trees: 61,n_depth: 2, CV AUC [ 0.68640309  0.96873422  0.79761153  0.781008    0.63090278  0.67385765
  0.66164925], Average AUC 0.7428809310247738
n_min sample leaf: 12, n_trees: 61,n_depth: 2, CV AUC [ 0.69753262  0.72207755  0.77500526  0.76676662  0.63596644  0.67285744
  0.65433678], Average AUC 0.7035061015160099
n_min sample leaf: 18, n_trees: 61,n_depth: 2, CV AUC [ 0.69125105  0.97009417  0.79708018  0.75495318  0.635756    0.67247052
  0.65611779], Average AUC 0.7396746971578915
n_min sample leaf: 24, n_trees: 61,n_depth: 2, CV AUC [ 0.69501789  0.96958912  0.84584386  0.76309186  0.63507997  0.66054169
  0.65203289], Average AUC 0.7458853242374369
n_min sample leaf: 30, n_trees: 61,n_depth: 2, CV AUC [ 0.6923085   0.96941025  0.82859848  0.74042245  0.63739741  0.67133607
  0.6538776 ], Average AUC 0.741907252491708
n_min sample leaf: 6, n_trees: 81,n_depth: 2, CV AUC [ 0.68543245  0.96935764  0.79227694  0.75905408  0.6170665   0.67029901
  0.66200226], Average AUC 0.7364984109941985
n_min sample leaf: 12, n_trees: 81,n_depth: 2, CV AUC [ 0.68358586  0.97096749  0.78146307  0.7712621   0.62994266  0.67363656
  0.6471995 ], Average AUC 0.736865317710735
n_min sample leaf: 18, n_trees: 81,n_depth: 2, CV AUC [ 0.70628157  0.97107008  0.83085543  0.75571075  0.64538878  0.65577227
  0.64291288], Average AUC 0.743998822533697
n_min sample leaf: 24, n_trees: 81,n_depth: 2, CV AUC [ 0.69392624  0.96720328  0.80880419  0.75667088  0.62287458  0.66256054
  0.65592933], Average AUC 0.7382812912751959
n_min sample leaf: 30, n_trees: 81,n_depth: 2, CV AUC [ 0.69892151  0.97251157  0.83161827  0.75169139  0.62890888  0.65780164
  0.63175972], Average AUC 0.7390304255300422
n_min sample leaf: 6, n_trees: 101,n_depth: 2, CV AUC [ 0.70714173  0.97383733  0.79604903  0.75963279  0.62827757  0.66709307
  0.64531233], Average AUC 0.7396205492871645
n_min sample leaf: 12, n_trees: 101,n_depth: 2, CV AUC [ 0.69481797  0.97022043  0.76325231  0.75745739  0.62192235  0.66280533
  0.65659555], Average AUC 0.7324387617707163
n_min sample leaf: 18, n_trees: 101,n_depth: 2, CV AUC [ 0.69397622  0.97170665  0.77754367  0.76033512  0.6206834   0.66359234
  0.64397989], Average AUC 0.7331167544757556
n_min sample leaf: 24, n_trees: 101,n_depth: 2, CV AUC [ 0.69553872  0.9709596   0.77822233  0.7533065   0.63947811  0.66831175
  0.64783387], Average AUC 0.7362358394076123
n_min sample leaf: 30, n_trees: 101,n_depth: 2, CV AUC [ 0.69589646  0.9692077   0.80825968  0.77203546  0.63607955  0.66975153
  0.64869915], Average AUC 0.742847075545364
n_min sample leaf: 6, n_trees: 121,n_depth: 2, CV AUC [ 0.68722643  0.97407407  0.7844697   0.77137521  0.64036195  0.66722468
  0.66195714], Average AUC 0.7409555976201171
n_min sample leaf: 12, n_trees: 121,n_depth: 2, CV AUC [ 0.6928267   0.9746291   0.76345749  0.75245949  0.62916667  0.66777216
  0.6579041 ], Average AUC 0.7340308166005064
n_min sample leaf: 18, n_trees: 121,n_depth: 2, CV AUC [ 0.70580282  0.96910511  0.76809501  0.76566972  0.63636101  0.66431617
  0.65872161], Average AUC 0.7382959211428849
n_min sample leaf: 24, n_trees: 121,n_depth: 2, CV AUC [ 0.69819813  0.96912879  0.78169192  0.75426662  0.61307344  0.67598179
  0.64511591], Average AUC 0.7339223711903358
n_min sample leaf: 30, n_trees: 121,n_depth: 2, CV AUC [ 0.6890967   0.97109638  0.83774989  0.77587069  0.62129104  0.6758844
  0.65217091], Average AUC 0.74616571458013
n_min sample leaf: 6, n_trees: 141,n_depth: 2, CV AUC [ 0.69085385  0.97054135  0.79502315  0.76147675  0.62872212  0.66049958
  0.65859951], Average AUC 0.7379594718884829
n_min sample leaf: 12, n_trees: 141,n_depth: 2, CV AUC [ 0.69866635  0.97265362  0.78857323  0.76965225  0.62216961  0.66403717
  0.65075885], Average AUC 0.7380730119517066
n_min sample leaf: 18, n_trees: 141,n_depth: 2, CV AUC [ 0.69221117  0.97522359  0.7790483   0.76149253  0.64095907  0.66815382
  0.66099365], Average AUC 0.7397260180956032
n_min sample leaf: 24, n_trees: 141,n_depth: 2, CV AUC [ 0.68602694  0.96812395  0.82931397  0.76172664  0.62694129  0.65985734
  0.65071107], Average AUC 0.7403858855216952
n_min sample leaf: 30, n_trees: 141,n_depth: 2, CV AUC [ 0.69533354  0.97232744  0.77614952  0.7632076   0.63998316  0.66950937
  0.65119415], Average AUC 0.7382435401322228
n_min sample leaf: 6, n_trees: 1,n_depth: 3, CV AUC [ 0.68209964  0.73397254  0.68238899  0.38550347  0.6268729   0.63943462
  0.55750702], Average AUC 0.615397025771136
n_min sample leaf: 12, n_trees: 1,n_depth: 3, CV AUC [ 0.62254577  0.83428293  0.66913931  0.6478588   0.61027199  0.58856338
  0.60800947], Average AUC 0.6543816646628884
n_min sample leaf: 18, n_trees: 1,n_depth: 3, CV AUC [ 0.67779619  0.83155513  0.78802083  0.73547454  0.61327599  0.63694199
  0.56877432], Average AUC 0.6931198560792324
n_min sample leaf: 24, n_trees: 1,n_depth: 3, CV AUC [ 0.62340593  0.68424874  0.69510995  0.7459596   0.63017677  0.63852127
  0.57530112], Average AUC 0.6561033402234947
n_min sample leaf: 30, n_trees: 1,n_depth: 3, CV AUC [ 0.60818603  0.95167824  0.79201915  0.64510732  0.58224169  0.55913877
  0.63448563], Average AUC 0.6818366895774212
n_min sample leaf: 6, n_trees: 21,n_depth: 3, CV AUC [ 0.71414668  0.92401094  0.70344329  0.74855587  0.62906408  0.67254685
  0.6665835 ], Average AUC 0.7226216009221093
n_min sample leaf: 12, n_trees: 21,n_depth: 3, CV AUC [ 0.68917035  0.97260101  0.67621791  0.720252    0.62556292  0.66597968
  0.67016409], Average AUC 0.7171354219274537
n_min sample leaf: 18, n_trees: 21,n_depth: 3, CV AUC [ 0.70936185  0.97226694  0.73782618  0.76964962  0.64105114  0.6614761
  0.66382839], Average AUC 0.7364943158328563
n_min sample leaf: 24, n_trees: 21,n_depth: 3, CV AUC [ 0.70451652  0.97062027  0.704385    0.73643466  0.6524516   0.67280743
  0.66313828], Average AUC 0.7291933932967191
n_min sample leaf: 30, n_trees: 21,n_depth: 3, CV AUC [ 0.68021096  0.97245107  0.67987689  0.68015309  0.62717014  0.67095968
  0.63884657], Average AUC 0.7070954867626652
n_min sample leaf: 6, n_trees: 41,n_depth: 3, CV AUC [ 0.69765099  0.9666009   0.71803451  0.7287642   0.64928451  0.68447305
  0.67312092], Average AUC 0.7311327273452468
n_min sample leaf: 12, n_trees: 41,n_depth: 3, CV AUC [ 0.71542245  0.95863058  0.66466488  0.73414878  0.64554924  0.67825595
  0.66737447], Average AUC 0.7234351922734836
n_min sample leaf: 18, n_trees: 41,n_depth: 3, CV AUC [ 0.69494423  0.9727036   0.64557292  0.75379051  0.64743003  0.66983839
  0.66051323], Average AUC 0.7206847001492264
n_min sample leaf: 24, n_trees: 41,n_depth: 3, CV AUC [ 0.70595802  0.97126999  0.71014047  0.74668824  0.6355061   0.68241998
  0.66232608], Average AUC 0.7306155541369577
n_min sample leaf: 30, n_trees: 41,n_depth: 3, CV AUC [ 0.69696707  0.97052031  0.71014836  0.73296507  0.6313105   0.68222257
  0.66923244], Average AUC 0.7276237595899306
n_min sample leaf: 6, n_trees: 61,n_depth: 3, CV AUC [ 0.70136258  0.97166982  0.677883    0.72451599  0.64169297  0.67521057
  0.66532273], Average AUC 0.7225225242593256
n_min sample leaf: 12, n_trees: 61,n_depth: 3, CV AUC [ 0.69720907  0.96664562  0.68990425  0.72754893  0.64422085  0.67699516
  0.66787612], Average AUC 0.7243428574829213
n_min sample leaf: 18, n_trees: 61,n_depth: 3, CV AUC [ 0.70137048  0.97298769  0.70582386  0.73938868  0.63872054  0.68123816
  0.67331999], Average AUC 0.730407056069491
n_min sample leaf: 24, n_trees: 61,n_depth: 3, CV AUC [ 0.70332492  0.97199863  0.72697548  0.74869266  0.62783565  0.67438408
  0.6619943 ], Average AUC 0.7307436736263927
n_min sample leaf: 30, n_trees: 61,n_depth: 3, CV AUC [ 0.70834122  0.97173822  0.74649095  0.73553504  0.6283407   0.66757212
  0.66164925], Average AUC 0.7313810706170841
n_min sample leaf: 6, n_trees: 81,n_depth: 3, CV AUC [ 0.70248316  0.96021149  0.71471223  0.74829019  0.63455124  0.68001948
  0.67000218], Average AUC 0.7300385672528039
n_min sample leaf: 12, n_trees: 81,n_depth: 3, CV AUC [ 0.69921612  0.94019886  0.65208333  0.7338831   0.64318182  0.68220678
  0.66259416], Average AUC 0.7161948823558628
n_min sample leaf: 18, n_trees: 81,n_depth: 3, CV AUC [ 0.70089699  0.97105692  0.68987005  0.74558607  0.62094118  0.67164403
  0.67221317], Average AUC 0.7246012027335932
n_min sample leaf: 24, n_trees: 81,n_depth: 3, CV AUC [ 0.70795981  0.97261679  0.69682239  0.74778251  0.62987689  0.67528427
  0.66644813], Average AUC 0.7281129714431476
n_min sample leaf: 30, n_trees: 81,n_depth: 3, CV AUC [ 0.69492319  0.97231429  0.74127999  0.74243739  0.64197969  0.67267583
  0.67089401], Average AUC 0.7337863407812317
n_min sample leaf: 6, n_trees: 101,n_depth: 3, CV AUC [ 0.69943708  0.97471591  0.70036301  0.74372633  0.62666772  0.68419931
  0.67050914], Average AUC 0.72851692594068
n_min sample leaf: 12, n_trees: 101,n_depth: 3, CV AUC [ 0.69801136  0.96507523  0.69113005  0.74809291  0.63622159  0.67710044
  0.66625703], Average AUC 0.7259840877301807
n_min sample leaf: 18, n_trees: 101,n_depth: 3, CV AUC [ 0.69476799  0.97169613  0.66619581  0.73430924  0.64006471  0.66652716
  0.6691714 ], Average AUC 0.7203903485347826
n_min sample leaf: 24, n_trees: 101,n_depth: 3, CV AUC [ 0.70082334  0.97239057  0.72987426  0.74053293  0.64505471  0.67093072
  0.65733609], Average AUC 0.7309918042345195
n_min sample leaf: 30, n_trees: 101,n_depth: 3, CV AUC [ 0.69437079  0.97258523  0.76016677  0.74790614  0.63471959  0.67263108
  0.66835389], Average AUC 0.735819070464796
n_min sample leaf: 6, n_trees: 121,n_depth: 3, CV AUC [ 0.69987374  0.97482902  0.6855061   0.74217172  0.61773464  0.68105654
  0.66619598], Average AUC 0.7239096760291914
n_min sample leaf: 12, n_trees: 121,n_depth: 3, CV AUC [ 0.69764047  0.97284564  0.67972959  0.73174979  0.64362637  0.68018004
  0.67050914], Average AUC 0.7251830046503469
n_min sample leaf: 18, n_trees: 121,n_depth: 3, CV AUC [ 0.69547033  0.9706229   0.69286616  0.74227694  0.63708176  0.67717941
  0.66482108], Average AUC 0.7257597942390231
n_min sample leaf: 24, n_trees: 121,n_depth: 3, CV AUC [ 0.70479009  0.97251947  0.6937237   0.73201547  0.63715015  0.67229417
  0.66899356], Average AUC 0.7259266559449254
n_min sample leaf: 30, n_trees: 121,n_depth: 3, CV AUC [ 0.70081282  0.97201441  0.73667666  0.74856376  0.64077231  0.68253843
  0.65945418], Average AUC 0.7344046529877959
n_min sample leaf: 6, n_trees: 141,n_depth: 3, CV AUC [ 0.70453756  0.97242477  0.69580966  0.72805398  0.60549769  0.67532638
  0.66072291], Average AUC 0.7203389928190627
n_min sample leaf: 12, n_trees: 141,n_depth: 3, CV AUC [ 0.69750105  0.97299558  0.6897359   0.73386206  0.63407776  0.6772768
  0.66366913], Average AUC 0.7241597535973557
n_min sample leaf: 18, n_trees: 141,n_depth: 3, CV AUC [ 0.70042614  0.97394518  0.693871    0.72899306  0.64122738  0.67503159
  0.66422122], Average AUC 0.7253879362982965
n_min sample leaf: 24, n_trees: 141,n_depth: 3, CV AUC [ 0.70458754  0.97050189  0.70500316  0.74387363  0.64367109  0.68326227
  0.67158676], Average AUC 0.7317837630264015
n_min sample leaf: 30, n_trees: 141,n_depth: 3, CV AUC [ 0.7072943   0.97162511  0.71932607  0.74343434  0.63599274  0.67518688
  0.66578457], Average AUC 0.7312348586677792
n_min sample leaf: 6, n_trees: 1,n_depth: 4, CV AUC [ 0.59601747  0.95280408  0.59833228  0.68731587  0.57264836  0.66653243
  0.51943443], Average AUC 0.6561549879216795
n_min sample leaf: 12, n_trees: 1,n_depth: 4, CV AUC [ 0.6479456   0.83720539  0.64113268  0.71915246  0.64648569  0.61598231
  0.5901145 ], Average AUC 0.6711455198401296
n_min sample leaf: 18, n_trees: 1,n_depth: 4, CV AUC [ 0.64408407  0.84958439  0.67189604  0.73719487  0.65228851  0.63150137
  0.62735366], Average AUC 0.6877004144446851
n_min sample leaf: 24, n_trees: 1,n_depth: 4, CV AUC [ 0.67937184  0.9333649   0.61949968  0.63537458  0.6088831   0.60923615
  0.55790781], Average AUC 0.6633768678927126
n_min sample leaf: 30, n_trees: 1,n_depth: 4, CV AUC [ 0.61474642  0.84851115  0.74073811  0.66805556  0.57173032  0.6399163
  0.60217277], Average AUC 0.6694100906265376
n_min sample leaf: 6, n_trees: 21,n_depth: 4, CV AUC [ 0.71357849  0.9261916   0.67811185  0.72282723  0.64125105  0.66858023
  0.66060612], Average AUC 0.7158780828057978
n_min sample leaf: 12, n_trees: 21,n_depth: 4, CV AUC [ 0.68932029  0.94222959  0.64214541  0.71136364  0.64988952  0.67304433
  0.67378714], Average AUC 0.7116828439287108
n_min sample leaf: 18, n_trees: 21,n_depth: 4, CV AUC [ 0.70102062  0.97231955  0.66103483  0.72836437  0.63065551  0.66323437
  0.67490989], Average AUC 0.7187913056632806
n_min sample leaf: 24, n_trees: 21,n_depth: 4, CV AUC [ 0.69588068  0.96991793  0.62700705  0.72310869  0.6442077   0.66488471
  0.6702676 ], Average AUC 0.7136106240163197
n_min sample leaf: 30, n_trees: 21,n_depth: 4, CV AUC [ 0.70598432  0.97067024  0.65440867  0.73355429  0.6444576   0.66263424
  0.66719929], Average AUC 0.7198440930838347
n_min sample leaf: 6, n_trees: 41,n_depth: 4, CV AUC [ 0.70325652  0.92066235  0.5895202   0.70568708  0.64895833  0.68672089
  0.66891924], Average AUC 0.7033892315831717
n_min sample leaf: 12, n_trees: 41,n_depth: 4, CV AUC [ 0.70747317  0.97197759  0.66401252  0.72256681  0.65147569  0.66456096
  0.67779241], Average AUC 0.7228370218419718
n_min sample leaf: 18, n_trees: 41,n_depth: 4, CV AUC [ 0.70325126  0.97283775  0.6175242   0.68895728  0.64683291  0.68755527
  0.67134788], Average AUC 0.7126152237225792
n_min sample leaf: 24, n_trees: 41,n_depth: 4, CV AUC [ 0.71183186  0.97146991  0.68688447  0.73815499  0.63675558  0.66647189
  0.67132665], Average AUC 0.7261279054747406
n_min sample leaf: 30, n_trees: 41,n_depth: 4, CV AUC [ 0.70964857  0.9737479   0.66626157  0.72787774  0.62436343  0.68161981
  0.67609634], Average AUC 0.7228021933942194
n_min sample leaf: 6, n_trees: 61,n_depth: 4, CV AUC [ 0.7068971   0.92757523  0.61655619  0.68248895  0.64082229  0.67524479
  0.67907441], Average AUC 0.704094135622322
n_min sample leaf: 12, n_trees: 61,n_depth: 4, CV AUC [ 0.69920297  0.96422559  0.63074758  0.70678136  0.6457097   0.68202516
  0.68322831], Average AUC 0.7159886670554979
n_min sample leaf: 18, n_trees: 61,n_depth: 4, CV AUC [ 0.70570812  0.97066761  0.61964962  0.72681503  0.6308791   0.68528111
  0.68522431], Average AUC 0.717746415595501
n_min sample leaf: 24, n_trees: 61,n_depth: 4, CV AUC [ 0.70979588  0.97383733  0.64828756  0.72516572  0.64678556  0.68349126
  0.67546728], Average AUC 0.7232615137468172
n_min sample leaf: 30, n_trees: 61,n_depth: 4, CV AUC [ 0.71006681  0.97371107  0.64269255  0.72587069  0.64418403  0.68263319
  0.67322178], Average AUC 0.7217685881737184
n_min sample leaf: 6, n_trees: 81,n_depth: 4, CV AUC [ 0.70846223  0.97350063  0.64594118  0.72001263  0.64275568  0.6882949
  0.67210169], Average AUC 0.7215812774817042
n_min sample leaf: 12, n_trees: 81,n_depth: 4, CV AUC [ 0.70851484  0.96802399  0.64166141  0.69714857  0.62935606  0.6833386
  0.66922448], Average AUC 0.713895419915176
n_min sample leaf: 18, n_trees: 81,n_depth: 4, CV AUC [ 0.70639205  0.94797717  0.63217593  0.69614373  0.63628472  0.67492367
  0.6789762 ], Average AUC 0.7104104943122785
n_min sample leaf: 24, n_trees: 81,n_depth: 4, CV AUC [ 0.70311711  0.97320865  0.66521465  0.73873632  0.6484717   0.68044852
  0.6787267 ], Average AUC 0.7268462342140766
n_min sample leaf: 30, n_trees: 81,n_depth: 4, CV AUC [ 0.70974853  0.97390046  0.65369581  0.72107534  0.63920455  0.68446778
  0.66851314], Average AUC 0.7215150868907738
n_min sample leaf: 6, n_trees: 101,n_depth: 4, CV AUC [ 0.70618687  0.97232481  0.61827125  0.7320181   0.62300084  0.66471889
  0.6725768 ], Average AUC 0.7127282231228248
n_min sample leaf: 12, n_trees: 101,n_depth: 4, CV AUC [ 0.70410354  0.96682976  0.60609217  0.71888152  0.63570602  0.68373605
  0.67284223], Average AUC 0.7125987543867066
n_min sample leaf: 18, n_trees: 101,n_depth: 4, CV AUC [ 0.70358796  0.97381629  0.61328914  0.70892782  0.63949653  0.6893346
  0.68110757], Average AUC 0.7156514146952163
n_min sample leaf: 24, n_trees: 101,n_depth: 4, CV AUC [ 0.70424295  0.9729456   0.6523911   0.70165194  0.64700915  0.68286745
  0.67990254], Average AUC 0.7201443889130018
n_min sample leaf: 30, n_trees: 101,n_depth: 4, CV AUC [ 0.70705755  0.97301662  0.67147517  0.72878262  0.64033302  0.68005896
  0.67101345], Average AUC 0.7245339127694059
n_min sample leaf: 6, n_trees: 121,n_depth: 4, CV AUC [ 0.70261732  0.95704703  0.63476957  0.69046717  0.62771465  0.67268636
  0.6761335 ], Average AUC 0.7087765134305284
n_min sample leaf: 12, n_trees: 121,n_depth: 4, CV AUC [ 0.71239478  0.92818813  0.61961806  0.71823969  0.63621107  0.67996684
  0.67434719], Average AUC 0.7098522496386969
n_min sample leaf: 18, n_trees: 121,n_depth: 4, CV AUC [ 0.70407723  0.97018098  0.63978062  0.6715646   0.63707123  0.67892188
  0.6758548 ], Average AUC 0.7110644776281205
n_min sample leaf: 24, n_trees: 121,n_depth: 4, CV AUC [ 0.7109112   0.97211174  0.67841698  0.71948127  0.64878472  0.67780585
  0.67746062], Average AUC 0.7264246273339285
n_min sample leaf: 30, n_trees: 121,n_depth: 4, CV AUC [ 0.704748    0.97161984  0.6615767   0.71187658  0.64161406  0.67620025
  0.67476656], Average AUC 0.7203431423457288
n_min sample leaf: 6, n_trees: 141,n_depth: 4, CV AUC [ 0.70621054  0.95092593  0.60847275  0.69206124  0.61114531  0.68605759
  0.67495766], Average AUC 0.7042615739341961
n_min sample leaf: 12, n_trees: 141,n_depth: 4, CV AUC [ 0.71064552  0.93938342  0.61304188  0.66884207  0.64004367  0.68664982
  0.66969163], Average AUC 0.7040425706640389
n_min sample leaf: 18, n_trees: 141,n_depth: 4, CV AUC [ 0.70602378  0.96909985  0.640867    0.7197601   0.64583596  0.68146452
  0.67136646], Average AUC 0.7192025258740211
n_min sample leaf: 24, n_trees: 141,n_depth: 4, CV AUC [ 0.71134259  0.9727983   0.63724221  0.70027357  0.64818761  0.67881133
  0.67200614], Average AUC 0.7172373916321888
n_min sample leaf: 30, n_trees: 141,n_depth: 4, CV AUC [ 0.70591856  0.97347433  0.65760732  0.71523043  0.63804451  0.68232523
  0.67131072], Average AUC 0.720558728045207
((24, 41, 2), 0.74917630074189046)

In [42]:
model = RandomForestClassifier(n_estimators=41, max_depth=2, min_samples_leaf=6, min_samples_split=10)
model.fit(X, y)
print (model.score(X, y))
scores=cross_val_score(model, X, y, scoring='roc_auc', cv=7)
print (scores.mean())


0.92964360587
0.737897513327

In [152]:
features = X.columns
feature_importances = model.feature_importances_

features_df = pd.DataFrame({'Features': features, 'Importance Score': feature_importances})
features_df.sort('Importance Score', inplace=True, ascending=False)

features_df


/Users/annakudryashova/anaconda/lib/python3.5/site-packages/ipykernel/__main__.py:5: FutureWarning: sort(columns=....) is deprecated, use sort_values(by=.....)
Out[152]:
Features Importance Score
5 Pop 0.326000
3 Week 0.257007
1 Month 0.189818
6 Region_West 0.071332
0 Year 0.068160
4 TimePer 0.045465
2 Duration_Sec 0.042219

In [43]:
# 5th revision: remove population count and year

X = df[[ 'Month','Duration_Sec', 'Week', 'TimePer', 'Region_West']]
y = df['Hoax']

score_model={}   
for n_depth in range(1, 5, 1):
    for n_trees in range(1, 150, 20):
        for n_min in range(6, 36, 6):
            model = RandomForestClassifier(n_estimators=n_trees, max_depth=n_depth, min_samples_leaf=n_min)
            scores = cross_val_score(model, X, y, scoring='roc_auc', cv=7)
            score_model[n_min,n_trees,n_depth]=scores.mean()
            print('n_min sample leaf: {}, n_trees: {},n_depth: {}, CV AUC {}, Average AUC {}'.format(n_min,n_trees,n_depth,scores, scores.mean()))
best=max(score_model.items(), key=lambda x: x[1])
print (best)


n_min sample leaf: 6, n_trees: 1,n_depth: 1, CV AUC [ 0.57348485  0.59880051  0.74835859  0.65997475  0.53566919  0.55721204
  0.5353626 ], Average AUC 0.6012660743962204
n_min sample leaf: 12, n_trees: 1,n_depth: 1, CV AUC [ 0.58541667  0.90580808  0.74835859  0.65997475  0.54583333  0.53950569
  0.53451058], Average AUC 0.6456296688644255
n_min sample leaf: 18, n_trees: 1,n_depth: 1, CV AUC [ 0.58541667  0.91401515  0.74488636  0.65997475  0.54583333  0.55721204
  0.52493139], Average AUC 0.6474670992652886
n_min sample leaf: 24, n_trees: 1,n_depth: 1, CV AUC [ 0.57329545  0.46395202  0.74488636  0.65056818  0.53156566  0.56017583
  0.52493139], Average AUC 0.578482127263892
n_min sample leaf: 30, n_trees: 1,n_depth: 1, CV AUC [ 0.57348485  0.90833333  0.74488636  0.65751263  0.53453283  0.53950569
  0.52493139], Average AUC 0.640455296142462
n_min sample leaf: 6, n_trees: 21,n_depth: 1, CV AUC [ 0.64454703  0.92128314  0.8215646   0.80209386  0.6129314   0.58474942
  0.56086199], Average AUC 0.7068616356328962
n_min sample leaf: 12, n_trees: 21,n_depth: 1, CV AUC [ 0.64424979  0.92514468  0.80816498  0.74631208  0.61570918  0.590498
  0.56086199], Average AUC 0.6987058138957613
n_min sample leaf: 18, n_trees: 21,n_depth: 1, CV AUC [ 0.64454703  0.92523674  0.78033723  0.74475484  0.61567761  0.59110339
  0.56679159], Average AUC 0.6954926335456303
n_min sample leaf: 24, n_trees: 21,n_depth: 1, CV AUC [ 0.64581755  0.92519729  0.79427346  0.72795402  0.61241056  0.58730785
  0.55735307], Average AUC 0.6929019730244013
n_min sample leaf: 30, n_trees: 21,n_depth: 1, CV AUC [ 0.66664825  0.92517098  0.81515152  0.74789825  0.61567761  0.58474942
  0.56679159], Average AUC 0.7031553751910197
n_min sample leaf: 6, n_trees: 41,n_depth: 1, CV AUC [ 0.64454703  0.92483428  0.82131471  0.75677346  0.61241056  0.58474942
  0.5710437 ], Average AUC 0.7022390251923812
n_min sample leaf: 12, n_trees: 41,n_depth: 1, CV AUC [ 0.64454703  0.92488689  0.81684028  0.74618319  0.66478851  0.59110339
  0.56679159], Average AUC 0.7078772685865155
n_min sample leaf: 18, n_trees: 41,n_depth: 1, CV AUC [ 0.66790562  0.92524726  0.81203441  0.74966593  0.64780619  0.58711834
  0.57131444], Average AUC 0.708727455124022
n_min sample leaf: 24, n_trees: 41,n_depth: 1, CV AUC [ 0.66664825  0.92483691  0.80962226  0.74771938  0.6160564   0.58730785
  0.56679159], Average AUC 0.7027118078299104
n_min sample leaf: 30, n_trees: 41,n_depth: 1, CV AUC [ 0.65287774  0.9248448   0.81460964  0.75177031  0.62761469  0.5984023
  0.56679159], Average AUC 0.7052730086272531
n_min sample leaf: 6, n_trees: 61,n_depth: 1, CV AUC [ 0.67050189  0.92511837  0.81879209  0.75060764  0.6129314   0.59141398
  0.56699332], Average AUC 0.7051940981962378
n_min sample leaf: 12, n_trees: 61,n_depth: 1, CV AUC [ 0.66664825  0.92515257  0.81817656  0.74892151  0.5959859   0.61757738
  0.57131444], Average AUC 0.7062538012378996
n_min sample leaf: 18, n_trees: 61,n_depth: 1, CV AUC [ 0.66814499  0.92482376  0.80731534  0.75008944  0.64538089  0.61466625
  0.5710437 ], Average AUC 0.7116377670897415
n_min sample leaf: 24, n_trees: 61,n_depth: 1, CV AUC [ 0.66702704  0.92482376  0.81980745  0.75059186  0.61241056  0.59110339
  0.56679159], Average AUC 0.7046508075475543
n_min sample leaf: 30, n_trees: 61,n_depth: 1, CV AUC [ 0.65287774  0.92516309  0.81817656  0.74877683  0.61333649  0.62232838
  0.62338821], Average AUC 0.7148638990502274
n_min sample leaf: 6, n_trees: 81,n_depth: 1, CV AUC [ 0.67517624  0.92488163  0.8139415   0.74958439  0.6129314   0.59110339
  0.56679159], Average AUC 0.7049157335937304
n_min sample leaf: 12, n_trees: 81,n_depth: 1, CV AUC [ 0.66664825  0.92480271  0.81933923  0.75260417  0.63448285  0.61843809
  0.56494423], Average AUC 0.7116085051420883
n_min sample leaf: 18, n_trees: 81,n_depth: 1, CV AUC [ 0.66664825  0.92460017  0.81511469  0.75014731  0.6389415   0.60251895
  0.56679159], Average AUC 0.7092517799706998
n_min sample leaf: 24, n_trees: 81,n_depth: 1, CV AUC [ 0.65562132  0.92458176  0.8146149   0.79296349  0.61333649  0.59110339
  0.56679159], Average AUC 0.708430419139666
n_min sample leaf: 30, n_trees: 81,n_depth: 1, CV AUC [ 0.67639678  0.92520781  0.81441761  0.75110217  0.59274516  0.61874868
  0.56679159], Average AUC 0.7064871151437916
n_min sample leaf: 6, n_trees: 101,n_depth: 1, CV AUC [ 0.66787931  0.92490267  0.81470697  0.74780619  0.6156513   0.59141398
  0.56494423], Average AUC 0.7039006656660476
n_min sample leaf: 12, n_trees: 101,n_depth: 1, CV AUC [ 0.65298822  0.92471854  0.81192919  0.75030513  0.6160564   0.61843809
  0.56494423], Average AUC 0.705625685878019
n_min sample leaf: 18, n_trees: 101,n_depth: 1, CV AUC [ 0.64454703  0.9383549   0.81977588  0.74779303  0.61241056  0.61454254
  0.56679159], Average AUC 0.7063165067249008
n_min sample leaf: 24, n_trees: 101,n_depth: 1, CV AUC [ 0.66802662  0.92458176  0.81981797  0.75054451  0.63392782  0.61445304
  0.56679159], Average AUC 0.711163330063117
n_min sample leaf: 30, n_trees: 101,n_depth: 1, CV AUC [ 0.64454703  0.92482376  0.81612479  0.74812973  0.64789825  0.59808907
  0.59552652], Average AUC 0.7107341663351022
n_min sample leaf: 6, n_trees: 121,n_depth: 1, CV AUC [ 0.66668508  0.92460017  0.81860532  0.75081282  0.64775884  0.58711834
  0.56679159], Average AUC 0.708910308655405
n_min sample leaf: 12, n_trees: 121,n_depth: 1, CV AUC [ 0.66668508  0.9383286   0.81427031  0.79277146  0.64586753  0.60202674
  0.56679159], Average AUC 0.718105902246233
n_min sample leaf: 18, n_trees: 121,n_depth: 1, CV AUC [ 0.6754919   0.92460806  0.81653777  0.75173874  0.61241056  0.61898294
  0.56679159], Average AUC 0.7095087963240324
n_min sample leaf: 24, n_trees: 121,n_depth: 1, CV AUC [ 0.66661669  0.92482376  0.81971801  0.78887048  0.64816393  0.62008318
  0.57131444], Average AUC 0.7199414971665937
n_min sample leaf: 30, n_trees: 121,n_depth: 1, CV AUC [ 0.66705072  0.92482376  0.81855798  0.78582176  0.6129314   0.61843809
  0.5710437 ], Average AUC 0.7140953433144924
n_min sample leaf: 6, n_trees: 141,n_depth: 1, CV AUC [ 0.66655619  0.92476589  0.81588279  0.78905987  0.60887521  0.61610339
  0.57131444], Average AUC 0.7132225385520058
n_min sample leaf: 12, n_trees: 141,n_depth: 1, CV AUC [ 0.66670612  0.92098327  0.81524358  0.79350011  0.63172875  0.58730785
  0.5710437 ], Average AUC 0.7123590551136194
n_min sample leaf: 18, n_trees: 141,n_depth: 1, CV AUC [ 0.66793455  0.92482376  0.81793981  0.75352483  0.62809343  0.6146873
  0.57131444], Average AUC 0.7111883048981219
n_min sample leaf: 24, n_trees: 141,n_depth: 1, CV AUC [ 0.66655619  0.93443024  0.8113952   0.78575863  0.64503893  0.61367656
  0.5710437 ], Average AUC 0.7182713508726264
n_min sample leaf: 30, n_trees: 141,n_depth: 1, CV AUC [ 0.65362742  0.9209254   0.81775568  0.7891572   0.64816393  0.61867235
  0.56679159], Average AUC 0.7164419393081863
n_min sample leaf: 6, n_trees: 1,n_depth: 2, CV AUC [ 0.65084175  0.94157197  0.75356429  0.66868687  0.65835438  0.59793114
  0.52927109], Average AUC 0.685745926829594
n_min sample leaf: 12, n_trees: 1,n_depth: 2, CV AUC [ 0.57697285  0.91611427  0.75153356  0.7219697   0.57470013  0.58598126
  0.53889806], Average AUC 0.6665956898964751
n_min sample leaf: 18, n_trees: 1,n_depth: 2, CV AUC [ 0.66319444  0.95996423  0.76442287  0.68178662  0.5341593   0.5794483
  0.52832618], Average AUC 0.6730431346754806
n_min sample leaf: 24, n_trees: 1,n_depth: 2, CV AUC [ 0.57368213  0.91888152  0.75951705  0.52630208  0.57835911  0.55624342
  0.54962389], Average AUC 0.637515601475552
n_min sample leaf: 30, n_trees: 1,n_depth: 2, CV AUC [ 0.62649148  0.93700021  0.78007155  0.68743687  0.60228062  0.58036692
  0.54716606], Average AUC 0.680116242651594
n_min sample leaf: 6, n_trees: 21,n_depth: 2, CV AUC [ 0.68915194  0.96767414  0.81563026  0.77795665  0.64592014  0.66362129
  0.67983883], Average AUC 0.7485418922022697
n_min sample leaf: 12, n_trees: 21,n_depth: 2, CV AUC [ 0.70790983  0.97328493  0.8140283   0.80339594  0.63247317  0.65243209
  0.67083561], Average AUC 0.7506228391785512
n_min sample leaf: 18, n_trees: 21,n_depth: 2, CV AUC [ 0.70516625  0.96041141  0.82668876  0.78357534  0.62638626  0.66167351
  0.66355234], Average AUC 0.746779123424656
n_min sample leaf: 24, n_trees: 21,n_depth: 2, CV AUC [ 0.69795086  0.97238531  0.81780566  0.79707755  0.62602062  0.66559012
  0.68017327], Average AUC 0.7510004854294207
n_min sample leaf: 30, n_trees: 21,n_depth: 2, CV AUC [ 0.68119476  0.96510943  0.81105061  0.767006    0.63594539  0.66051011
  0.65464202], Average AUC 0.7393511879018917
n_min sample leaf: 6, n_trees: 41,n_depth: 2, CV AUC [ 0.69588857  0.97038352  0.81464646  0.75764678  0.62940341  0.65936513
  0.6613095 ], Average AUC 0.7412347687416724
n_min sample leaf: 12, n_trees: 41,n_depth: 2, CV AUC [ 0.68702125  0.96968645  0.81943918  0.78016098  0.63129998  0.65577753
  0.65404747], Average AUC 0.742490407154091
n_min sample leaf: 18, n_trees: 41,n_depth: 2, CV AUC [ 0.68493266  0.97015993  0.81307607  0.7681634   0.62669402  0.65979154
  0.65636463], Average AUC 0.7398831799768153
n_min sample leaf: 24, n_trees: 41,n_depth: 2, CV AUC [ 0.69462069  0.96773464  0.82021517  0.76597485  0.6272701   0.65702516
  0.66295514], Average AUC 0.7422565353079278
n_min sample leaf: 30, n_trees: 41,n_depth: 2, CV AUC [ 0.70512942  0.96648779  0.81604061  0.78423295  0.63263889  0.65770689
  0.66719398], Average AUC 0.7470615050586745
n_min sample leaf: 6, n_trees: 61,n_depth: 2, CV AUC [ 0.70048664  0.9694576   0.81728483  0.7959859   0.62823285  0.65648294
  0.66022657], Average AUC 0.7468796174518609
n_min sample leaf: 12, n_trees: 61,n_depth: 2, CV AUC [ 0.70389047  0.97128051  0.81918666  0.78798664  0.63398569  0.66197884
  0.6750718 ], Average AUC 0.7504829430703476
n_min sample leaf: 18, n_trees: 61,n_depth: 2, CV AUC [ 0.68989899  0.9705545   0.81858691  0.77905093  0.6270202   0.65819909
  0.67660861], Average AUC 0.7457027479805772
n_min sample leaf: 24, n_trees: 61,n_depth: 2, CV AUC [ 0.69745107  0.96972327  0.81442024  0.78084228  0.63196023  0.65094757
  0.67831264], Average AUC 0.7462367575550101
n_min sample leaf: 30, n_trees: 61,n_depth: 2, CV AUC [ 0.69407355  0.97088594  0.81895781  0.79094855  0.62658091  0.65458254
  0.65914894], Average AUC 0.745025463377251
n_min sample leaf: 6, n_trees: 81,n_depth: 2, CV AUC [ 0.69558081  0.97055187  0.82099905  0.7940604   0.63718697  0.65619078
  0.66267113], Average AUC 0.7481772876326874
n_min sample leaf: 12, n_trees: 81,n_depth: 2, CV AUC [ 0.69822969  0.96823706  0.81450705  0.790746    0.63752894  0.66136029
  0.66526168], Average AUC 0.747981529491331
n_min sample leaf: 18, n_trees: 81,n_depth: 2, CV AUC [ 0.70038142  0.97157513  0.81950495  0.78239426  0.62624947  0.66250526
  0.66871486], Average AUC 0.7473321924946527
n_min sample leaf: 24, n_trees: 81,n_depth: 2, CV AUC [ 0.69465751  0.9678346   0.82099905  0.79055924  0.63506418  0.65721994
  0.68011753], Average AUC 0.7494931506636134
n_min sample leaf: 30, n_trees: 81,n_depth: 2, CV AUC [ 0.69924505  0.96895518  0.81596959  0.78600852  0.63171033  0.65854917
  0.67526821], Average AUC 0.7479580084065952
n_min sample leaf: 6, n_trees: 101,n_depth: 2, CV AUC [ 0.69566498  0.96835543  0.8186553   0.76419665  0.62250368  0.66097073
  0.66300822], Average AUC 0.7419078579629879
n_min sample leaf: 12, n_trees: 101,n_depth: 2, CV AUC [ 0.70444287  0.97043087  0.81762942  0.79149306  0.63303872  0.66595599
  0.6739517 ], Average AUC 0.7509918038040334
n_min sample leaf: 18, n_trees: 101,n_depth: 2, CV AUC [ 0.69903988  0.96909722  0.82250631  0.79167193  0.6285222   0.65959676
  0.66747002], Average AUC 0.7482720456464265
n_min sample leaf: 24, n_trees: 101,n_depth: 2, CV AUC [ 0.69666456  0.97078862  0.81719539  0.7866451   0.63613479  0.66131554
  0.66915812], Average AUC 0.7482717308130908
n_min sample leaf: 30, n_trees: 101,n_depth: 2, CV AUC [ 0.70267519  0.97072548  0.81579598  0.78125789  0.63077388  0.66320015
  0.67109042], Average AUC 0.7479312853253599
n_min sample leaf: 6, n_trees: 121,n_depth: 2, CV AUC [ 0.70289089  0.97164352  0.81664825  0.78861006  0.6384496   0.6613366
  0.67645466], Average AUC 0.7508619398465916
n_min sample leaf: 12, n_trees: 121,n_depth: 2, CV AUC [ 0.70412195  0.96998106  0.81671928  0.78264415  0.63592961  0.65504843
  0.66409381], Average AUC 0.7469340409852817
n_min sample leaf: 18, n_trees: 121,n_depth: 2, CV AUC [ 0.69285827  0.96868424  0.81921822  0.79170349  0.6380866   0.6613945
  0.67347394], Average AUC 0.7493456089591889
n_min sample leaf: 24, n_trees: 121,n_depth: 2, CV AUC [ 0.70127578  0.9717803   0.81712437  0.7787379   0.62867477  0.66199463
  0.68103325], Average AUC 0.7486601423277154
n_min sample leaf: 30, n_trees: 121,n_depth: 2, CV AUC [ 0.69819287  0.96931555  0.81985217  0.78938342  0.63633996  0.6574542
  0.67342351], Average AUC 0.7491373818067014
n_min sample leaf: 6, n_trees: 141,n_depth: 2, CV AUC [ 0.7025463   0.96932081  0.8168771   0.78223117  0.63474853  0.65871499
  0.67287142], Average AUC 0.7481871887265726
n_min sample leaf: 12, n_trees: 141,n_depth: 2, CV AUC [ 0.69669087  0.96798453  0.8166693   0.78677399  0.62862742  0.66064435
  0.66628092], Average AUC 0.7462387669255538
n_min sample leaf: 18, n_trees: 141,n_depth: 2, CV AUC [ 0.69695128  0.96988373  0.81802925  0.79164036  0.63389625  0.6606812
  0.672391  ], Average AUC 0.7490675832896568
n_min sample leaf: 24, n_trees: 141,n_depth: 2, CV AUC [ 0.69832176  0.96866846  0.81693761  0.78723695  0.63170244  0.6625816
  0.67235119], Average AUC 0.7482571427572094
n_min sample leaf: 30, n_trees: 141,n_depth: 2, CV AUC [ 0.69723801  0.97067024  0.81981008  0.78688973  0.63194971  0.66010739
  0.67584684], Average AUC 0.7489302850332403
n_min sample leaf: 6, n_trees: 1,n_depth: 3, CV AUC [ 0.65105219  0.90235953  0.80104956  0.75319076  0.58914141  0.56949621
  0.5878637 ], Average AUC 0.6934504805678247
n_min sample leaf: 12, n_trees: 1,n_depth: 3, CV AUC [ 0.58252315  0.94086437  0.8044718   0.72699916  0.54304766  0.6213466
  0.6080599 ], Average AUC 0.6896160921744094
n_min sample leaf: 18, n_trees: 1,n_depth: 3, CV AUC [ 0.59921612  0.94192445  0.80452967  0.76272885  0.60832281  0.66976995
  0.54028093], Average AUC 0.7038246834499703
n_min sample leaf: 24, n_trees: 1,n_depth: 3, CV AUC [ 0.6636311   0.9391835   0.78021359  0.77322706  0.53693971  0.57022005
  0.63682934], Average AUC 0.7000349076827239
n_min sample leaf: 30, n_trees: 1,n_depth: 3, CV AUC [ 0.5962358   0.94495213  0.7740504   0.71680082  0.58333859  0.62118604
  0.55616928], Average AUC 0.6846761503460155
n_min sample leaf: 6, n_trees: 21,n_depth: 3, CV AUC [ 0.70351957  0.97218013  0.81790299  0.77919297  0.64714594  0.65919404
  0.69616779], Average AUC 0.7536147765101082
n_min sample leaf: 12, n_trees: 21,n_depth: 3, CV AUC [ 0.69556766  0.96806082  0.82150673  0.77135154  0.63322285  0.65599073
  0.69849292], Average AUC 0.7491704638075679
n_min sample leaf: 18, n_trees: 21,n_depth: 3, CV AUC [ 0.7059396   0.9684133   0.82450547  0.7802057   0.63438289  0.66126027
  0.67235915], Average AUC 0.749580912448825
n_min sample leaf: 24, n_trees: 21,n_depth: 3, CV AUC [ 0.69114846  0.96773201  0.82318234  0.78646622  0.64643571  0.67358654
  0.68516592], Average AUC 0.7533881733574412
n_min sample leaf: 30, n_trees: 21,n_depth: 3, CV AUC [ 0.70387205  0.9681029   0.82731218  0.7812237   0.6431634   0.66716414
  0.67957341], Average AUC 0.7529159701266367
n_min sample leaf: 6, n_trees: 41,n_depth: 3, CV AUC [ 0.70799663  0.96956808  0.81600905  0.78487742  0.63781303  0.67658981
  0.67965834], Average AUC 0.7532160510897921
n_min sample leaf: 12, n_trees: 41,n_depth: 3, CV AUC [ 0.70138363  0.97315341  0.81719802  0.76757418  0.64233481  0.6721573
  0.67504525], Average AUC 0.7498352279828098
n_min sample leaf: 18, n_trees: 41,n_depth: 3, CV AUC [ 0.70506629  0.96662984  0.81658512  0.76705598  0.64148253  0.6704043
  0.66911035], Average AUC 0.7480477719440636
n_min sample leaf: 24, n_trees: 41,n_depth: 3, CV AUC [ 0.70202546  0.97016782  0.81864215  0.77428188  0.6447338   0.67008844
  0.67267501], Average AUC 0.7503735090124062
n_min sample leaf: 30, n_trees: 41,n_depth: 3, CV AUC [ 0.68819971  0.97238005  0.81724011  0.78596117  0.64472064  0.66165245
  0.69544053], Average AUC 0.7522278089755144
n_min sample leaf: 6, n_trees: 61,n_depth: 3, CV AUC [ 0.70457176  0.97224327  0.81685606  0.77792508  0.6481008   0.66894609
  0.68900662], Average AUC 0.7539499547593689
n_min sample leaf: 12, n_trees: 61,n_depth: 3, CV AUC [ 0.70154935  0.97106745  0.81954703  0.78126841  0.64249     0.66754053
  0.69690302], Average AUC 0.7543379710087835
n_min sample leaf: 18, n_trees: 61,n_depth: 3, CV AUC [ 0.69768255  0.96978378  0.82226957  0.77920349  0.64789299  0.67776637
  0.68448112], Average AUC 0.7541542683625738
n_min sample leaf: 24, n_trees: 61,n_depth: 3, CV AUC [ 0.70059449  0.97084649  0.81738215  0.78157881  0.64464962  0.66858286
  0.67315277], Average AUC 0.7509695986023128
n_min sample leaf: 30, n_trees: 61,n_depth: 3, CV AUC [ 0.70423506  0.96958912  0.82111742  0.77603904  0.64512574  0.670478
  0.68157471], Average AUC 0.7525941551664694
n_min sample leaf: 6, n_trees: 81,n_depth: 3, CV AUC [ 0.70463752  0.97108849  0.81753735  0.7848932   0.64246107  0.66780112
  0.6905222 ], Average AUC 0.7541344211732498
n_min sample leaf: 12, n_trees: 81,n_depth: 3, CV AUC [ 0.69898464  0.96928925  0.81517782  0.78887048  0.63806555  0.67042272
  0.6897737 ], Average AUC 0.7529405929345486
n_min sample leaf: 18, n_trees: 81,n_depth: 3, CV AUC [ 0.70323811  0.96984428  0.81901568  0.78822075  0.64301084  0.67083333
  0.67641219], Average AUC 0.7529393112430594
n_min sample leaf: 24, n_trees: 81,n_depth: 3, CV AUC [ 0.70031829  0.96765572  0.81389152  0.77823811  0.64353956  0.6710518
  0.68341677], Average AUC 0.75115882407962
n_min sample leaf: 30, n_trees: 81,n_depth: 3, CV AUC [ 0.69893203  0.96847643  0.82368213  0.77502367  0.64161406  0.67701885
  0.68412014], Average AUC 0.7526953304348915
n_min sample leaf: 6, n_trees: 101,n_depth: 3, CV AUC [ 0.70535827  0.97050715  0.81983375  0.77451073  0.64050926  0.67367604
  0.68180298], Average AUC 0.7523140268756895
n_min sample leaf: 12, n_trees: 101,n_depth: 3, CV AUC [ 0.69924769  0.97028093  0.82065446  0.79376052  0.63927294  0.66750895
  0.67394905], Average AUC 0.7520963626625826
n_min sample leaf: 18, n_trees: 101,n_depth: 3, CV AUC [ 0.70316972  0.96940499  0.81919192  0.77972696  0.64360269  0.67027795
  0.6981107 ], Average AUC 0.7547835617202623
n_min sample leaf: 24, n_trees: 101,n_depth: 3, CV AUC [ 0.69685659  0.96987058  0.8183528   0.78756839  0.63477746  0.67292061
  0.67868423], Average AUC 0.7512900956926064
n_min sample leaf: 30, n_trees: 101,n_depth: 3, CV AUC [ 0.70412195  0.96997054  0.82021517  0.78206808  0.64122738  0.66962255
  0.68428205], Average AUC 0.7530725311446452
n_min sample leaf: 6, n_trees: 121,n_depth: 3, CV AUC [ 0.70838331  0.97103851  0.81905777  0.78080545  0.64082229  0.67035165
  0.69222623], Average AUC 0.754669314772177
n_min sample leaf: 12, n_trees: 121,n_depth: 3, CV AUC [ 0.70184133  0.97163563  0.81874474  0.77473169  0.64037774  0.67275479
  0.68361849], Average AUC 0.7519577718108914
n_min sample leaf: 18, n_trees: 121,n_depth: 3, CV AUC [ 0.70513994  0.97140678  0.81582229  0.78013994  0.64007523  0.67458149
  0.69094953], Average AUC 0.7540164568542502
n_min sample leaf: 24, n_trees: 121,n_depth: 3, CV AUC [ 0.70767572  0.96949442  0.81868424  0.78154987  0.64347117  0.67333123
  0.69153612], Average AUC 0.755106110030809
n_min sample leaf: 30, n_trees: 121,n_depth: 3, CV AUC [ 0.70423506  0.96957334  0.82087542  0.77348222  0.64912668  0.66802748
  0.69040541], Average AUC 0.7536750869645062
n_min sample leaf: 6, n_trees: 141,n_depth: 3, CV AUC [ 0.70157039  0.96976536  0.81486216  0.78465383  0.64193761  0.67022005
  0.68712741], Average AUC 0.7528766867963582
n_min sample leaf: 12, n_trees: 141,n_depth: 3, CV AUC [ 0.70176505  0.96962858  0.8197338   0.78546402  0.64099327  0.673018
  0.68551893], Average AUC 0.7537316625353059
n_min sample leaf: 18, n_trees: 141,n_depth: 3, CV AUC [ 0.7014415   0.96949179  0.81871843  0.78638468  0.64057239  0.67669773
  0.69290572], Average AUC 0.7551731768284629
n_min sample leaf: 24, n_trees: 141,n_depth: 3, CV AUC [ 0.70051294  0.96977325  0.81865793  0.78270991  0.63941498  0.67046483
  0.68436433], Average AUC 0.7522711702137798
n_min sample leaf: 30, n_trees: 141,n_depth: 3, CV AUC [ 0.70170455  0.97175926  0.8176794   0.77902988  0.6421533   0.67249158
  0.67727748], Average AUC 0.7517279209799314
n_min sample leaf: 6, n_trees: 1,n_depth: 4, CV AUC [ 0.62856166  0.94150095  0.83935974  0.75795455  0.59738794  0.62086492
  0.62951953], Average AUC 0.716449897445674
n_min sample leaf: 12, n_trees: 1,n_depth: 4, CV AUC [ 0.6397359   0.9553267   0.8156592   0.68522727  0.63660564  0.63766846
  0.63070598], Average AUC 0.7144184497468403
n_min sample leaf: 18, n_trees: 1,n_depth: 4, CV AUC [ 0.62782776  0.9367056   0.79360532  0.76387311  0.65889625  0.53990051
  0.60637976], Average AUC 0.7038840434038877
n_min sample leaf: 24, n_trees: 1,n_depth: 4, CV AUC [ 0.66869739  0.92477641  0.81233165  0.77307449  0.62854324  0.59138503
  0.66074945], Average AUC 0.7227939533162819
n_min sample leaf: 30, n_trees: 1,n_depth: 4, CV AUC [ 0.60004998  0.95432713  0.80583176  0.73095013  0.58095013  0.59220362
  0.64535214], Average AUC 0.7013806961943418
n_min sample leaf: 6, n_trees: 21,n_depth: 4, CV AUC [ 0.70129156  0.96938394  0.82892992  0.79168771  0.651381    0.67519478
  0.70435881], Average AUC 0.7603182467764157
n_min sample leaf: 12, n_trees: 21,n_depth: 4, CV AUC [ 0.6899174   0.97011785  0.826744    0.78029251  0.66176347  0.67984839
  0.69795941], Average AUC 0.7580918610840074
n_min sample leaf: 18, n_trees: 21,n_depth: 4, CV AUC [ 0.69324232  0.96822128  0.83010048  0.77957176  0.64834017  0.67831386
  0.6904001 ], Average AUC 0.7554557096532598
n_min sample leaf: 24, n_trees: 21,n_depth: 4, CV AUC [ 0.69845065  0.96931029  0.83002683  0.78146044  0.63456965  0.66479785
  0.68189588], Average AUC 0.7515016567642295
n_min sample leaf: 30, n_trees: 21,n_depth: 4, CV AUC [ 0.70575547  0.96916035  0.82345328  0.78803925  0.6530987   0.67033323
  0.69514856], Average AUC 0.7578555480267333
n_min sample leaf: 6, n_trees: 41,n_depth: 4, CV AUC [ 0.70470065  0.970131    0.82474221  0.79304503  0.65350642  0.67528953
  0.70111532], Average AUC 0.7603614524173058
n_min sample leaf: 12, n_trees: 41,n_depth: 4, CV AUC [ 0.70042351  0.96849747  0.82984796  0.79124842  0.65678662  0.68499684
  0.70014121], Average AUC 0.7617060035587485
n_min sample leaf: 18, n_trees: 41,n_depth: 4, CV AUC [ 0.70364057  0.97198811  0.82932186  0.79315814  0.65226221  0.69006633
  0.68835367], Average AUC 0.7612558428526085
n_min sample leaf: 24, n_trees: 41,n_depth: 4, CV AUC [ 0.70469013  0.9690604   0.82850905  0.78495633  0.66279461  0.67851653
  0.70285916], Average AUC 0.7616266016814021
n_min sample leaf: 30, n_trees: 41,n_depth: 4, CV AUC [ 0.70556082  0.97191446  0.82213016  0.77875894  0.66671665  0.68425195
  0.70324137], Average AUC 0.7617963340930533
n_min sample leaf: 6, n_trees: 61,n_depth: 4, CV AUC [ 0.68889941  0.97160669  0.82951126  0.78828388  0.66117687  0.68363603
  0.70173376], Average AUC 0.7606925572030787
n_min sample leaf: 12, n_trees: 61,n_depth: 4, CV AUC [ 0.68656881  0.96892361  0.82334543  0.78640572  0.65322233  0.69392504
  0.69928654], Average AUC 0.7588110690063655
n_min sample leaf: 18, n_trees: 61,n_depth: 4, CV AUC [ 0.70004998  0.97055713  0.82652041  0.78761837  0.65299348  0.68295431
  0.69922814], Average AUC 0.7599888318040737
n_min sample leaf: 24, n_trees: 61,n_depth: 4, CV AUC [ 0.70542929  0.97216435  0.82978746  0.78937027  0.65279356  0.6876474
  0.70811989], Average AUC 0.763616030735868
n_min sample leaf: 30, n_trees: 61,n_depth: 4, CV AUC [ 0.69037774  0.97064131  0.82202231  0.78651094  0.65884364  0.68545483
  0.69815848], Average AUC 0.7588584644596782
n_min sample leaf: 6, n_trees: 81,n_depth: 4, CV AUC [ 0.70400621  0.97023885  0.82828283  0.78622422  0.6619818   0.67828753
  0.70430307], Average AUC 0.7619035012608099
n_min sample leaf: 12, n_trees: 81,n_depth: 4, CV AUC [ 0.69311343  0.97198285  0.82564447  0.79025147  0.65465067  0.68371762
  0.69667475], Average AUC 0.7594336093222551
n_min sample leaf: 18, n_trees: 81,n_depth: 4, CV AUC [ 0.69677241  0.97101221  0.82736216  0.7871028   0.65459543  0.68418614
  0.69240672], Average AUC 0.759062553342371
n_min sample leaf: 24, n_trees: 81,n_depth: 4, CV AUC [ 0.69709859  0.96935238  0.83051084  0.79096696  0.65555293  0.67540798
  0.69707289], Average AUC 0.7594232233729841
n_min sample leaf: 30, n_trees: 81,n_depth: 4, CV AUC [ 0.70213068  0.97149884  0.82972959  0.79231902  0.66163457  0.68401506
  0.70233893], Average AUC 0.7633809561379508
n_min sample leaf: 6, n_trees: 101,n_depth: 4, CV AUC [ 0.6999395   0.97163037  0.83007944  0.79264783  0.64971591  0.68667614
  0.70229115], Average AUC 0.7618543342396141
n_min sample leaf: 12, n_trees: 101,n_depth: 4, CV AUC [ 0.70184922  0.97124106  0.82934817  0.78815499  0.66458333  0.68357812
  0.69659247], Average AUC 0.7621924801238092
n_min sample leaf: 18, n_trees: 101,n_depth: 4, CV AUC [ 0.69704335  0.97017835  0.82875631  0.79095907  0.65619476  0.68475995
  0.69768337], Average AUC 0.7607964510967739
n_min sample leaf: 24, n_trees: 101,n_depth: 4, CV AUC [ 0.70083123  0.9717803   0.82714646  0.784375    0.66006155  0.68173563
  0.69886982], Average AUC 0.7606857139686626
n_min sample leaf: 30, n_trees: 101,n_depth: 4, CV AUC [ 0.69994476  0.97219855  0.82722275  0.79385259  0.65656566  0.67926669
  0.70012263], Average AUC 0.7613105165008314
n_min sample leaf: 6, n_trees: 121,n_depth: 4, CV AUC [ 0.70087069  0.96966014  0.82587858  0.78939394  0.65660774  0.68526269
  0.70298126], Average AUC 0.7615221475174645
n_min sample leaf: 12, n_trees: 121,n_depth: 4, CV AUC [ 0.69282144  0.96990741  0.82849327  0.78716856  0.66182397  0.68130133
  0.69987047], Average AUC 0.7601980636528046
n_min sample leaf: 18, n_trees: 121,n_depth: 4, CV AUC [ 0.69625947  0.97243266  0.82919823  0.78528251  0.65298822  0.67942198
  0.69605897], Average AUC 0.7588060057859772
n_min sample leaf: 24, n_trees: 121,n_depth: 4, CV AUC [ 0.70169665  0.97088068  0.82359007  0.7899779   0.6563447   0.68192514
  0.69824872], Average AUC 0.7603805529965612
n_min sample leaf: 30, n_trees: 121,n_depth: 4, CV AUC [ 0.69485743  0.9694576   0.82992424  0.79271886  0.65812553  0.67946147
  0.69876896], Average AUC 0.760473438936487
n_min sample leaf: 6, n_trees: 141,n_depth: 4, CV AUC [ 0.69787195  0.97021254  0.83116319  0.79223748  0.65993266  0.68322015
  0.6976117 ], Average AUC 0.7617499542676971
n_min sample leaf: 12, n_trees: 141,n_depth: 4, CV AUC [ 0.69874263  0.9714778   0.8286169   0.79122475  0.66024043  0.68330438
  0.69631378], Average AUC 0.7614172370163474
n_min sample leaf: 18, n_trees: 141,n_depth: 4, CV AUC [ 0.69976063  0.9701915   0.82729903  0.78763941  0.6568366   0.68479154
  0.68945253], Average AUC 0.7594244623689524
n_min sample leaf: 24, n_trees: 141,n_depth: 4, CV AUC [ 0.70557923  0.97026252  0.8249658   0.78771833  0.65812027  0.68890819
  0.69677031], Average AUC 0.7617606637916848
n_min sample leaf: 30, n_trees: 141,n_depth: 4, CV AUC [ 0.69668824  0.97226168  0.82934817  0.78795244  0.65907513  0.6818646
  0.70031373], Average AUC 0.7610719982856337
((24, 61, 4), 0.76361603073586803)

In [44]:
model = RandomForestClassifier(n_estimators=41, max_depth=2, min_samples_leaf=6, min_samples_split=10)
model.fit(X, y)
print (model.score(X, y))
scores=cross_val_score(model, X, y, scoring='roc_auc', cv=7)
print (scores.mean())


0.92964360587
0.741650801023

In [164]:
features = X.columns
feature_importances = model.feature_importances_

features_df = pd.DataFrame({'Features': features, 'Importance Score': feature_importances})
features_df.sort('Importance Score', inplace=True, ascending=False)

features_df


/Users/annakudryashova/anaconda/lib/python3.5/site-packages/ipykernel/__main__.py:5: FutureWarning: sort(columns=....) is deprecated, use sort_values(by=.....)
Out[164]:
Features Importance Score
2 Week 0.357023
4 Region_West 0.276114
0 Month 0.239704
3 TimePer 0.071609
1 Duration_Sec 0.055550

In [67]:
# 5th revision: remove population count and year

X = df[['Duration_Sec', 'Week', 'TimePer', 'Region_West']]
y = df['Hoax']

score_model={}   
for n_depth in range(1, 5, 1):
    for n_trees in range(1, 150, 20):
        for n_min in range(6, 36, 6):
            model = RandomForestClassifier(n_estimators=n_trees, max_depth=n_depth, min_samples_leaf=n_min)
            scores = cross_val_score(model, X, y, scoring='roc_auc', cv=7)
            score_model[n_min,n_trees,n_depth]=scores.mean()
            print('n_min sample leaf: {}, n_trees: {},n_depth: {}, CV AUC {}, Average AUC {}'.format(n_min,n_trees,n_depth,scores, scores.mean()))
best=max(score_model.items(), key=lambda x: x[1])
print (best)


n_min sample leaf: 6, n_trees: 1,n_depth: 1, CV AUC [ 0.58603481  0.91398515  0.5447079   0.65382506  0.54471545  0.55953334
  0.51711596], Average AUC 0.6171310958401701
n_min sample leaf: 12, n_trees: 1,n_depth: 1, CV AUC [ 0.61323259  0.46235058  0.73955063  0.66955949  0.54471545  0.55899469
  0.52281457], Average AUC 0.5873168565993174
n_min sample leaf: 18, n_trees: 1,n_depth: 1, CV AUC [ 0.56778984  0.46235058  0.5447079   0.56273897  0.54471545  0.55953334
  0.52281457], Average AUC 0.5378072357187941
n_min sample leaf: 24, n_trees: 1,n_depth: 1, CV AUC [ 0.58603481  0.91398515  0.73371468  0.56273897  0.53262849  0.55953334
  0.51711596], Average AUC 0.6293930580991137
n_min sample leaf: 30, n_trees: 1,n_depth: 1, CV AUC [ 0.56778984  0.9220297   0.61014851  0.66955949  0.53116699  0.53444335
  0.52281457], Average AUC 0.6225646369761743
n_min sample leaf: 6, n_trees: 21,n_depth: 1, CV AUC [ 0.64357933  0.90100771  0.81746559  0.74414644  0.59141562  0.61467442
  0.59513957], Average AUC 0.7010612393250959
n_min sample leaf: 12, n_trees: 21,n_depth: 1, CV AUC [ 0.64357933  0.93071068  0.78998128  0.74313521  0.61198734  0.58567545
  0.5572982 ], Average AUC 0.6946239273675595
n_min sample leaf: 18, n_trees: 21,n_depth: 1, CV AUC [ 0.64571024  0.93109303  0.79289926  0.74313521  0.6126791   0.58457802
  0.56230461], Average AUC 0.6960570678389824
n_min sample leaf: 24, n_trees: 21,n_depth: 1, CV AUC [ 0.66749092  0.93109303  0.81119697  0.74918749  0.65000654  0.61946185
  0.5569886 ], Average AUC 0.7122036301351987
n_min sample leaf: 30, n_trees: 21,n_depth: 1, CV AUC [ 0.64357933  0.93111316  0.8070615   0.75350409  0.60757516  0.59015077
  0.56230461], Average AUC 0.699326945282106
n_min sample leaf: 6, n_trees: 41,n_depth: 1, CV AUC [ 0.64357933  0.93089179  0.81441178  0.75315946  0.61135344  0.58457802
  0.55687533], Average AUC 0.699264163366381
n_min sample leaf: 12, n_trees: 41,n_depth: 1, CV AUC [ 0.67483681  0.93075344  0.81426085  0.75303369  0.61135344  0.59984898
  0.56230461], Average AUC 0.7066274023709754
n_min sample leaf: 18, n_trees: 41,n_depth: 1, CV AUC [ 0.65188089  0.93091946  0.80931035  0.74692103  0.61135344  0.59090589
  0.55687533], Average AUC 0.6997380561211737
n_min sample leaf: 24, n_trees: 41,n_depth: 1, CV AUC [ 0.66542239  0.93089179  0.80749416  0.74554757  0.61135344  0.61636588
  0.55687533], Average AUC 0.7048500808466479
n_min sample leaf: 30, n_trees: 41,n_depth: 1, CV AUC [ 0.66542239  0.93077357  0.80954178  0.78675894  0.608848    0.58605804
  0.56230461], Average AUC 0.707101046752399
n_min sample leaf: 6, n_trees: 61,n_depth: 1, CV AUC [ 0.66841165  0.93075344  0.81396905  0.78213797  0.61135344  0.61976138
  0.56230461], Average AUC 0.7126702206246482
n_min sample leaf: 12, n_trees: 61,n_depth: 1, CV AUC [ 0.66675733  0.92729463  0.81519661  0.74871458  0.60757516  0.58962219
  0.56230461], Average AUC 0.7024950172765624
n_min sample leaf: 18, n_trees: 61,n_depth: 1, CV AUC [ 0.66570684  0.93091946  0.8181498   0.74861899  0.6082141   0.58567545
  0.55665635], Average AUC 0.7019915713316035
n_min sample leaf: 24, n_trees: 61,n_depth: 1, CV AUC [ 0.6655746   0.93075344  0.80828403  0.78202226  0.60757516  0.58605804
  0.56230461], Average AUC 0.7060817349576419
n_min sample leaf: 30, n_trees: 61,n_depth: 1, CV AUC [ 0.6667174   0.93075344  0.81326723  0.74832971  0.6082141   0.5892396
  0.56230461], Average AUC 0.7026894416861437
n_min sample leaf: 6, n_trees: 81,n_depth: 1, CV AUC [ 0.64509142  0.93078111  0.81140324  0.74962519  0.64826833  0.5892396
  0.56230461], Average AUC 0.7052447880343955
n_min sample leaf: 12, n_trees: 81,n_depth: 1, CV AUC [ 0.66666001  0.93091946  0.81410992  0.79223265  0.6509574   0.61791387
  0.56230461], Average AUC 0.7192997035254451
n_min sample leaf: 18, n_trees: 81,n_depth: 1, CV AUC [ 0.66720646  0.93091946  0.81130262  0.75417824  0.6082141   0.61993003
  0.56230461], Average AUC 0.7077222183044122
n_min sample leaf: 24, n_trees: 81,n_depth: 1, CV AUC [ 0.66747345  0.93091946  0.81025115  0.74941389  0.608848    0.58605804
  0.56246571], Average AUC 0.7022042433707915
n_min sample leaf: 30, n_trees: 81,n_depth: 1, CV AUC [ 0.6667174   0.93091946  0.81441429  0.79301749  0.608848    0.61622492
  0.56230461], Average AUC 0.7132065982240215
n_min sample leaf: 6, n_trees: 101,n_depth: 1, CV AUC [ 0.66548227  0.93091946  0.81408225  0.74903657  0.61198734  0.61598329
  0.56230461], Average AUC 0.7071136846236575
n_min sample leaf: 12, n_trees: 101,n_depth: 1, CV AUC [ 0.66841165  0.93091946  0.81337036  0.75369526  0.64906574  0.61646656
  0.55687533], Average AUC 0.712686339154064
n_min sample leaf: 18, n_trees: 101,n_depth: 1, CV AUC [ 0.66548227  0.93078111  0.81315906  0.74560543  0.64491769  0.61766216
  0.56230461], Average AUC 0.7114160489748809
n_min sample leaf: 24, n_trees: 101,n_depth: 1, CV AUC [ 0.66735368  0.94305924  0.81266099  0.78718405  0.6082141   0.58567545
  0.56230461], Average AUC 0.7094931621643613
n_min sample leaf: 30, n_trees: 101,n_depth: 1, CV AUC [ 0.66837672  0.93075344  0.81215035  0.74876992  0.6476621   0.59113242
  0.56246571], Average AUC 0.7087586646567612
n_min sample leaf: 6, n_trees: 121,n_depth: 1, CV AUC [ 0.66846654  0.93075344  0.81355148  0.74845297  0.6082141   0.61603363
  0.56230461], Average AUC 0.706825253323495
n_min sample leaf: 12, n_trees: 121,n_depth: 1, CV AUC [ 0.66726385  0.93078111  0.81201451  0.78824308  0.64720931  0.5892396
  0.56230461], Average AUC 0.7138651533093741
n_min sample leaf: 18, n_trees: 121,n_depth: 1, CV AUC [ 0.64357933  0.93075344  0.81053288  0.79771643  0.65098255  0.61653956
  0.56230461], Average AUC 0.7160584009205773
n_min sample leaf: 24, n_trees: 121,n_depth: 1, CV AUC [ 0.66857134  0.93077357  0.81218808  0.74766059  0.65444891  0.61977649
  0.56230461], Average AUC 0.7136747972402208
n_min sample leaf: 30, n_trees: 121,n_depth: 1, CV AUC [ 0.66666001  0.93091946  0.81380303  0.787013    0.65098255  0.61598329
  0.55687533], Average AUC 0.717462381864616
n_min sample leaf: 6, n_trees: 141,n_depth: 1, CV AUC [ 0.6655746   0.93078111  0.8117051   0.75402731  0.61135344  0.58605804
  0.56230461], Average AUC 0.7031148877502732
n_min sample leaf: 12, n_trees: 141,n_depth: 1, CV AUC [ 0.66802739  0.93077357  0.81373008  0.78669353  0.63516009  0.58567545
  0.56230461], Average AUC 0.7117663875831717
n_min sample leaf: 18, n_trees: 141,n_depth: 1, CV AUC [ 0.66676731  0.93075344  0.81367222  0.79203141  0.65034865  0.61603363
  0.56246571], Average AUC 0.7188674804295996
n_min sample leaf: 24, n_trees: 141,n_depth: 1, CV AUC [ 0.66720646  0.93077357  0.81561418  0.78468365  0.6082141   0.58567545
  0.56230461], Average AUC 0.7077817181716706
n_min sample leaf: 30, n_trees: 141,n_depth: 1, CV AUC [ 0.65199068  0.93077357  0.81166234  0.78452769  0.60773867  0.61648921
  0.56230461], Average AUC 0.709355253148957
n_min sample leaf: 6, n_trees: 1,n_depth: 2, CV AUC [ 0.66347613  0.92233911  0.77091383  0.73847903  0.59258029  0.65151904
  0.54021244], Average AUC 0.6970742671663025
n_min sample leaf: 12, n_trees: 1,n_depth: 2, CV AUC [ 0.62101765  0.78066892  0.77003089  0.7611688   0.5632798   0.58570817
  0.6765159 ], Average AUC 0.6797700183446045
n_min sample leaf: 18, n_trees: 1,n_depth: 2, CV AUC [ 0.61336234  0.95862513  0.75721696  0.63432243  0.58843224  0.58312568
  0.59929271], Average AUC 0.6763396406959105
n_min sample leaf: 24, n_trees: 1,n_depth: 2, CV AUC [ 0.6505709   0.55738348  0.76968124  0.71387245  0.62021553  0.56986332
  0.54627854], Average AUC 0.6325522093727872
n_min sample leaf: 30, n_trees: 1,n_depth: 2, CV AUC [ 0.61118652  0.95862513  0.65083917  0.69560241  0.5736034   0.58434393
  0.55124217], Average AUC 0.6607775322202111
n_min sample leaf: 6, n_trees: 21,n_depth: 2, CV AUC [ 0.69671231  0.97245029  0.82167653  0.80499879  0.63530347  0.64565682
  0.63597876], Average AUC 0.7446824252248828
n_min sample leaf: 12, n_trees: 21,n_depth: 2, CV AUC [ 0.69943459  0.97278989  0.81057061  0.79100006  0.6547558   0.65752976
  0.647897  ], Average AUC 0.7477111012421787
n_min sample leaf: 18, n_trees: 21,n_depth: 2, CV AUC [ 0.69062151  0.96687092  0.8069483   0.79311811  0.6458283   0.65507564
  0.63923834], Average AUC 0.7425287314603359
n_min sample leaf: 24, n_trees: 21,n_depth: 2, CV AUC [ 0.69941961  0.97640717  0.81814729  0.79344261  0.65468033  0.64194165
  0.66118453], Average AUC 0.7493175981354786
n_min sample leaf: 30, n_trees: 21,n_depth: 2, CV AUC [ 0.69750828  0.96774129  0.81228618  0.79027057  0.64122495  0.65749453
  0.64399557], Average AUC 0.7443601944367569
n_min sample leaf: 6, n_trees: 41,n_depth: 2, CV AUC [ 0.69926491  0.97462117  0.81729453  0.79304013  0.65314085  0.66236754
  0.6564424 ], Average AUC 0.7508816463166799
n_min sample leaf: 12, n_trees: 41,n_depth: 2, CV AUC [ 0.7028655   0.97455073  0.81765173  0.79330929  0.64961915  0.6586574
  0.65273226], Average AUC 0.7499122956581336
n_min sample leaf: 18, n_trees: 41,n_depth: 2, CV AUC [ 0.69225587  0.97360994  0.81235662  0.79446138  0.63915469  0.66269476
  0.66055274], Average AUC 0.7478694285073578
n_min sample leaf: 24, n_trees: 41,n_depth: 2, CV AUC [ 0.69399503  0.9732301   0.81883653  0.78642186  0.62867514  0.6556898
  0.65511088], Average AUC 0.7445656188659501
n_min sample leaf: 30, n_trees: 41,n_depth: 2, CV AUC [ 0.69733861  0.97682222  0.81000463  0.78169021  0.63784412  0.65267185
  0.65291601], Average AUC 0.7441839503577415
n_min sample leaf: 6, n_trees: 61,n_depth: 2, CV AUC [ 0.69437181  0.9766562   0.82214441  0.79884086  0.6374844   0.66026077
  0.66291122], Average AUC 0.7503813816797825
n_min sample leaf: 12, n_trees: 61,n_depth: 2, CV AUC [ 0.7005225   0.97636944  0.81851203  0.79515314  0.63914966  0.65705908
  0.6497118 ], Average AUC 0.7480682349433215
n_min sample leaf: 18, n_trees: 61,n_depth: 2, CV AUC [ 0.70114131  0.97751902  0.81427091  0.80179908  0.64480953  0.66317048
  0.64457198], Average AUC 0.7496117569708998
n_min sample leaf: 24, n_trees: 61,n_depth: 2, CV AUC [ 0.70395091  0.97776302  0.8134257   0.79330929  0.65096998  0.66109895
  0.64920587], Average AUC 0.749960530305856
n_min sample leaf: 30, n_trees: 61,n_depth: 2, CV AUC [ 0.70091175  0.96731365  0.81137306  0.79618198  0.64896261  0.66348008
  0.63349191], Average AUC 0.7459592907791156
n_min sample leaf: 6, n_trees: 81,n_depth: 2, CV AUC [ 0.70384612  0.97703353  0.81507838  0.79826984  0.64146895  0.65955347
  0.64741876], Average AUC 0.7489527218110407
n_min sample leaf: 12, n_trees: 81,n_depth: 2, CV AUC [ 0.69957681  0.97674676  0.81013795  0.79586754  0.64977008  0.65546578
  0.64704372], Average AUC 0.7478012359992542
n_min sample leaf: 18, n_trees: 81,n_depth: 2, CV AUC [ 0.69791251  0.9783768   0.81315654  0.78833364  0.64153938  0.66135065
  0.65696343], Average AUC 0.7482332784370836
n_min sample leaf: 24, n_trees: 81,n_depth: 2, CV AUC [ 0.70337951  0.97591162  0.81411243  0.80704389  0.64949086  0.65906517
  0.65840066], Average AUC 0.7524863063664251
n_min sample leaf: 30, n_trees: 81,n_depth: 2, CV AUC [ 0.6987484   0.97787119  0.81117685  0.79662974  0.64005776  0.65120189
  0.64570968], Average AUC 0.7459136442661684
n_min sample leaf: 6, n_trees: 101,n_depth: 2, CV AUC [ 0.70001098  0.97534814  0.81111899  0.78628602  0.63676749  0.65390017
  0.65215082], Average AUC 0.745083231657374
n_min sample leaf: 12, n_trees: 101,n_depth: 2, CV AUC [ 0.69751577  0.97774038  0.81344834  0.79167673  0.64975248  0.66229706
  0.6578897 ], Average AUC 0.7500457802120097
n_min sample leaf: 18, n_trees: 101,n_depth: 2, CV AUC [ 0.69793746  0.97643232  0.81559657  0.80381399  0.64196702  0.65470311
  0.64733318], Average AUC 0.7482548092887275
n_min sample leaf: 24, n_trees: 101,n_depth: 2, CV AUC [ 0.70090676  0.97739827  0.81551859  0.8006168   0.65088193  0.66293388
  0.66425281], Average AUC 0.7532155784204039
n_min sample leaf: 30, n_trees: 101,n_depth: 2, CV AUC [ 0.69596375  0.97607764  0.81641411  0.80669675  0.64710114  0.65087971
  0.64998364], Average AUC 0.7490166775714081
n_min sample leaf: 6, n_trees: 121,n_depth: 2, CV AUC [ 0.70139083  0.97734796  0.81583303  0.79199368  0.64614777  0.65755493
  0.65093509], Average AUC 0.7487433279658414
n_min sample leaf: 12, n_trees: 121,n_depth: 2, CV AUC [ 0.69727124  0.97593426  0.81255786  0.79985209  0.64044766  0.65294621
  0.64723502], Average AUC 0.7466063321262125
n_min sample leaf: 18, n_trees: 121,n_depth: 2, CV AUC [ 0.70149563  0.97615813  0.81566701  0.78856255  0.64678922  0.65749704
  0.66318307], Average AUC 0.7499075208410717
n_min sample leaf: 24, n_trees: 121,n_depth: 2, CV AUC [ 0.6948459   0.97573805  0.81512869  0.7896593   0.65207428  0.65104332
  0.64867981], Average AUC 0.7467384768578232
n_min sample leaf: 30, n_trees: 121,n_depth: 2, CV AUC [ 0.70116377  0.97768252  0.81326723  0.78737775  0.64925189  0.65892421
  0.64190893], Average AUC 0.7470823284515608
n_min sample leaf: 6, n_trees: 141,n_depth: 2, CV AUC [ 0.70222922  0.97628391  0.81534251  0.79906474  0.63904904  0.65558408
  0.65872033], Average AUC 0.7494676902744979
n_min sample leaf: 12, n_trees: 141,n_depth: 2, CV AUC [ 0.70592712  0.97714924  0.81672855  0.79417965  0.64869094  0.66523446
  0.65389766], Average AUC 0.7516868014289219
n_min sample leaf: 18, n_trees: 141,n_depth: 2, CV AUC [ 0.69955436  0.97546134  0.81466081  0.79589018  0.64666093  0.65085706
  0.6518085 ], Average AUC 0.7478418831610344
n_min sample leaf: 24, n_trees: 141,n_depth: 2, CV AUC [ 0.7064037   0.97648515  0.81478659  0.79252445  0.64069669  0.65453699
  0.65702635], Average AUC 0.7489228460918724
n_min sample leaf: 30, n_trees: 141,n_depth: 2, CV AUC [ 0.70267586  0.97725992  0.81192898  0.7931332   0.64440956  0.65816406
  0.63750661], Average AUC 0.7464397425992051
n_min sample leaf: 6, n_trees: 1,n_depth: 3, CV AUC [ 0.64873693  0.94661616  0.79645617  0.80745643  0.64338827  0.58603287
  0.63327041], Average AUC 0.7231367476745595
n_min sample leaf: 12, n_trees: 1,n_depth: 3, CV AUC [ 0.65839588  0.95425823  0.74315282  0.6889766   0.60432263  0.66008709
  0.61978907], Average AUC 0.7041403308853008
n_min sample leaf: 18, n_trees: 1,n_depth: 3, CV AUC [ 0.67222932  0.92543568  0.80144188  0.72969492  0.62478367  0.62636865
  0.66094289], Average AUC 0.720128144181185
n_min sample leaf: 24, n_trees: 1,n_depth: 3, CV AUC [ 0.58488452  0.94195243  0.79940182  0.75330285  0.62624014  0.56629414
  0.67192731], Average AUC 0.7062861712802961
n_min sample leaf: 30, n_trees: 1,n_depth: 3, CV AUC [ 0.58278605  0.96196571  0.7784251   0.74489354  0.65157621  0.61610411
  0.6004581 ], Average AUC 0.7051726886863351
n_min sample leaf: 6, n_trees: 21,n_depth: 3, CV AUC [ 0.71747245  0.97880192  0.80708665  0.7794967   0.65210446  0.66237761
  0.66658109], Average AUC 0.7519886978326733
n_min sample leaf: 12, n_trees: 21,n_depth: 3, CV AUC [ 0.69134013  0.97769762  0.81211764  0.78119717  0.65506017  0.66299932
  0.68527021], Average AUC 0.7522403228936977
n_min sample leaf: 18, n_trees: 21,n_depth: 3, CV AUC [ 0.68519193  0.98035901  0.81809195  0.7770617   0.65796305  0.67451736
  0.67456014], Average AUC 0.7525350202757684
n_min sample leaf: 24, n_trees: 21,n_depth: 3, CV AUC [ 0.68768465  0.97816298  0.81262074  0.76664     0.66578876  0.67016789
  0.66984319], Average AUC 0.7501297442065498
n_min sample leaf: 30, n_trees: 21,n_depth: 3, CV AUC [ 0.69424705  0.98001942  0.8061358   0.79613922  0.65799072  0.66432832
  0.67938534], Average AUC 0.7540351231833661
n_min sample leaf: 6, n_trees: 41,n_depth: 3, CV AUC [ 0.70680543  0.97905598  0.80797462  0.78778274  0.65297231  0.66778424
  0.67170581], Average AUC 0.7534401626123325
n_min sample leaf: 12, n_trees: 41,n_depth: 3, CV AUC [ 0.69313668  0.97793659  0.81156927  0.78738278  0.66177151  0.67877873
  0.67496791], Average AUC 0.7550776367890099
n_min sample leaf: 18, n_trees: 41,n_depth: 3, CV AUC [ 0.69366566  0.97823593  0.81183339  0.79233579  0.6649184   0.66705933
  0.67697148], Average AUC 0.7550028547853321
n_min sample leaf: 24, n_trees: 41,n_depth: 3, CV AUC [ 0.70496896  0.97705868  0.80911917  0.7932137   0.66299907  0.66689572
  0.67460293], Average AUC 0.7555511770331352
n_min sample leaf: 30, n_trees: 41,n_depth: 3, CV AUC [ 0.70291291  0.97542361  0.81437656  0.78507607  0.66265445  0.66373682
  0.68566287], Average AUC 0.7556918970162817
n_min sample leaf: 6, n_trees: 61,n_depth: 3, CV AUC [ 0.696353    0.9761204   0.80953172  0.78903294  0.65440363  0.66991618
  0.68604294], Average AUC 0.7544858302288694
n_min sample leaf: 12, n_trees: 61,n_depth: 3, CV AUC [ 0.69962422  0.97869124  0.81387346  0.77916969  0.6640908   0.67839865
  0.68126306], Average AUC 0.7564444446707127
n_min sample leaf: 18, n_trees: 61,n_depth: 3, CV AUC [ 0.69846644  0.97808249  0.80916948  0.78106134  0.65409422  0.67611568
  0.67910846], Average AUC 0.7537283026266559
n_min sample leaf: 24, n_trees: 61,n_depth: 3, CV AUC [ 0.69940464  0.97997666  0.81421054  0.78342087  0.66373611  0.67609051
  0.68176647], Average AUC 0.7569436863259413
n_min sample leaf: 30, n_trees: 61,n_depth: 3, CV AUC [ 0.69944956  0.97773787  0.81113157  0.7791068   0.66003079  0.66953862
  0.68207858], Average AUC 0.7541533980983843
n_min sample leaf: 6, n_trees: 81,n_depth: 3, CV AUC [ 0.69814955  0.98009992  0.81621036  0.78135817  0.66500392  0.6751592
  0.67851192], Average AUC 0.7563561480653457
n_min sample leaf: 12, n_trees: 81,n_depth: 3, CV AUC [ 0.70302519  0.97859062  0.81179315  0.77981868  0.65771402  0.67826525
  0.68078733], Average AUC 0.7557134626138547
n_min sample leaf: 18, n_trees: 81,n_depth: 3, CV AUC [ 0.70438259  0.97844975  0.81221575  0.77380413  0.65715306  0.67458783
  0.68327418], Average AUC 0.7548381851992554
n_min sample leaf: 24, n_trees: 81,n_depth: 3, CV AUC [ 0.69497066  0.97770768  0.81065866  0.77823895  0.65803852  0.66852929
  0.68809182], Average AUC 0.7537479385986531
n_min sample leaf: 30, n_trees: 81,n_depth: 3, CV AUC [ 0.70211195  0.97885223  0.80603518  0.78625332  0.65972641  0.6726497
  0.67343502], Average AUC 0.7541519729518311
n_min sample leaf: 6, n_trees: 101,n_depth: 3, CV AUC [ 0.70528336  0.98013765  0.80912672  0.77985893  0.65815423  0.66742933
  0.68281104], Average AUC 0.7546858948719379
n_min sample leaf: 12, n_trees: 101,n_depth: 3, CV AUC [ 0.70282807  0.97923207  0.80863117  0.78541063  0.65860953  0.67347781
  0.68588437], Average AUC 0.7562962358030448
n_min sample leaf: 18, n_trees: 101,n_depth: 3, CV AUC [ 0.70571253  0.97854785  0.80930532  0.78293035  0.67025879  0.66923154
  0.67398626], Average AUC 0.7557103790333172
n_min sample leaf: 24, n_trees: 101,n_depth: 3, CV AUC [ 0.69694187  0.97858559  0.8103568   0.782792    0.66632959  0.6703315
  0.68057338], Average AUC 0.7551301037503835
n_min sample leaf: 30, n_trees: 101,n_depth: 3, CV AUC [ 0.70182999  0.97840447  0.80639741  0.78695263  0.66346696  0.66807622
  0.6768557 ], Average AUC 0.7545690520515824
n_min sample leaf: 6, n_trees: 121,n_depth: 3, CV AUC [ 0.70351425  0.97859565  0.81290751  0.78255303  0.66145204  0.67484205
  0.68083516], Average AUC 0.7563856709221343
n_min sample leaf: 12, n_trees: 121,n_depth: 3, CV AUC [ 0.69741347  0.97810764  0.80626912  0.78400447  0.65741467  0.66924413
  0.67230235], Average AUC 0.7521079780414698
n_min sample leaf: 18, n_trees: 121,n_depth: 3, CV AUC [ 0.70095916  0.98011249  0.81317415  0.77695605  0.6533597   0.67547132
  0.67105389], Average AUC 0.7530123939184082
n_min sample leaf: 24, n_trees: 121,n_depth: 3, CV AUC [ 0.70135091  0.97985088  0.80950153  0.77961493  0.66412099  0.67147172
  0.68620655], Average AUC 0.7560167858024922
n_min sample leaf: 30, n_trees: 121,n_depth: 3, CV AUC [ 0.70028296  0.97925471  0.81209752  0.77808299  0.6558601   0.67017292
  0.68177402], Average AUC 0.7539321737730275
n_min sample leaf: 6, n_trees: 141,n_depth: 3, CV AUC [ 0.69916261  0.97796678  0.80886259  0.7778239   0.66147972  0.66791261
  0.6819477 ], Average AUC 0.7535936986430295
n_min sample leaf: 12, n_trees: 141,n_depth: 3, CV AUC [ 0.70355667  0.97827115  0.8140093   0.78320454  0.66368832  0.66712477
  0.6708047 ], Average AUC 0.7543799215628502
n_min sample leaf: 18, n_trees: 141,n_depth: 3, CV AUC [ 0.703906    0.97860571  0.8097883   0.78138835  0.65558088  0.67051524
  0.68292935], Average AUC 0.7546734035856852
n_min sample leaf: 24, n_trees: 141,n_depth: 3, CV AUC [ 0.7041131   0.97885978  0.81220569  0.78830093  0.65587268  0.67063354
  0.68279091], Average AUC 0.7561109465976392
n_min sample leaf: 30, n_trees: 141,n_depth: 3, CV AUC [ 0.69799235  0.97986849  0.81264338  0.77731577  0.65751278  0.67391075
  0.67937275], Average AUC 0.7540880381901138
n_min sample leaf: 6, n_trees: 1,n_depth: 4, CV AUC [ 0.65731795  0.95910811  0.80514721  0.72000523  0.57069548  0.6434242
  0.63169221], Average AUC 0.7124843417684606
n_min sample leaf: 12, n_trees: 1,n_depth: 4, CV AUC [ 0.67332222  0.96185754  0.76635324  0.7622882   0.60488358  0.64315739
  0.58868081], Average AUC 0.7143632831020471
n_min sample leaf: 18, n_trees: 1,n_depth: 4, CV AUC [ 0.59551661  0.9663955   0.75563471  0.7588797   0.58724241  0.64100028
  0.65558408], Average AUC 0.7086076122036395
n_min sample leaf: 24, n_trees: 1,n_depth: 4, CV AUC [ 0.68362993  0.96012185  0.76405659  0.74269249  0.60054435  0.62367288
  0.66150167], Average AUC 0.7194599666106403
n_min sample leaf: 30, n_trees: 1,n_depth: 4, CV AUC [ 0.67962761  0.92506591  0.77862634  0.72284523  0.69670168  0.59045282
  0.63666088], Average AUC 0.7185686376354872
n_min sample leaf: 6, n_trees: 21,n_depth: 4, CV AUC [ 0.70030042  0.97776302  0.81168749  0.78378562  0.66013644  0.67086259
  0.69337008], Average AUC 0.7568436678046223
n_min sample leaf: 12, n_trees: 21,n_depth: 4, CV AUC [ 0.70101655  0.97905598  0.82404864  0.78161223  0.67786314  0.67786
  0.67663923], Average AUC 0.7597279676116573
n_min sample leaf: 18, n_trees: 21,n_depth: 4, CV AUC [ 0.69307929  0.98039171  0.82163628  0.78739787  0.6761526   0.67351809
  0.68209117], Average AUC 0.7591810005412256
n_min sample leaf: 24, n_trees: 21,n_depth: 4, CV AUC [ 0.70128853  0.97958172  0.81662038  0.78267377  0.67269128  0.67555942
  0.69148733], Average AUC 0.7599860601959223
n_min sample leaf: 30, n_trees: 21,n_depth: 4, CV AUC [ 0.6883858   0.9793176   0.81406715  0.79044917  0.68489646  0.68189987
  0.69510181], Average AUC 0.7620168378323454
n_min sample leaf: 6, n_trees: 41,n_depth: 4, CV AUC [ 0.70319237  0.97971756  0.81648706  0.79302252  0.67345599  0.67950867
  0.69032445], Average AUC 0.762244088300155
n_min sample leaf: 12, n_trees: 41,n_depth: 4, CV AUC [ 0.70155302  0.97798438  0.81829319  0.78116196  0.67996106  0.67982582
  0.68184198], Average AUC 0.7600887721664737
n_min sample leaf: 18, n_trees: 41,n_depth: 4, CV AUC [ 0.69337123  0.97770516  0.80648545  0.79013725  0.68182504  0.67028367
  0.69113494], Average AUC 0.7587061057441974
n_min sample leaf: 24, n_trees: 41,n_depth: 4, CV AUC [ 0.69636298  0.97833404  0.82069548  0.77890304  0.67262839  0.67606786
  0.69030683], Average AUC 0.7590426610104258
n_min sample leaf: 30, n_trees: 41,n_depth: 4, CV AUC [ 0.69433188  0.97741337  0.81278425  0.78957377  0.67457035  0.67532533
  0.69340532], Average AUC 0.7596291825842824
n_min sample leaf: 6, n_trees: 61,n_depth: 4, CV AUC [ 0.69725128  0.97890506  0.81251006  0.79052715  0.67934728  0.67096579
  0.68717813], Average AUC 0.7595263923183023
n_min sample leaf: 12, n_trees: 61,n_depth: 4, CV AUC [ 0.69527258  0.97928489  0.81751087  0.78821792  0.67591363  0.67879886
  0.68179919], Average AUC 0.7595425629567425
n_min sample leaf: 18, n_trees: 61,n_depth: 4, CV AUC [ 0.7022941   0.97827115  0.81176548  0.78355168  0.67755122  0.67619623
  0.69004002], Average AUC 0.7599528379106432
n_min sample leaf: 24, n_trees: 61,n_depth: 4, CV AUC [ 0.69966414  0.98005464  0.80877707  0.79202135  0.67051286  0.67126784
  0.6878074 ], Average AUC 0.7585864705125038
n_min sample leaf: 30, n_trees: 61,n_depth: 4, CV AUC [ 0.69928487  0.97793659  0.80971786  0.79351556  0.68380977  0.67915125
  0.68307785], Average AUC 0.7609276786610721
n_min sample leaf: 6, n_trees: 81,n_depth: 4, CV AUC [ 0.69674974  0.98009488  0.81348356  0.79028063  0.6797246   0.67356843
  0.67949609], Average AUC 0.7590568468583182
n_min sample leaf: 12, n_trees: 81,n_depth: 4, CV AUC [ 0.70051751  0.97711905  0.81820766  0.78927191  0.68016733  0.68380528
  0.68028644], Average AUC 0.7613393123004932
n_min sample leaf: 18, n_trees: 81,n_depth: 4, CV AUC [ 0.69790253  0.97915409  0.81892709  0.78592379  0.6744647   0.67297944
  0.6895643 ], Average AUC 0.7598451334473112
n_min sample leaf: 24, n_trees: 81,n_depth: 4, CV AUC [ 0.69707661  0.97880695  0.81541294  0.78678912  0.68200364  0.68064135
  0.68776209], Average AUC 0.7612132434480969
n_min sample leaf: 30, n_trees: 81,n_depth: 4, CV AUC [ 0.70247375  0.97867866  0.81754608  0.79176226  0.67325475  0.67436633
  0.68621662], Average AUC 0.7606140641509852
n_min sample leaf: 6, n_trees: 101,n_depth: 4, CV AUC [ 0.70274573  0.97865854  0.81832337  0.77607311  0.67819518  0.6716907
  0.68608321], Average AUC 0.7588242639625639
n_min sample leaf: 12, n_trees: 101,n_depth: 4, CV AUC [ 0.70148565  0.98033889  0.8184416   0.78031927  0.66931045  0.67761585
  0.6863475 ], Average AUC 0.7591227437341225
n_min sample leaf: 18, n_trees: 101,n_depth: 4, CV AUC [ 0.69689945  0.97997414  0.81352632  0.78695766  0.67786062  0.67641773
  0.68220444], Average AUC 0.7591200516684108
n_min sample leaf: 24, n_trees: 101,n_depth: 4, CV AUC [ 0.70348431  0.97897549  0.81753351  0.78194176  0.67315916  0.68028644
  0.68807672], Average AUC 0.7604939125179458
n_min sample leaf: 30, n_trees: 101,n_depth: 4, CV AUC [ 0.69617834  0.9784095   0.81578524  0.78958132  0.66648555  0.67700672
  0.6866571 ], Average AUC 0.7585862530736526
n_min sample leaf: 6, n_trees: 121,n_depth: 4, CV AUC [ 0.70542059  0.97967731  0.81197175  0.78022368  0.67631862  0.67583126
  0.69103677], Average AUC 0.7600685688774153
n_min sample leaf: 12, n_trees: 121,n_depth: 4, CV AUC [ 0.7003653   0.97890254  0.81739767  0.79383251  0.67769208  0.67657379
  0.68966498], Average AUC 0.7620612667214617
n_min sample leaf: 18, n_trees: 121,n_depth: 4, CV AUC [ 0.69684456  0.9796748   0.8183611   0.78945555  0.67165993  0.67805633
  0.6913212 ], Average AUC 0.7607676376695307
n_min sample leaf: 24, n_trees: 121,n_depth: 4, CV AUC [ 0.70029044  0.97995402  0.81452497  0.78541063  0.67726193  0.68282111
  0.69002744], Average AUC 0.761470077834229
n_min sample leaf: 30, n_trees: 121,n_depth: 4, CV AUC [ 0.69660751  0.97935784  0.81890445  0.78787582  0.67690976  0.67184424
  0.68634247], Average AUC 0.7596917290682094
n_min sample leaf: 6, n_trees: 141,n_depth: 4, CV AUC [ 0.69487085  0.97891763  0.81749829  0.79086171  0.67646704  0.67129804
  0.68906592], Average AUC 0.7598542115700424
n_min sample leaf: 12, n_trees: 141,n_depth: 4, CV AUC [ 0.70211444  0.97900064  0.81280689  0.794318    0.67723426  0.67824259
  0.69017846], Average AUC 0.7619850413216771
n_min sample leaf: 18, n_trees: 141,n_depth: 4, CV AUC [ 0.70095666  0.97946601  0.81676125  0.79072084  0.67742041  0.67067633
  0.68909109], Average AUC 0.7607275139325723
n_min sample leaf: 24, n_trees: 141,n_depth: 4, CV AUC [ 0.69986376  0.97895285  0.8146105   0.79239364  0.6733604   0.67672984
  0.6887085 ], Average AUC 0.7606599287882635
n_min sample leaf: 30, n_trees: 141,n_depth: 4, CV AUC [ 0.69970906  0.97903838  0.81621036  0.79468023  0.67836875  0.68040223
  0.68267261], Average AUC 0.7615830863588452
((6, 41, 4), 0.762244088300155)

In [70]:
model = RandomForestClassifier(n_estimators=81, max_depth=4, min_samples_leaf=6, min_samples_split=10)
model.fit(X, y)
# print (model.score(X, y))
scores=cross_val_score(model, X, y, scoring='roc_auc', cv=7)
scores_acc=cross_val_score(model, X, y, scoring='accuracy', cv=7)
print (scores.mean())
print (scores_acc.mean())


0.757576490633
0.942161176283

In [71]:
features = X.columns
feature_importances = model.feature_importances_

features_df = pd.DataFrame({'Features': features, 'Importance Score': feature_importances})
features_df.sort('Importance Score', inplace=True, ascending=False)

features_df


/Users/annakudryashova/anaconda/lib/python3.5/site-packages/ipykernel/__main__.py:5: FutureWarning: sort(columns=....) is deprecated, use sort_values(by=.....)
Out[71]:
Features Importance Score
1 Week 0.538725
2 TimePer 0.175699
3 Region_West 0.168867
0 Duration_Sec 0.116708