Preliminaries



In [1]:

    
# Show all figures inline.  
%matplotlib inline

# Add olfaction-prediction to the Python path.  
import os
import sys
curr_path = os.getcwd()
gerkin_path = os.path.split(curr_path)[0]
olfaction_prediction_path = os.path.split(gerkin_path)[0]
sys.path.append(olfaction_prediction_path)
import opc_python

# Import numerical libraries.  
import numpy as np
from matplotlib.pyplot import plot



In [2]:

    
# Import generic utility modules I wrote to load the data from the tab-delimited text files and to score predictions.  
from opc_python.utils import loading, scoring

# Import the modules I wrote for actually shaping and fitting the data to the model.  
from opc_python.gerkin import dream,fit1,params
from opc_python.gerkin.fit1 import rfc_cv



In [3]:

    
# Load the perceptual descriptors data.  
perceptual_headers, perceptual_obs_data = loading.load_perceptual_data('training')
loading.format_leaderboard_perceptual_data()
# Show the perceptual metadata types and perceptual descriptor names.
print(perceptual_headers)









    



['Compound Identifier', 'Odor', 'Replicate', 'Intensity', 'Dilution', 'subject #', 'INTENSITY/STRENGTH', 'VALENCE/PLEASANTNESS', 'BAKERY', 'SWEET', 'FRUIT', 'FISH', 'GARLIC', 'SPICES', 'COLD', 'SOUR', 'BURNT', 'ACID', 'WARM', 'MUSKY', 'SWEATY', 'AMMONIA/URINOUS', 'DECAYED', 'WOOD', 'GRASS', 'FLOWER', 'CHEMICAL']



In [4]:

    
# Show the metadata and perceptual descriptor values for the first compound.
print(perceptual_obs_data[1])









    



['126', '4-Hydroxybenzaldehyde', False, 'high', '1/10', '1', 37, 60, 0, 72, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]



In [5]:

    
num_descriptors = len(perceptual_headers[6:])
num_subjects = 49
print('There are %d different perceptual descriptors and %d different subjects' % (num_descriptors,num_subjects))









    



There are 21 different perceptual descriptors and 49 different subjects



In [6]:

    
# Load the molecular descriptors data.  
molecular_headers, molecular_data = loading.load_molecular_data()
print("First ten molecular descriptor types are %s" % molecular_headers[:10])
print("First ten descriptor values for the first compound are %s" % molecular_data[0][:10])
total_size = len(set([int(row[0]) for row in molecular_data]))
print("We have molecular descriptors for %d unique molecules" % total_size)









    



First ten molecular descriptor types are ['CID', 'complexity from pubmed', 'MW', 'AMW', 'Sv', 'Se', 'Sp', 'Si', 'Mv', 'Me']
First ten descriptor values for the first compound are ['126', 93.1, 122.13, 8.142, 10.01, 15.305, 10.193, 16.664, 0.667, 1.02]
We have molecular descriptors for 476 unique molecules



In [7]:

    
training_size = len(set([int(row[0]) for row in perceptual_obs_data]))
print("We have perceptual data for %d unique molecules" % training_size)
remaining_size = total_size - training_size
print ("%d are left out for testing in the competition; half of these (%d) are used for the leaderboard." \
       % (remaining_size,remaining_size/2))









    



We have perceptual data for 338 unique molecules
138 are left out for testing in the competition; half of these (69) are used for the leaderboard.



In [8]:

    
print("There are %d rows in the perceptual data set (at least one for each subject and molecule)" % len(perceptual_obs_data))
print("%d of these are replicates (same subject and molecules)" % sum([x[2] for x in perceptual_obs_data]))









    



There are 35084 rows in the perceptual data set (at least one for each subject and molecule)
1960 of these are replicates (same subject and molecules)



In [9]:

    
all_CIDs = sorted(loading.get_CIDs('training')+loading.get_CIDs('leaderboard')+loading.get_CIDs('testset'))
DATA = '/Users/rgerkin/Dropbox/science/olfaction-prediction/data/'
import pandas



In [10]:

    
episuite = pandas.read_table('%s/DREAM_episuite_descriptors.txt' % DATA)
episuite.iloc[:,49] = 1*(episuite.iloc[:,49]=='YES ')
episuite.iloc[:,49]
episuite = episuite.iloc[:,2:].as_matrix()
episuite.shape









    Out[10]:





(476, 62)



In [11]:

    
verbal = pandas.read_table('%s/name_features.txt' % DATA, sep='\t', header=None)
verbal = verbal.as_matrix()[:,1:]
verbal.shape









    Out[11]:





(476, 11786)



In [12]:

    
morgan = pandas.read_csv('%s/morgan_sim.csv' % DATA)
morgan = morgan.as_matrix()[:,1:]
morgan.shape









    Out[12]:





(476, 2437)



In [13]:

    
with open('%s/derived/nspdk_r3_d4_unaug.svm' % DATA) as f:
    nspdk_dict = {}
    i = 0
    while True:
        x = f.readline()
        if(len(x)):
            key_vals = x.split(' ')[1:]
            for key_val in key_vals:
                key,val = key_val.split(':')
                if key in nspdk_dict:
                    nspdk_dict[key][all_CIDs[i]] = val
                else:
                    nspdk_dict[key] = {all_CIDs[i]:val}
            i+=1
            if i == len(all_CIDs):
                break
        else:
            break
nspdk_dict = {key:value for key,value in nspdk_dict.items() if len(value)>1}



In [14]:

    
nspdk = np.zeros((len(all_CIDs),len(nspdk_dict)))
for j,(feature,facts) in enumerate(nspdk_dict.items()):
    for CID,value in facts.items():
        i = all_CIDs.index(CID)
        nspdk[i,j] = value
nspdk.shape









    Out[14]:





(476, 6163)



In [15]:

    
nspdk_gramian = pandas.read_table('%s/derived/nspdk_r3_d4_unaug_gramian.mtx' % DATA, delimiter=' ', header=None)
nspdk_gramian = nspdk_gramian.as_matrix()[:len(all_CIDs),:]
nspdk_gramian.shape









    Out[15]:





(476, 2437)



In [16]:

    
molecular_data_types = ['dragon','dragon+episuite','dragon+verbal','dragon+morgan',
                                                          'dragon+nspdk','dragon+nspdk_gramian','dragon+all']
molecular_data = {mdt:molecular_data.copy() for mdt in molecular_data_types}
for i,line in enumerate(molecular_data['dragon']):
    CID = int(line[0])
    index = all_CIDs.index(CID)
    molecular_data['dragon+episuite'][i] = line + list(episuite[index])
    molecular_data['dragon+verbal'][i] = line + list(verbal[index])
    molecular_data['dragon+morgan'][i] = line + list(morgan[index])
    molecular_data['dragon+nspdk'][i] = line + list(nspdk[index])
    molecular_data['dragon+nspdk_gramian'][i] = line + list(nspdk_gramian[index])
    molecular_data['dragon+all'][i] = line + list(episuite[index]) + list(morgan[index]) + list(nspdk[index]) + list(nspdk_gramian[index])

Create Molecular Matrix



In [17]:

    
X_training = {mdt:None for mdt in molecular_data_types}
X_leaderboard_other = {mdt:None for mdt in molecular_data_types}
X_leaderboard_int = {mdt:None for mdt in molecular_data_types}
X_testset_other = {mdt:None for mdt in molecular_data_types}
X_testset_int = {mdt:None for mdt in molecular_data_types}
X_all = {mdt:None for mdt in molecular_data_types}
for mdt in molecular_data_types:
    X_training[mdt],good1,good2,means,stds,imputer = dream.make_X(molecular_data[mdt],"training")
    X_leaderboard_other[mdt],good1,good2,means,stds,imputer = dream.make_X(molecular_data[mdt],"leaderboard",target_dilution='high',good1=good1,good2=good2,means=means,stds=stds)
    X_leaderboard_int[mdt],good1,good2,means,stds,imputer = dream.make_X(molecular_data[mdt],"leaderboard",target_dilution=-3,good1=good1,good2=good2,means=means,stds=stds)
    X_testset_other[mdt],good1,good2,means,stds,imputer = dream.make_X(molecular_data[mdt],"testset",target_dilution='high',good1=good1,good2=good2,means=means,stds=stds)
    X_testset_int[mdt],good1,good2,means,stds,imputer = dream.make_X(molecular_data[mdt],"testset",target_dilution=-3,good1=good1,good2=good2,means=means,stds=stds)
    X_all[mdt],good1,good2,means,stds,imputer = dream.make_X(molecular_data[mdt],['training','leaderboard'],good1=good1,good2=good2,means=means,stds=stds)









    



There are now 676 molecular vectors of length 4871, one for each molecule and dilution
The X matrix now has shape (676x3033) molecules by non-NaN good molecular descriptors
There are now 69 molecular vectors of length 4871, one for each molecule and dilution
The X matrix now has shape (69x3033) molecules by non-NaN good molecular descriptors
There are now 69 molecular vectors of length 4871, one for each molecule and dilution
The X matrix now has shape (69x3033) molecules by non-NaN good molecular descriptors
There are now 69 molecular vectors of length 4871, one for each molecule and dilution
The X matrix now has shape (69x3033) molecules by non-NaN good molecular descriptors
There are now 69 molecular vectors of length 4871, one for each molecule and dilution
The X matrix now has shape (69x3033) molecules by non-NaN good molecular descriptors
There are now 814 molecular vectors of length 4871, one for each molecule and dilution
The X matrix now has shape (814x3033) molecules by non-NaN good molecular descriptors
There are now 676 molecular vectors of length 4933, one for each molecule and dilution
The X matrix now has shape (676x3089) molecules by non-NaN good molecular descriptors
There are now 69 molecular vectors of length 4933, one for each molecule and dilution
The X matrix now has shape (69x3089) molecules by non-NaN good molecular descriptors
There are now 69 molecular vectors of length 4933, one for each molecule and dilution
The X matrix now has shape (69x3089) molecules by non-NaN good molecular descriptors
There are now 69 molecular vectors of length 4933, one for each molecule and dilution
The X matrix now has shape (69x3089) molecules by non-NaN good molecular descriptors
There are now 69 molecular vectors of length 4933, one for each molecule and dilution
The X matrix now has shape (69x3089) molecules by non-NaN good molecular descriptors
There are now 814 molecular vectors of length 4933, one for each molecule and dilution
The X matrix now has shape (814x3089) molecules by non-NaN good molecular descriptors
There are now 676 molecular vectors of length 16657, one for each molecule and dilution
The X matrix now has shape (676x14801) molecules by non-NaN good molecular descriptors
There are now 69 molecular vectors of length 16657, one for each molecule and dilution
The X matrix now has shape (69x14801) molecules by non-NaN good molecular descriptors
There are now 69 molecular vectors of length 16657, one for each molecule and dilution
The X matrix now has shape (69x14801) molecules by non-NaN good molecular descriptors
There are now 69 molecular vectors of length 16657, one for each molecule and dilution
The X matrix now has shape (69x14801) molecules by non-NaN good molecular descriptors
There are now 69 molecular vectors of length 16657, one for each molecule and dilution
The X matrix now has shape (69x14801) molecules by non-NaN good molecular descriptors
There are now 814 molecular vectors of length 16657, one for each molecule and dilution
The X matrix now has shape (814x14801) molecules by non-NaN good molecular descriptors
There are now 676 molecular vectors of length 7308, one for each molecule and dilution
The X matrix now has shape (676x5467) molecules by non-NaN good molecular descriptors
There are now 69 molecular vectors of length 7308, one for each molecule and dilution
The X matrix now has shape (69x5467) molecules by non-NaN good molecular descriptors
There are now 69 molecular vectors of length 7308, one for each molecule and dilution
The X matrix now has shape (69x5467) molecules by non-NaN good molecular descriptors
There are now 69 molecular vectors of length 7308, one for each molecule and dilution
The X matrix now has shape (69x5467) molecules by non-NaN good molecular descriptors
There are now 69 molecular vectors of length 7308, one for each molecule and dilution
The X matrix now has shape (69x5467) molecules by non-NaN good molecular descriptors
There are now 814 molecular vectors of length 7308, one for each molecule and dilution
The X matrix now has shape (814x5467) molecules by non-NaN good molecular descriptors
There are now 676 molecular vectors of length 11034, one for each molecule and dilution
The X matrix now has shape (676x8989) molecules by non-NaN good molecular descriptors
There are now 69 molecular vectors of length 11034, one for each molecule and dilution
The X matrix now has shape (69x8989) molecules by non-NaN good molecular descriptors
There are now 69 molecular vectors of length 11034, one for each molecule and dilution
The X matrix now has shape (69x8989) molecules by non-NaN good molecular descriptors
There are now 69 molecular vectors of length 11034, one for each molecule and dilution
The X matrix now has shape (69x8989) molecules by non-NaN good molecular descriptors
There are now 69 molecular vectors of length 11034, one for each molecule and dilution
The X matrix now has shape (69x8989) molecules by non-NaN good molecular descriptors
There are now 814 molecular vectors of length 11034, one for each molecule and dilution
The X matrix now has shape (814x8989) molecules by non-NaN good molecular descriptors
There are now 676 molecular vectors of length 7308, one for each molecule and dilution
The X matrix now has shape (676x5468) molecules by non-NaN good molecular descriptors
There are now 69 molecular vectors of length 7308, one for each molecule and dilution
The X matrix now has shape (69x5468) molecules by non-NaN good molecular descriptors
There are now 69 molecular vectors of length 7308, one for each molecule and dilution
The X matrix now has shape (69x5468) molecules by non-NaN good molecular descriptors
There are now 69 molecular vectors of length 7308, one for each molecule and dilution
The X matrix now has shape (69x5468) molecules by non-NaN good molecular descriptors
There are now 69 molecular vectors of length 7308, one for each molecule and dilution
The X matrix now has shape (69x5468) molecules by non-NaN good molecular descriptors
There are now 814 molecular vectors of length 7308, one for each molecule and dilution
The X matrix now has shape (814x5468) molecules by non-NaN good molecular descriptors
There are now 676 molecular vectors of length 15970, one for each molecule and dilution
The X matrix now has shape (676x13914) molecules by non-NaN good molecular descriptors
There are now 69 molecular vectors of length 15970, one for each molecule and dilution
The X matrix now has shape (69x13914) molecules by non-NaN good molecular descriptors
There are now 69 molecular vectors of length 15970, one for each molecule and dilution
The X matrix now has shape (69x13914) molecules by non-NaN good molecular descriptors
There are now 69 molecular vectors of length 15970, one for each molecule and dilution
The X matrix now has shape (69x13914) molecules by non-NaN good molecular descriptors
There are now 69 molecular vectors of length 15970, one for each molecule and dilution
The X matrix now has shape (69x13914) molecules by non-NaN good molecular descriptors
There are now 814 molecular vectors of length 15970, one for each molecule and dilution
The X matrix now has shape (814x13914) molecules by non-NaN good molecular descriptors



In [18]:

    
Y_training_imp,imputer = dream.make_Y_obs('training',target_dilution=None,imputer='median')
Y_training_mask,imputer = dream.make_Y_obs('training',target_dilution=None,imputer='mask')









    



Getting basic perceptual data...
Flattening into vectors...
Assembling into matrices...
Y_obs['subject'] contains 49 matrices each with shape (676x21) (molecules by perceptual descriptors)
The Y_obs['mean_std'] matrix has shape (676x42) (molecules by 2 x perceptual descriptors)
Combining Y matrices...
The Y['mean_std'] matrix now has shape (676x42) molecules by 2 x perceptual descriptors
The Y['subject'] dict now has 49 matrices of shape (676x21) molecules by perceptual descriptors, one for each subject
Getting basic perceptual data...
Flattening into vectors...
Assembling into matrices...
Y_obs['subject'] contains 49 matrices each with shape (676x21) (molecules by perceptual descriptors)
The Y_obs['mean_std'] matrix has shape (676x42) (molecules by 2 x perceptual descriptors)
Combining Y matrices...
The Y['mean_std'] matrix now has shape (676x42) molecules by 2 x perceptual descriptors
The Y['subject'] dict now has 49 matrices of shape (676x21) molecules by perceptual descriptors, one for each subject



In [19]:

    
Y_leaderboard,imputer = dream.make_Y_obs('leaderboard',target_dilution='gold',imputer='mask')









    



Getting basic perceptual data...
Flattening into vectors...
Assembling into matrices...
Y_obs['subject'] contains 49 matrices each with shape (69x21) (molecules by perceptual descriptors)
The Y_obs['mean_std'] matrix has shape (69x42) (molecules by 2 x perceptual descriptors)
Combining Y matrices...
The Y['mean_std'] matrix now has shape (69x42) molecules by 2 x perceptual descriptors
The Y['subject'] dict now has 49 matrices of shape (69x21) molecules by perceptual descriptors, one for each subject
Getting basic perceptual data...
Flattening into vectors...
Assembling into matrices...
Y_obs['subject'] contains 49 matrices each with shape (69x21) (molecules by perceptual descriptors)
The Y_obs['mean_std'] matrix has shape (69x42) (molecules by 2 x perceptual descriptors)
Combining Y matrices...
The Y['mean_std'] matrix now has shape (69x42) molecules by 2 x perceptual descriptors
The Y['subject'] dict now has 49 matrices of shape (69x21) molecules by perceptual descriptors, one for each subject



In [20]:

    
Y_leaderboard_noimpute,_ = dream.make_Y_obs('leaderboard',target_dilution='gold',imputer=None)









    



Getting basic perceptual data...
Flattening into vectors...
Assembling into matrices...
Y_obs['subject'] contains 49 matrices each with shape (69x21) (molecules by perceptual descriptors)
The Y_obs['mean_std'] matrix has shape (69x42) (molecules by 2 x perceptual descriptors)
Combining Y matrices...
The Y['mean_std'] matrix now has shape (69x42) molecules by 2 x perceptual descriptors
The Y['subject'] dict now has 49 matrices of shape (69x21) molecules by perceptual descriptors, one for each subject
Getting basic perceptual data...
Flattening into vectors...
Assembling into matrices...
Y_obs['subject'] contains 49 matrices each with shape (69x21) (molecules by perceptual descriptors)
The Y_obs['mean_std'] matrix has shape (69x42) (molecules by 2 x perceptual descriptors)
Combining Y matrices...
The Y['mean_std'] matrix now has shape (69x42) molecules by 2 x perceptual descriptors
The Y['subject'] dict now has 49 matrices of shape (69x21) molecules by perceptual descriptors, one for each subject



In [21]:

    
Y_all_imp,imputer = dream.make_Y_obs(['training','leaderboard'],target_dilution=None,imputer='median')









    



Getting basic perceptual data...
Flattening into vectors...
Assembling into matrices...
Y_obs['subject'] contains 49 matrices each with shape (676x21) (molecules by perceptual descriptors)
The Y_obs['mean_std'] matrix has shape (676x42) (molecules by 2 x perceptual descriptors)
Getting basic perceptual data...
Flattening into vectors...
Assembling into matrices...
Y_obs['subject'] contains 49 matrices each with shape (138x21) (molecules by perceptual descriptors)
The Y_obs['mean_std'] matrix has shape (138x42) (molecules by 2 x perceptual descriptors)
Combining Y matrices...
The Y['mean_std'] matrix now has shape (814x42) molecules by 2 x perceptual descriptors
The Y['subject'] dict now has 49 matrices of shape (814x21) molecules by perceptual descriptors, one for each subject



In [22]:

    
Y_all_mask,imputer = dream.make_Y_obs(['training','leaderboard'],target_dilution=None,imputer='mask')









    



Getting basic perceptual data...
Flattening into vectors...
Assembling into matrices...
Y_obs['subject'] contains 49 matrices each with shape (676x21) (molecules by perceptual descriptors)
The Y_obs['mean_std'] matrix has shape (676x42) (molecules by 2 x perceptual descriptors)
Getting basic perceptual data...
Flattening into vectors...
Assembling into matrices...
Y_obs['subject'] contains 49 matrices each with shape (138x21) (molecules by perceptual descriptors)
The Y_obs['mean_std'] matrix has shape (138x42) (molecules by 2 x perceptual descriptors)
Combining Y matrices...
The Y['mean_std'] matrix now has shape (814x42) molecules by 2 x perceptual descriptors
The Y['subject'] dict now has 49 matrices of shape (814x21) molecules by perceptual descriptors, one for each subject



In [23]:

    
Y_all_zero,imputer = dream.make_Y_obs(['training','leaderboard'],target_dilution=None,imputer='zero')









    



Getting basic perceptual data...
Flattening into vectors...
Assembling into matrices...
Y_obs['subject'] contains 49 matrices each with shape (676x21) (molecules by perceptual descriptors)
The Y_obs['mean_std'] matrix has shape (676x42) (molecules by 2 x perceptual descriptors)
Getting basic perceptual data...
Flattening into vectors...
Assembling into matrices...
Y_obs['subject'] contains 49 matrices each with shape (138x21) (molecules by perceptual descriptors)
The Y_obs['mean_std'] matrix has shape (138x42) (molecules by 2 x perceptual descriptors)
Combining Y matrices...
The Y['mean_std'] matrix now has shape (814x42) molecules by 2 x perceptual descriptors
The Y['subject'] dict now has 49 matrices of shape (814x21) molecules by perceptual descriptors, one for each subject



In [24]:

    
import matplotlib.pyplot as plt
plt.scatter(Y_all_mask['mean_std'][:,0],Y_all_mask['mean_std'][:,21])









    Out[24]:





<matplotlib.collections.PathCollection at 0x1103bc470>

Data preparation



In [25]:

    
# Show the range of values for the molecular and perceptual descriptors.  
plt.hist(X_training['dragon+all'].ravel())
plt.yscale('log')
plt.ylabel('Count')
plt.xlabel('Cube root transformed, N(0,1) normalized molecular descriptor values')
plt.figure()
plt.hist(Y_training_imp['mean_std'][:21].ravel())
plt.yscale('log')
plt.ylabel('Count')
_ = plt.xlabel('Perceptual descriptor subject-averaged values')

Fitting and Generating Submission Files



In [26]:

    
from sklearn.ensemble import RandomForestRegressor,ExtraTreesRegressor
from sklearn.cross_validation import ShuffleSplit
n_obs = len(Y_training_imp['mean_std'])



In [82]:

    
# How much should subjected be pooled for estimating individual subjects' responses?  
n_estimators_list = [5,18,50,150]
#rfcs = {subject:[None for _ in range(len(n_estimators_list))] for subject in range(1,50)}
X = X_all['dragon+all']
Y = Y_all_imp['subject']
for subject in range(1,50):
    for i,n_estimators in enumerate(n_estimators_list):
        if i<3:
            continue
        print(subject,n_estimators)
        rfcs[subject][i] = RandomForestRegressor(n_estimators=n_estimators,max_features=None,min_samples_leaf=1,
                                    max_depth=None,oob_score=True,n_jobs=-1,random_state=0)
        rfcs[subject][i].fit(X,Y[subject])



In [83]:

    
fig,axes = plt.subplots(7,3,sharex=True,sharey=True,figsize=(10,12))
a_list = np.linspace(0.01,0.99,35)
for col,ax in enumerate(axes.flat):
    rs = np.zeros((35,len(n_estimators_list)))
    x_max = np.zeros(len(n_estimators_list))
    y_max = np.zeros(len(n_estimators_list))
    for i,n_estimators in enumerate(n_estimators_list):
        prediction_pooled = np.zeros(rfcs[1][i].oob_prediction_.shape)
        for subject in range(1,50):
            prediction = rfcs[subject][i].oob_prediction_
            prediction_pooled += prediction
        prediction_pooled /= subject
        for j,a in enumerate(a_list):
            r = 0
            denom = 0
            for subject in range(1,50):
                observation = Y[subject][:,col]
                prediction = rfcs[subject][i].oob_prediction_[:,col]
                prediction_weighted = a*prediction_pooled[:,col] + (1-a)*prediction
                r_ = np.corrcoef(prediction_weighted,observation)[0,1]
                if not np.isnan(r_):
                    r += r_
                    denom += 1    
            r /= denom
            rs[j,i] = r
        #print(col,rs)
        ax.plot(a_list,rs[:,i],color=['r','g','b','c'][i])
        x_max[i] = a_list[np.argmax(rs[:,i])]
        y_max[i] = np.amax(rs[:,i])
    ax.plot(x_max,y_max,color='k')



In [85]:

    
import pickle
with open('/Users/rgerkin/Desktop/rfcs1_oob.pickle','wb') as f:
    pickle.dump(rfcs,f)









    



---------------------------------------------------------------------------
OSError                                   Traceback (most recent call last)
<ipython-input-85-8f587475779c> in <module>()
      1 import pickle
      2 with open('/Users/rgerkin/Desktop/rfcs1_oob.pickle','wb') as f:
----> 3     pickle.dump(rfcs,f)

OSError: [Errno 22] Invalid argument



In [41]:

    
rfcs[18][i].__dict__









    Out[41]:





{'base_estimator': DecisionTreeRegressor(criterion='mse', max_depth=None, max_features=None,
            max_leaf_nodes=None, min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, random_state=None,
            splitter='best'),
 'base_estimator_': DecisionTreeRegressor(criterion='mse', max_depth=None, max_features=None,
            max_leaf_nodes=None, min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, random_state=None,
            splitter='best'),
 'bootstrap': True,
 'class_weight': None,
 'criterion': 'mse',
 'estimator_params': ('criterion',
  'max_depth',
  'min_samples_split',
  'min_samples_leaf',
  'min_weight_fraction_leaf',
  'max_features',
  'max_leaf_nodes',
  'random_state'),
 'estimators_': [DecisionTreeRegressor(criterion='mse', max_depth=None, max_features=None,
             max_leaf_nodes=None, min_samples_leaf=1, min_samples_split=2,
             min_weight_fraction_leaf=0.0, random_state=209652396,
             splitter='best'),
  DecisionTreeRegressor(criterion='mse', max_depth=None, max_features=None,
             max_leaf_nodes=None, min_samples_leaf=1, min_samples_split=2,
             min_weight_fraction_leaf=0.0, random_state=398764591,
             splitter='best'),
  DecisionTreeRegressor(criterion='mse', max_depth=None, max_features=None,
             max_leaf_nodes=None, min_samples_leaf=1, min_samples_split=2,
             min_weight_fraction_leaf=0.0, random_state=924231285,
             splitter='best'),
  DecisionTreeRegressor(criterion='mse', max_depth=None, max_features=None,
             max_leaf_nodes=None, min_samples_leaf=1, min_samples_split=2,
             min_weight_fraction_leaf=0.0, random_state=1478610112,
             splitter='best'),
  DecisionTreeRegressor(criterion='mse', max_depth=None, max_features=None,
             max_leaf_nodes=None, min_samples_leaf=1, min_samples_split=2,
             min_weight_fraction_leaf=0.0, random_state=441365315,
             splitter='best')],
 'max_depth': None,
 'max_features': None,
 'max_leaf_nodes': None,
 'min_samples_leaf': 1,
 'min_samples_split': 2,
 'min_weight_fraction_leaf': 0.0,
 'n_estimators': 5,
 'n_features_': 13914,
 'n_jobs': -1,
 'n_outputs_': 21,
 'oob_prediction_': array([[ 25.        ,  71.        ,   0.        , ...,   0.        ,
           0.        ,   0.        ],
        [ 90.        ,  70.        ,   0.        , ...,   0.        ,
           0.        ,  43.5       ],
        [  1.33333333,  27.66666667,   0.        , ...,   0.        ,
           0.        ,   0.        ],
        ..., 
        [  0.        ,  48.        ,   0.        , ...,   0.        ,
           0.        ,   0.        ],
        [ 35.66666667,  38.33333333,   0.        , ...,   7.66666667,
           0.        ,   0.        ],
        [  9.        ,  31.        ,   0.        , ...,  23.        ,
           0.        ,   0.        ]]),
 'oob_score': True,
 'oob_score_': -0.37090791721993288,
 'random_state': 0,
 'verbose': 0,
 'warm_start': False}



In [35]:

    
# When do we want ExtraTrees instead of the usual RandomForest?
n_splits = 10
results = np.zeros((42,2,n_splits))
shuffle_split = ShuffleSplit(n_obs,n_splits,test_size=0.2,random_state=0)
for col in range(42):
    rfc = RandomForestRegressor(n_estimators=20,max_features=None,min_samples_leaf=1,
                                max_depth=None,oob_score=False,n_jobs=-1,random_state=0)
    etc = ExtraTreesRegressor(n_estimators=20,max_features=None,min_samples_leaf=1,
                                max_depth=None,n_jobs=-1,random_state=0)
    for j,(train,test) in enumerate(shuffle_split):
        for i,estimator in enumerate([etc,rfc]):
            X = X_all['dragon+all']
            observed = Y_all_mask['mean_std'][:,col]
            estimator.fit(X[train,:],observed[train])
            predicted = estimator.predict(X[test,:])
            results[col,i,j] = np.corrcoef(predicted,observed[test])[1,0]
    means = results[col,:,:].mean(axis=1)
    sems = results[col,:,:].std(axis=1)/np.sqrt(n_splits)
    print('Desc. %d: [%.3f +/- %.3f], [%.3f +/- %.3f]' % \
          (col,means[0],sems[0],means[1],sems[1]))









    



Desc. 0: [0.664 +/- 0.020], [0.593 +/- 0.020]
Desc. 1: [0.626 +/- 0.011], [0.635 +/- 0.009]
Desc. 2: [0.557 +/- 0.071], [0.586 +/- 0.057]
Desc. 3: [0.678 +/- 0.024], [0.669 +/- 0.023]
Desc. 4: [0.705 +/- 0.022], [0.690 +/- 0.025]
Desc. 5: [0.365 +/- 0.017], [0.424 +/- 0.014]
Desc. 6: [0.557 +/- 0.048], [0.608 +/- 0.038]
Desc. 7: [0.247 +/- 0.020], [0.303 +/- 0.022]
Desc. 8: [0.269 +/- 0.024], [0.302 +/- 0.021]
Desc. 9: [0.531 +/- 0.027], [0.544 +/- 0.023]
Desc. 10: [0.426 +/- 0.034], [0.462 +/- 0.030]
Desc. 11: [0.415 +/- 0.019], [0.414 +/- 0.017]
Desc. 12: [0.180 +/- 0.020], [0.216 +/- 0.022]
Desc. 13: [0.395 +/- 0.014], [0.406 +/- 0.022]
Desc. 14: [0.494 +/- 0.021], [0.499 +/- 0.021]
Desc. 15: [0.289 +/- 0.023], [0.301 +/- 0.029]
Desc. 16: [0.494 +/- 0.044], [0.512 +/- 0.037]
Desc. 17: [0.260 +/- 0.042], [0.295 +/- 0.038]
Desc. 18: [0.235 +/- 0.036], [0.244 +/- 0.041]
Desc. 19: [0.494 +/- 0.026], [0.519 +/- 0.022]
Desc. 20: [0.574 +/- 0.025], [0.583 +/- 0.024]
Desc. 21: [-0.060 +/- 0.039], [-0.065 +/- 0.029]
Desc. 22: [0.237 +/- 0.015], [0.231 +/- 0.008]
Desc. 23: [0.298 +/- 0.032], [0.317 +/- 0.024]
Desc. 24: [0.445 +/- 0.022], [0.472 +/- 0.020]
Desc. 25: [0.486 +/- 0.023], [0.502 +/- 0.018]
Desc. 26: [0.335 +/- 0.020], [0.377 +/- 0.018]
Desc. 27: [0.394 +/- 0.016], [0.445 +/- 0.011]
Desc. 28: [0.174 +/- 0.019], [0.204 +/- 0.015]
Desc. 29: [0.251 +/- 0.016], [0.245 +/- 0.016]
Desc. 30: [0.442 +/- 0.020], [0.447 +/- 0.025]
Desc. 31: [0.297 +/- 0.021], [0.335 +/- 0.022]
Desc. 32: [0.358 +/- 0.011], [0.349 +/- 0.019]
Desc. 33: [0.096 +/- 0.013], [0.118 +/- 0.011]
Desc. 34: [0.370 +/- 0.017], [0.373 +/- 0.014]
Desc. 35: [0.413 +/- 0.017], [0.425 +/- 0.016]
Desc. 36: [0.223 +/- 0.021], [0.241 +/- 0.026]
Desc. 37: [0.324 +/- 0.027], [0.358 +/- 0.028]
Desc. 38: [0.216 +/- 0.035], [0.229 +/- 0.026]
Desc. 39: [0.119 +/- 0.027], [0.129 +/- 0.024]
Desc. 40: [0.310 +/- 0.027], [0.357 +/- 0.025]
Desc. 41: [0.408 +/- 0.026], [0.430 +/- 0.022]



In [36]:

    
# Answer: Probably only for intensity.  
means = results[:,:,:].mean(axis=2)
sems = results[:,:,:].std(axis=2)/np.sqrt(n_splits)
plt.plot(means[:,0] - means[:,1])
plt.xlabel("Descriptor #")
plt.ylabel("ETC - RFC")









    Out[36]:





<matplotlib.text.Text at 0x11079edd8>



In [37]:

    
# How does performance increase with the number of features?  
n_splits = 3
n_features = [10,33,100,333,1000,3333,13914]
results = np.zeros((42,len(n_features),n_splits))
shuffle_split = ShuffleSplit(n_obs,n_splits,test_size=0.2,random_state=0)
for col in range(42):
    for i,max_features in enumerate(n_features):
        rfc = RandomForestRegressor(n_estimators=100,max_features=max_features,min_samples_leaf=1,
                                max_depth=None,oob_score=False,n_jobs=-1,random_state=0)
        for j,(train,test) in enumerate(shuffle_split):
            X = X_all['dragon+all']
            observed = Y_all_mask['mean_std'][:,col]
            rfc.fit(X[train,:],observed[train])
            predicted = rfc.predict(X[test,:])
            results[col,i,j] = np.corrcoef(predicted,observed[test])[1,0]
    means = results[col,:,:].mean(axis=1)
    sems = results[col,:,:].std(axis=1)/np.sqrt(n_splits)
    print(('Desc. %d:'+len(n_features)*' [%.3f],') % \
          tuple([col]+[means[i] for i in range(len(n_features))]))









    



Desc. 0: [0.134], [0.150], [0.171], [0.235], [0.344], [0.547], [0.619],
Desc. 1: [0.635], [0.624], [0.630], [0.637], [0.642], [0.644], [0.669],
Desc. 2: [0.600], [0.595], [0.603], [0.615], [0.605], [0.616], [0.608],
Desc. 3: [0.669], [0.675], [0.682], [0.688], [0.694], [0.698], [0.705],
Desc. 4: [0.647], [0.648], [0.662], [0.670], [0.679], [0.694], [0.726],
Desc. 5: [0.411], [0.417], [0.414], [0.411], [0.405], [0.411], [0.407],
Desc. 6: [0.613], [0.656], [0.639], [0.658], [0.681], [0.688], [0.679],
Desc. 7: [0.314], [0.310], [0.326], [0.326], [0.325], [0.336], [0.360],
Desc. 8: [0.303], [0.311], [0.303], [0.311], [0.299], [0.314], [0.328],
Desc. 9: [0.552], [0.559], [0.563], [0.566], [0.560], [0.569], [0.576],
Desc. 10: [0.372], [0.386], [0.383], [0.389], [0.385], [0.405], [0.421],
Desc. 11: [0.280], [0.288], [0.302], [0.323], [0.363], [0.427], [0.452],
Desc. 12: [0.167], [0.166], [0.163], [0.173], [0.168], [0.175], [0.175],
Desc. 13: [0.400], [0.404], [0.405], [0.411], [0.409], [0.425], [0.418],
Desc. 14: [0.486], [0.487], [0.483], [0.480], [0.478], [0.477], [0.483],
Desc. 15: [0.311], [0.320], [0.313], [0.326], [0.335], [0.355], [0.388],
Desc. 16: [0.539], [0.538], [0.532], [0.521], [0.533], [0.523], [0.520],
Desc. 17: [0.327], [0.324], [0.324], [0.316], [0.322], [0.334], [0.339],
Desc. 18: [0.272], [0.305], [0.326], [0.330], [0.338], [0.338], [0.363],
Desc. 19: [0.490], [0.482], [0.491], [0.497], [0.493], [0.500], [0.516],
Desc. 20: [0.396], [0.410], [0.423], [0.450], [0.524], [0.606], [0.646],
Desc. 21: [-0.065], [-0.071], [-0.069], [-0.065], [-0.067], [-0.057], [-0.017],
Desc. 22: [0.218], [0.215], [0.209], [0.220], [0.220], [0.239], [0.248],
Desc. 23: [0.317], [0.326], [0.328], [0.316], [0.331], [0.328], [0.338],
Desc. 24: [0.470], [0.484], [0.472], [0.479], [0.482], [0.479], [0.485],
Desc. 25: [0.442], [0.448], [0.449], [0.455], [0.458], [0.472], [0.490],
Desc. 26: [0.343], [0.351], [0.353], [0.346], [0.347], [0.354], [0.361],
Desc. 27: [0.435], [0.428], [0.442], [0.438], [0.461], [0.464], [0.484],
Desc. 28: [0.181], [0.188], [0.189], [0.186], [0.197], [0.211], [0.238],
Desc. 29: [0.274], [0.275], [0.262], [0.277], [0.283], [0.288], [0.296],
Desc. 30: [0.454], [0.457], [0.463], [0.471], [0.469], [0.469], [0.474],
Desc. 31: [0.273], [0.280], [0.276], [0.287], [0.285], [0.305], [0.306],
Desc. 32: [0.243], [0.250], [0.257], [0.268], [0.281], [0.352], [0.385],
Desc. 33: [0.090], [0.089], [0.090], [0.087], [0.096], [0.096], [0.112],
Desc. 34: [0.350], [0.350], [0.357], [0.357], [0.358], [0.364], [0.370],
Desc. 35: [0.382], [0.387], [0.377], [0.385], [0.375], [0.376], [0.388],
Desc. 36: [0.232], [0.247], [0.241], [0.246], [0.263], [0.274], [0.302],
Desc. 37: [0.339], [0.323], [0.331], [0.345], [0.338], [0.346], [0.355],
Desc. 38: [0.259], [0.248], [0.258], [0.252], [0.270], [0.261], [0.270],
Desc. 39: [0.114], [0.122], [0.127], [0.136], [0.124], [0.140], [0.145],
Desc. 40: [0.323], [0.335], [0.332], [0.338], [0.335], [0.339], [0.343],
Desc. 41: [0.251], [0.264], [0.268], [0.300], [0.343], [0.419], [0.462],



In [40]:

    
fig,ax = plt.subplots(14,3,sharex=True,sharey=True,figsize=(10,20))
for col in range(42):
    ax_ = ax[int(col / 3), col % 3]
    ax_.errorbar(n_features,results[col,:,:].mean(axis=1),results[col,:,:].std(axis=1)/np.sqrt(n_splits))
    ax_.set_xlim(5,15000)
    ax_.set_ylim(0,0.8)
    ax_.set_yticks(np.linspace(0,0.6,4))
    #ax_.set_xticklabels(n_features,rotation=45)
    ax_.set_xscale('log')
    ax_.set_title('Feature %d' % col)
plt.tight_layout()
fig.text(0.5, 0.00, '# features', ha='center')
fig.text(0.00, 0.5, 'Correlation', va='center', rotation='vertical')









    Out[40]:





<matplotlib.text.Text at 0x195260320>



In [42]:

    
# Does having more samples per leaf or less depth help?  
n_splits = 12
n_samples_leaf = [1,4,16,64]
n_depth = [2,6,15,32,None]
#results = np.zeros((42,len(n_samples_leaf),len(n_depth),n_splits))
shuffle_split = ShuffleSplit(n_obs,n_splits,test_size=0.2,random_state=0)
for col in range(3,42):
    for i,min_samples_leaf in enumerate(n_samples_leaf):
        for j,max_depth in enumerate(n_depth):
            rfc = RandomForestRegressor(n_estimators=25,max_features=None,
                                            min_samples_leaf=min_samples_leaf,
                                            max_depth=max_depth,oob_score=False,
                                            n_jobs=-1,random_state=0)
            X = X_all['dragon+all']
            Y = Y_all_mask['mean_std'][:,col]
            for k,(train,test) in enumerate(shuffle_split):
                observed = Y[test]
                rfc.fit(X[train,:],Y[train])
                predicted = rfc.predict(X[test,:])
                results[col,i,j,k] = np.corrcoef(predicted,observed)[1,0]
            mean = results[col,i,j,:].mean()
            sem = results[col,i,j,:].std()/np.sqrt(n_splits)
            print('Feature %d: %s min samples per leaf, %s max depth: %.3f +/- %.3f' % (col,min_samples_leaf,max_depth,mean,sem))









    



Feature 3: 1 min samples per leaf, 2 max depth: 0.258 +/- 0.083
Feature 3: 1 min samples per leaf, 6 max depth: 0.310 +/- 0.095
Feature 3: 1 min samples per leaf, 15 max depth: 0.314 +/- 0.096
Feature 3: 1 min samples per leaf, 32 max depth: 0.315 +/- 0.096
Feature 3: 1 min samples per leaf, None max depth: 0.315 +/- 0.096
Feature 3: 4 min samples per leaf, 2 max depth: 0.259 +/- 0.083
Feature 3: 4 min samples per leaf, 6 max depth: 0.301 +/- 0.092
Feature 3: 4 min samples per leaf, 15 max depth: 0.306 +/- 0.093
Feature 3: 4 min samples per leaf, 32 max depth: 0.306 +/- 0.093
Feature 3: 4 min samples per leaf, None max depth: 0.306 +/- 0.093
Feature 3: 16 min samples per leaf, 2 max depth: 0.248 +/- 0.079
Feature 3: 16 min samples per leaf, 6 max depth: 0.278 +/- 0.085
Feature 3: 16 min samples per leaf, 15 max depth: 0.278 +/- 0.085
Feature 3: 16 min samples per leaf, 32 max depth: 0.278 +/- 0.085
Feature 3: 16 min samples per leaf, None max depth: 0.278 +/- 0.085
Feature 3: 64 min samples per leaf, 2 max depth: 0.227 +/- 0.070
Feature 3: 64 min samples per leaf, 6 max depth: 0.229 +/- 0.070
Feature 3: 64 min samples per leaf, 15 max depth: 0.229 +/- 0.070
Feature 3: 64 min samples per leaf, 32 max depth: 0.229 +/- 0.070
Feature 3: 64 min samples per leaf, None max depth: 0.229 +/- 0.070
Feature 4: 1 min samples per leaf, 2 max depth: 0.260 +/- 0.084
Feature 4: 1 min samples per leaf, 6 max depth: 0.318 +/- 0.097
Feature 4: 1 min samples per leaf, 15 max depth: 0.321 +/- 0.098
Feature 4: 1 min samples per leaf, 32 max depth: 0.319 +/- 0.097
Feature 4: 1 min samples per leaf, None max depth: 0.318 +/- 0.097
Feature 4: 4 min samples per leaf, 2 max depth: 0.261 +/- 0.084
Feature 4: 4 min samples per leaf, 6 max depth: 0.309 +/- 0.095
Feature 4: 4 min samples per leaf, 15 max depth: 0.310 +/- 0.095
Feature 4: 4 min samples per leaf, 32 max depth: 0.310 +/- 0.095
Feature 4: 4 min samples per leaf, None max depth: 0.310 +/- 0.095
Feature 4: 16 min samples per leaf, 2 max depth: 0.252 +/- 0.082
Feature 4: 16 min samples per leaf, 6 max depth: 0.271 +/- 0.084
Feature 4: 16 min samples per leaf, 15 max depth: 0.271 +/- 0.084
Feature 4: 16 min samples per leaf, 32 max depth: 0.271 +/- 0.084
Feature 4: 16 min samples per leaf, None max depth: 0.271 +/- 0.084
Feature 4: 64 min samples per leaf, 2 max depth: 0.220 +/- 0.068
Feature 4: 64 min samples per leaf, 6 max depth: 0.218 +/- 0.067
Feature 4: 64 min samples per leaf, 15 max depth: 0.218 +/- 0.067
Feature 4: 64 min samples per leaf, 32 max depth: 0.218 +/- 0.067
Feature 4: 64 min samples per leaf, None max depth: 0.218 +/- 0.067
Feature 5: 1 min samples per leaf, 2 max depth: 0.183 +/- 0.060
Feature 5: 1 min samples per leaf, 6 max depth: 0.195 +/- 0.061
Feature 5: 1 min samples per leaf, 15 max depth: 0.195 +/- 0.060
Feature 5: 1 min samples per leaf, 32 max depth: 0.191 +/- 0.060
Feature 5: 1 min samples per leaf, None max depth: 0.192 +/- 0.060
Feature 5: 4 min samples per leaf, 2 max depth: 0.195 +/- 0.063
Feature 5: 4 min samples per leaf, 6 max depth: 0.205 +/- 0.064
Feature 5: 4 min samples per leaf, 15 max depth: 0.201 +/- 0.063
Feature 5: 4 min samples per leaf, 32 max depth: 0.201 +/- 0.062
Feature 5: 4 min samples per leaf, None max depth: 0.200 +/- 0.062
Feature 5: 16 min samples per leaf, 2 max depth: 0.201 +/- 0.065
Feature 5: 16 min samples per leaf, 6 max depth: 0.210 +/- 0.066
Feature 5: 16 min samples per leaf, 15 max depth: 0.210 +/- 0.066
Feature 5: 16 min samples per leaf, 32 max depth: 0.210 +/- 0.066
Feature 5: 16 min samples per leaf, None max depth: 0.210 +/- 0.066
Feature 5: 64 min samples per leaf, 2 max depth: 0.168 +/- 0.054
Feature 5: 64 min samples per leaf, 6 max depth: 0.168 +/- 0.053
Feature 5: 64 min samples per leaf, 15 max depth: 0.168 +/- 0.053
Feature 5: 64 min samples per leaf, 32 max depth: 0.168 +/- 0.053
Feature 5: 64 min samples per leaf, None max depth: 0.168 +/- 0.053
Feature 6: 1 min samples per leaf, 2 max depth: 0.263 +/- 0.087
Feature 6: 1 min samples per leaf, 6 max depth: 0.281 +/- 0.088
Feature 6: 1 min samples per leaf, 15 max depth: 0.279 +/- 0.087
Feature 6: 1 min samples per leaf, 32 max depth: 0.279 +/- 0.087
Feature 6: 1 min samples per leaf, None max depth: 0.279 +/- 0.087
Feature 6: 4 min samples per leaf, 2 max depth: 0.261 +/- 0.086
Feature 6: 4 min samples per leaf, 6 max depth: 0.271 +/- 0.086
Feature 6: 4 min samples per leaf, 15 max depth: 0.275 +/- 0.086
Feature 6: 4 min samples per leaf, 32 max depth: 0.275 +/- 0.086
Feature 6: 4 min samples per leaf, None max depth: 0.275 +/- 0.086
Feature 6: 16 min samples per leaf, 2 max depth: 0.242 +/- 0.074
Feature 6: 16 min samples per leaf, 6 max depth: 0.252 +/- 0.077
Feature 6: 16 min samples per leaf, 15 max depth: 0.253 +/- 0.077
Feature 6: 16 min samples per leaf, 32 max depth: 0.253 +/- 0.077
Feature 6: 16 min samples per leaf, None max depth: 0.253 +/- 0.077
Feature 6: 64 min samples per leaf, 2 max depth: 0.217 +/- 0.066
Feature 6: 64 min samples per leaf, 6 max depth: 0.219 +/- 0.066
Feature 6: 64 min samples per leaf, 15 max depth: 0.219 +/- 0.066
Feature 6: 64 min samples per leaf, 32 max depth: 0.219 +/- 0.066
Feature 6: 64 min samples per leaf, None max depth: 0.219 +/- 0.066
Feature 7: 1 min samples per leaf, 2 max depth: 0.155 +/- 0.052
Feature 7: 1 min samples per leaf, 6 max depth: 0.164 +/- 0.051
Feature 7: 1 min samples per leaf, 15 max depth: 0.149 +/- 0.047
Feature 7: 1 min samples per leaf, 32 max depth: 0.149 +/- 0.047
Feature 7: 1 min samples per leaf, None max depth: 0.149 +/- 0.047
Feature 7: 4 min samples per leaf, 2 max depth: 0.159 +/- 0.054
Feature 7: 4 min samples per leaf, 6 max depth: 0.170 +/- 0.054
Feature 7: 4 min samples per leaf, 15 max depth: 0.155 +/- 0.048
Feature 7: 4 min samples per leaf, 32 max depth: 0.153 +/- 0.048
Feature 7: 4 min samples per leaf, None max depth: 0.153 +/- 0.048
Feature 7: 16 min samples per leaf, 2 max depth: 0.154 +/- 0.052
Feature 7: 16 min samples per leaf, 6 max depth: 0.163 +/- 0.052
Feature 7: 16 min samples per leaf, 15 max depth: 0.158 +/- 0.051
Feature 7: 16 min samples per leaf, 32 max depth: 0.158 +/- 0.051
Feature 7: 16 min samples per leaf, None max depth: 0.158 +/- 0.051
Feature 7: 64 min samples per leaf, 2 max depth: 0.131 +/- 0.042
Feature 7: 64 min samples per leaf, 6 max depth: 0.132 +/- 0.042
Feature 7: 64 min samples per leaf, 15 max depth: 0.132 +/- 0.042
Feature 7: 64 min samples per leaf, 32 max depth: 0.132 +/- 0.042
Feature 7: 64 min samples per leaf, None max depth: 0.132 +/- 0.042
Feature 8: 1 min samples per leaf, 2 max depth: 0.112 +/- 0.038
Feature 8: 1 min samples per leaf, 6 max depth: 0.134 +/- 0.043
Feature 8: 1 min samples per leaf, 15 max depth: 0.142 +/- 0.045
Feature 8: 1 min samples per leaf, 32 max depth: 0.142 +/- 0.044
Feature 8: 1 min samples per leaf, None max depth: 0.142 +/- 0.044
Feature 8: 4 min samples per leaf, 2 max depth: 0.113 +/- 0.038
Feature 8: 4 min samples per leaf, 6 max depth: 0.135 +/- 0.045
Feature 8: 4 min samples per leaf, 15 max depth: 0.142 +/- 0.046
Feature 8: 4 min samples per leaf, 32 max depth: 0.143 +/- 0.046
Feature 8: 4 min samples per leaf, None max depth: 0.143 +/- 0.046
Feature 8: 16 min samples per leaf, 2 max depth: 0.126 +/- 0.042
Feature 8: 16 min samples per leaf, 6 max depth: 0.141 +/- 0.046
Feature 8: 16 min samples per leaf, 15 max depth: 0.143 +/- 0.046
Feature 8: 16 min samples per leaf, 32 max depth: 0.143 +/- 0.046
Feature 8: 16 min samples per leaf, None max depth: 0.143 +/- 0.046
Feature 8: 64 min samples per leaf, 2 max depth: 0.115 +/- 0.038
Feature 8: 64 min samples per leaf, 6 max depth: 0.119 +/- 0.039
Feature 8: 64 min samples per leaf, 15 max depth: 0.119 +/- 0.039
Feature 8: 64 min samples per leaf, 32 max depth: 0.119 +/- 0.039
Feature 8: 64 min samples per leaf, None max depth: 0.119 +/- 0.039
Feature 9: 1 min samples per leaf, 2 max depth: 0.221 +/- 0.068
Feature 9: 1 min samples per leaf, 6 max depth: 0.253 +/- 0.078
Feature 9: 1 min samples per leaf, 15 max depth: 0.252 +/- 0.078
Feature 9: 1 min samples per leaf, 32 max depth: 0.253 +/- 0.078
Feature 9: 1 min samples per leaf, None max depth: 0.253 +/- 0.078
Feature 9: 4 min samples per leaf, 2 max depth: 0.218 +/- 0.068
Feature 9: 4 min samples per leaf, 6 max depth: 0.247 +/- 0.076
Feature 9: 4 min samples per leaf, 15 max depth: 0.248 +/- 0.076
Feature 9: 4 min samples per leaf, 32 max depth: 0.248 +/- 0.076
Feature 9: 4 min samples per leaf, None max depth: 0.248 +/- 0.076
Feature 9: 16 min samples per leaf, 2 max depth: 0.220 +/- 0.068
Feature 9: 16 min samples per leaf, 6 max depth: 0.234 +/- 0.072
Feature 9: 16 min samples per leaf, 15 max depth: 0.233 +/- 0.072
Feature 9: 16 min samples per leaf, 32 max depth: 0.233 +/- 0.072
Feature 9: 16 min samples per leaf, None max depth: 0.233 +/- 0.072
Feature 9: 64 min samples per leaf, 2 max depth: 0.184 +/- 0.057
Feature 9: 64 min samples per leaf, 6 max depth: 0.186 +/- 0.058
Feature 9: 64 min samples per leaf, 15 max depth: 0.186 +/- 0.058
Feature 9: 64 min samples per leaf, 32 max depth: 0.186 +/- 0.058
Feature 9: 64 min samples per leaf, None max depth: 0.186 +/- 0.058
Feature 10: 1 min samples per leaf, 2 max depth: 0.168 +/- 0.064
Feature 10: 1 min samples per leaf, 6 max depth: 0.216 +/- 0.070
Feature 10: 1 min samples per leaf, 15 max depth: 0.222 +/- 0.070
Feature 10: 1 min samples per leaf, 32 max depth: 0.221 +/- 0.070
Feature 10: 1 min samples per leaf, None max depth: 0.221 +/- 0.070
Feature 10: 4 min samples per leaf, 2 max depth: 0.179 +/- 0.063
Feature 10: 4 min samples per leaf, 6 max depth: 0.218 +/- 0.068
Feature 10: 4 min samples per leaf, 15 max depth: 0.217 +/- 0.067
Feature 10: 4 min samples per leaf, 32 max depth: 0.217 +/- 0.068
Feature 10: 4 min samples per leaf, None max depth: 0.217 +/- 0.068
Feature 10: 16 min samples per leaf, 2 max depth: 0.161 +/- 0.051
Feature 10: 16 min samples per leaf, 6 max depth: 0.188 +/- 0.058
Feature 10: 16 min samples per leaf, 15 max depth: 0.189 +/- 0.058
Feature 10: 16 min samples per leaf, 32 max depth: 0.189 +/- 0.058
Feature 10: 16 min samples per leaf, None max depth: 0.189 +/- 0.058
Feature 10: 64 min samples per leaf, 2 max depth: 0.149 +/- 0.047
Feature 10: 64 min samples per leaf, 6 max depth: 0.153 +/- 0.048
Feature 10: 64 min samples per leaf, 15 max depth: 0.153 +/- 0.048
Feature 10: 64 min samples per leaf, 32 max depth: 0.153 +/- 0.048
Feature 10: 64 min samples per leaf, None max depth: 0.153 +/- 0.048
Feature 11: 1 min samples per leaf, 2 max depth: 0.181 +/- 0.056
Feature 11: 1 min samples per leaf, 6 max depth: 0.197 +/- 0.060
Feature 11: 1 min samples per leaf, 15 max depth: 0.201 +/- 0.061
Feature 11: 1 min samples per leaf, 32 max depth: 0.199 +/- 0.061
Feature 11: 1 min samples per leaf, None max depth: 0.199 +/- 0.061
Feature 11: 4 min samples per leaf, 2 max depth: 0.181 +/- 0.055
Feature 11: 4 min samples per leaf, 6 max depth: 0.190 +/- 0.058
Feature 11: 4 min samples per leaf, 15 max depth: 0.189 +/- 0.058
Feature 11: 4 min samples per leaf, 32 max depth: 0.189 +/- 0.058
Feature 11: 4 min samples per leaf, None max depth: 0.189 +/- 0.058
Feature 11: 16 min samples per leaf, 2 max depth: 0.185 +/- 0.058
Feature 11: 16 min samples per leaf, 6 max depth: 0.205 +/- 0.063
Feature 11: 16 min samples per leaf, 15 max depth: 0.206 +/- 0.063
Feature 11: 16 min samples per leaf, 32 max depth: 0.206 +/- 0.063
Feature 11: 16 min samples per leaf, None max depth: 0.206 +/- 0.063
Feature 11: 64 min samples per leaf, 2 max depth: 0.190 +/- 0.059
Feature 11: 64 min samples per leaf, 6 max depth: 0.191 +/- 0.059
Feature 11: 64 min samples per leaf, 15 max depth: 0.191 +/- 0.059
Feature 11: 64 min samples per leaf, 32 max depth: 0.191 +/- 0.059
Feature 11: 64 min samples per leaf, None max depth: 0.191 +/- 0.059
Feature 12: 1 min samples per leaf, 2 max depth: 0.118 +/- 0.041
Feature 12: 1 min samples per leaf, 6 max depth: 0.113 +/- 0.038
Feature 12: 1 min samples per leaf, 15 max depth: 0.100 +/- 0.034
Feature 12: 1 min samples per leaf, 32 max depth: 0.099 +/- 0.033
Feature 12: 1 min samples per leaf, None max depth: 0.099 +/- 0.033
Feature 12: 4 min samples per leaf, 2 max depth: 0.120 +/- 0.043
Feature 12: 4 min samples per leaf, 6 max depth: 0.112 +/- 0.038
Feature 12: 4 min samples per leaf, 15 max depth: 0.099 +/- 0.033
Feature 12: 4 min samples per leaf, 32 max depth: 0.097 +/- 0.033
Feature 12: 4 min samples per leaf, None max depth: 0.097 +/- 0.033
Feature 12: 16 min samples per leaf, 2 max depth: 0.076 +/- 0.027
Feature 12: 16 min samples per leaf, 6 max depth: 0.078 +/- 0.026
Feature 12: 16 min samples per leaf, 15 max depth: 0.075 +/- 0.025
Feature 12: 16 min samples per leaf, 32 max depth: 0.075 +/- 0.025
Feature 12: 16 min samples per leaf, None max depth: 0.075 +/- 0.025
Feature 12: 64 min samples per leaf, 2 max depth: 0.068 +/- 0.027
Feature 12: 64 min samples per leaf, 6 max depth: 0.069 +/- 0.026
Feature 12: 64 min samples per leaf, 15 max depth: 0.069 +/- 0.026
Feature 12: 64 min samples per leaf, 32 max depth: 0.069 +/- 0.026
Feature 12: 64 min samples per leaf, None max depth: 0.069 +/- 0.026
Feature 13: 1 min samples per leaf, 2 max depth: 0.167 +/- 0.055
Feature 13: 1 min samples per leaf, 6 max depth: 0.201 +/- 0.064
Feature 13: 1 min samples per leaf, 15 max depth: 0.196 +/- 0.061
Feature 13: 1 min samples per leaf, 32 max depth: 0.195 +/- 0.061
Feature 13: 1 min samples per leaf, None max depth: 0.195 +/- 0.061
Feature 13: 4 min samples per leaf, 2 max depth: 0.169 +/- 0.055
Feature 13: 4 min samples per leaf, 6 max depth: 0.193 +/- 0.061
Feature 13: 4 min samples per leaf, 15 max depth: 0.191 +/- 0.060
Feature 13: 4 min samples per leaf, 32 max depth: 0.191 +/- 0.059
Feature 13: 4 min samples per leaf, None max depth: 0.191 +/- 0.059
Feature 13: 16 min samples per leaf, 2 max depth: 0.180 +/- 0.057
Feature 13: 16 min samples per leaf, 6 max depth: 0.200 +/- 0.063
Feature 13: 16 min samples per leaf, 15 max depth: 0.199 +/- 0.062
Feature 13: 16 min samples per leaf, 32 max depth: 0.199 +/- 0.062
Feature 13: 16 min samples per leaf, None max depth: 0.199 +/- 0.062
Feature 13: 64 min samples per leaf, 2 max depth: 0.163 +/- 0.051
Feature 13: 64 min samples per leaf, 6 max depth: 0.168 +/- 0.052
Feature 13: 64 min samples per leaf, 15 max depth: 0.168 +/- 0.052
Feature 13: 64 min samples per leaf, 32 max depth: 0.168 +/- 0.052
Feature 13: 64 min samples per leaf, None max depth: 0.168 +/- 0.052
Feature 14: 1 min samples per leaf, 2 max depth: 0.181 +/- 0.059
Feature 14: 1 min samples per leaf, 6 max depth: 0.220 +/- 0.068
Feature 14: 1 min samples per leaf, 15 max depth: 0.230 +/- 0.071
Feature 14: 1 min samples per leaf, 32 max depth: 0.227 +/- 0.070
Feature 14: 1 min samples per leaf, None max depth: 0.227 +/- 0.070
Feature 14: 4 min samples per leaf, 2 max depth: 0.180 +/- 0.058
Feature 14: 4 min samples per leaf, 6 max depth: 0.218 +/- 0.067
Feature 14: 4 min samples per leaf, 15 max depth: 0.225 +/- 0.069
Feature 14: 4 min samples per leaf, 32 max depth: 0.224 +/- 0.069
Feature 14: 4 min samples per leaf, None max depth: 0.224 +/- 0.069
Feature 14: 16 min samples per leaf, 2 max depth: 0.199 +/- 0.063
Feature 14: 16 min samples per leaf, 6 max depth: 0.220 +/- 0.068
Feature 14: 16 min samples per leaf, 15 max depth: 0.222 +/- 0.068
Feature 14: 16 min samples per leaf, 32 max depth: 0.222 +/- 0.068
Feature 14: 16 min samples per leaf, None max depth: 0.222 +/- 0.068
Feature 14: 64 min samples per leaf, 2 max depth: 0.177 +/- 0.055
Feature 14: 64 min samples per leaf, 6 max depth: 0.182 +/- 0.056
Feature 14: 64 min samples per leaf, 15 max depth: 0.182 +/- 0.056
Feature 14: 64 min samples per leaf, 32 max depth: 0.182 +/- 0.056
Feature 14: 64 min samples per leaf, None max depth: 0.182 +/- 0.056
Feature 15: 1 min samples per leaf, 2 max depth: 0.145 +/- 0.045
Feature 15: 1 min samples per leaf, 6 max depth: 0.153 +/- 0.050
Feature 15: 1 min samples per leaf, 15 max depth: 0.150 +/- 0.048
Feature 15: 1 min samples per leaf, 32 max depth: 0.146 +/- 0.047
Feature 15: 1 min samples per leaf, None max depth: 0.146 +/- 0.047
Feature 15: 4 min samples per leaf, 2 max depth: 0.146 +/- 0.045
Feature 15: 4 min samples per leaf, 6 max depth: 0.147 +/- 0.046
Feature 15: 4 min samples per leaf, 15 max depth: 0.137 +/- 0.043
Feature 15: 4 min samples per leaf, 32 max depth: 0.137 +/- 0.043
Feature 15: 4 min samples per leaf, None max depth: 0.137 +/- 0.043
Feature 15: 16 min samples per leaf, 2 max depth: 0.145 +/- 0.045
Feature 15: 16 min samples per leaf, 6 max depth: 0.149 +/- 0.047
Feature 15: 16 min samples per leaf, 15 max depth: 0.149 +/- 0.047
Feature 15: 16 min samples per leaf, 32 max depth: 0.149 +/- 0.047
Feature 15: 16 min samples per leaf, None max depth: 0.149 +/- 0.047
Feature 15: 64 min samples per leaf, 2 max depth: 0.144 +/- 0.045
Feature 15: 64 min samples per leaf, 6 max depth: 0.148 +/- 0.046
Feature 15: 64 min samples per leaf, 15 max depth: 0.148 +/- 0.046
Feature 15: 64 min samples per leaf, 32 max depth: 0.148 +/- 0.046
Feature 15: 64 min samples per leaf, None max depth: 0.148 +/- 0.046
Feature 16: 1 min samples per leaf, 2 max depth: 0.204 +/- 0.069
Feature 16: 1 min samples per leaf, 6 max depth: 0.243 +/- 0.077
Feature 16: 1 min samples per leaf, 15 max depth: 0.242 +/- 0.076
Feature 16: 1 min samples per leaf, 32 max depth: 0.236 +/- 0.075
Feature 16: 1 min samples per leaf, None max depth: 0.237 +/- 0.075
Feature 16: 4 min samples per leaf, 2 max depth: 0.206 +/- 0.069
Feature 16: 4 min samples per leaf, 6 max depth: 0.231 +/- 0.073
Feature 16: 4 min samples per leaf, 15 max depth: 0.231 +/- 0.072
Feature 16: 4 min samples per leaf, 32 max depth: 0.232 +/- 0.073
Feature 16: 4 min samples per leaf, None max depth: 0.232 +/- 0.073
Feature 16: 16 min samples per leaf, 2 max depth: 0.215 +/- 0.071
Feature 16: 16 min samples per leaf, 6 max depth: 0.230 +/- 0.073
Feature 16: 16 min samples per leaf, 15 max depth: 0.231 +/- 0.073
Feature 16: 16 min samples per leaf, 32 max depth: 0.231 +/- 0.073
Feature 16: 16 min samples per leaf, None max depth: 0.231 +/- 0.073
Feature 16: 64 min samples per leaf, 2 max depth: 0.170 +/- 0.055
Feature 16: 64 min samples per leaf, 6 max depth: 0.173 +/- 0.055
Feature 16: 64 min samples per leaf, 15 max depth: 0.173 +/- 0.055
Feature 16: 64 min samples per leaf, 32 max depth: 0.173 +/- 0.055
Feature 16: 64 min samples per leaf, None max depth: 0.173 +/- 0.055
Feature 17: 1 min samples per leaf, 2 max depth: 0.156 +/- 0.053
Feature 17: 1 min samples per leaf, 6 max depth: 0.149 +/- 0.051
Feature 17: 1 min samples per leaf, 15 max depth: 0.145 +/- 0.049
Feature 17: 1 min samples per leaf, 32 max depth: 0.144 +/- 0.049
Feature 17: 1 min samples per leaf, None max depth: 0.144 +/- 0.049
Feature 17: 4 min samples per leaf, 2 max depth: 0.158 +/- 0.054
Feature 17: 4 min samples per leaf, 6 max depth: 0.153 +/- 0.052
Feature 17: 4 min samples per leaf, 15 max depth: 0.148 +/- 0.050
Feature 17: 4 min samples per leaf, 32 max depth: 0.148 +/- 0.050
Feature 17: 4 min samples per leaf, None max depth: 0.148 +/- 0.050
Feature 17: 16 min samples per leaf, 2 max depth: 0.162 +/- 0.055
Feature 17: 16 min samples per leaf, 6 max depth: 0.156 +/- 0.051
Feature 17: 16 min samples per leaf, 15 max depth: 0.155 +/- 0.050
Feature 17: 16 min samples per leaf, 32 max depth: 0.155 +/- 0.050
Feature 17: 16 min samples per leaf, None max depth: 0.155 +/- 0.050
Feature 17: 64 min samples per leaf, 2 max depth: 0.129 +/- 0.043
Feature 17: 64 min samples per leaf, 6 max depth: 0.130 +/- 0.043
Feature 17: 64 min samples per leaf, 15 max depth: 0.130 +/- 0.043
Feature 17: 64 min samples per leaf, 32 max depth: 0.130 +/- 0.043
Feature 17: 64 min samples per leaf, None max depth: 0.130 +/- 0.043
Feature 18: 1 min samples per leaf, 2 max depth: 0.136 +/- 0.056
Feature 18: 1 min samples per leaf, 6 max depth: 0.151 +/- 0.056
Feature 18: 1 min samples per leaf, 15 max depth: 0.136 +/- 0.049
Feature 18: 1 min samples per leaf, 32 max depth: 0.131 +/- 0.047
Feature 18: 1 min samples per leaf, None max depth: 0.131 +/- 0.047
Feature 18: 4 min samples per leaf, 2 max depth: 0.138 +/- 0.054
Feature 18: 4 min samples per leaf, 6 max depth: 0.151 +/- 0.055
Feature 18: 4 min samples per leaf, 15 max depth: 0.127 +/- 0.045
Feature 18: 4 min samples per leaf, 32 max depth: 0.123 +/- 0.044
Feature 18: 4 min samples per leaf, None max depth: 0.123 +/- 0.044
Feature 18: 16 min samples per leaf, 2 max depth: 0.105 +/- 0.041
Feature 18: 16 min samples per leaf, 6 max depth: 0.112 +/- 0.040
Feature 18: 16 min samples per leaf, 15 max depth: 0.104 +/- 0.037
Feature 18: 16 min samples per leaf, 32 max depth: 0.104 +/- 0.037
Feature 18: 16 min samples per leaf, None max depth: 0.104 +/- 0.037
Feature 18: 64 min samples per leaf, 2 max depth: 0.083 +/- 0.029
Feature 18: 64 min samples per leaf, 6 max depth: 0.082 +/- 0.029
Feature 18: 64 min samples per leaf, 15 max depth: 0.082 +/- 0.029
Feature 18: 64 min samples per leaf, 32 max depth: 0.082 +/- 0.029
Feature 18: 64 min samples per leaf, None max depth: 0.082 +/- 0.029
Feature 19: 1 min samples per leaf, 2 max depth: 0.225 +/- 0.069
Feature 19: 1 min samples per leaf, 6 max depth: 0.261 +/- 0.080
Feature 19: 1 min samples per leaf, 15 max depth: 0.252 +/- 0.077
Feature 19: 1 min samples per leaf, 32 max depth: 0.254 +/- 0.078
Feature 19: 1 min samples per leaf, None max depth: 0.254 +/- 0.078
Feature 19: 4 min samples per leaf, 2 max depth: 0.232 +/- 0.071
Feature 19: 4 min samples per leaf, 6 max depth: 0.264 +/- 0.080
Feature 19: 4 min samples per leaf, 15 max depth: 0.261 +/- 0.079
Feature 19: 4 min samples per leaf, 32 max depth: 0.261 +/- 0.079
Feature 19: 4 min samples per leaf, None max depth: 0.261 +/- 0.079
Feature 19: 16 min samples per leaf, 2 max depth: 0.230 +/- 0.070
Feature 19: 16 min samples per leaf, 6 max depth: 0.253 +/- 0.077
Feature 19: 16 min samples per leaf, 15 max depth: 0.253 +/- 0.077
Feature 19: 16 min samples per leaf, 32 max depth: 0.253 +/- 0.077
Feature 19: 16 min samples per leaf, None max depth: 0.253 +/- 0.077
Feature 19: 64 min samples per leaf, 2 max depth: 0.214 +/- 0.066
Feature 19: 64 min samples per leaf, 6 max depth: 0.219 +/- 0.067
Feature 19: 64 min samples per leaf, 15 max depth: 0.219 +/- 0.067
Feature 19: 64 min samples per leaf, 32 max depth: 0.219 +/- 0.067
Feature 19: 64 min samples per leaf, None max depth: 0.219 +/- 0.067
Feature 20: 1 min samples per leaf, 2 max depth: 0.243 +/- 0.075
Feature 20: 1 min samples per leaf, 6 max depth: 0.281 +/- 0.086
Feature 20: 1 min samples per leaf, 15 max depth: 0.284 +/- 0.087
Feature 20: 1 min samples per leaf, 32 max depth: 0.284 +/- 0.087
Feature 20: 1 min samples per leaf, None max depth: 0.284 +/- 0.087
Feature 20: 4 min samples per leaf, 2 max depth: 0.243 +/- 0.075
Feature 20: 4 min samples per leaf, 6 max depth: 0.280 +/- 0.085
Feature 20: 4 min samples per leaf, 15 max depth: 0.278 +/- 0.085
Feature 20: 4 min samples per leaf, 32 max depth: 0.278 +/- 0.085
Feature 20: 4 min samples per leaf, None max depth: 0.278 +/- 0.085
Feature 20: 16 min samples per leaf, 2 max depth: 0.248 +/- 0.076
Feature 20: 16 min samples per leaf, 6 max depth: 0.279 +/- 0.085
Feature 20: 16 min samples per leaf, 15 max depth: 0.278 +/- 0.084
Feature 20: 16 min samples per leaf, 32 max depth: 0.278 +/- 0.084
Feature 20: 16 min samples per leaf, None max depth: 0.278 +/- 0.084
Feature 20: 64 min samples per leaf, 2 max depth: 0.254 +/- 0.077
Feature 20: 64 min samples per leaf, 6 max depth: 0.255 +/- 0.077
Feature 20: 64 min samples per leaf, 15 max depth: 0.255 +/- 0.077
Feature 20: 64 min samples per leaf, 32 max depth: 0.255 +/- 0.077
Feature 20: 64 min samples per leaf, None max depth: 0.255 +/- 0.077
Feature 21: 1 min samples per leaf, 2 max depth: 0.001 +/- 0.008
Feature 21: 1 min samples per leaf, 6 max depth: -0.029 +/- 0.019
Feature 21: 1 min samples per leaf, 15 max depth: -0.041 +/- 0.022
Feature 21: 1 min samples per leaf, 32 max depth: -0.039 +/- 0.023
Feature 21: 1 min samples per leaf, None max depth: -0.039 +/- 0.023
Feature 21: 4 min samples per leaf, 2 max depth: -0.002 +/- 0.009
Feature 21: 4 min samples per leaf, 6 max depth: -0.035 +/- 0.019
Feature 21: 4 min samples per leaf, 15 max depth: -0.054 +/- 0.025
Feature 21: 4 min samples per leaf, 32 max depth: -0.053 +/- 0.025
Feature 21: 4 min samples per leaf, None max depth: -0.053 +/- 0.025
Feature 21: 16 min samples per leaf, 2 max depth: 0.001 +/- 0.011
Feature 21: 16 min samples per leaf, 6 max depth: -0.040 +/- 0.021
Feature 21: 16 min samples per leaf, 15 max depth: -0.046 +/- 0.023
Feature 21: 16 min samples per leaf, 32 max depth: -0.046 +/- 0.023
Feature 21: 16 min samples per leaf, None max depth: -0.046 +/- 0.023
Feature 21: 64 min samples per leaf, 2 max depth: 0.004 +/- 0.013
Feature 21: 64 min samples per leaf, 6 max depth: -0.004 +/- 0.015
Feature 21: 64 min samples per leaf, 15 max depth: -0.004 +/- 0.015
Feature 21: 64 min samples per leaf, 32 max depth: -0.004 +/- 0.015
Feature 21: 64 min samples per leaf, None max depth: -0.004 +/- 0.015
Feature 22: 1 min samples per leaf, 2 max depth: 0.056 +/- 0.023
Feature 22: 1 min samples per leaf, 6 max depth: 0.090 +/- 0.030
Feature 22: 1 min samples per leaf, 15 max depth: 0.106 +/- 0.034
Feature 22: 1 min samples per leaf, 32 max depth: 0.108 +/- 0.034
Feature 22: 1 min samples per leaf, None max depth: 0.108 +/- 0.034
Feature 22: 4 min samples per leaf, 2 max depth: 0.058 +/- 0.023
Feature 22: 4 min samples per leaf, 6 max depth: 0.086 +/- 0.028
Feature 22: 4 min samples per leaf, 15 max depth: 0.102 +/- 0.033
Feature 22: 4 min samples per leaf, 32 max depth: 0.102 +/- 0.033
Feature 22: 4 min samples per leaf, None max depth: 0.102 +/- 0.033
Feature 22: 16 min samples per leaf, 2 max depth: 0.064 +/- 0.026
Feature 22: 16 min samples per leaf, 6 max depth: 0.088 +/- 0.029
Feature 22: 16 min samples per leaf, 15 max depth: 0.094 +/- 0.030
Feature 22: 16 min samples per leaf, 32 max depth: 0.094 +/- 0.030
Feature 22: 16 min samples per leaf, None max depth: 0.094 +/- 0.030
Feature 22: 64 min samples per leaf, 2 max depth: 0.063 +/- 0.022
Feature 22: 64 min samples per leaf, 6 max depth: 0.069 +/- 0.024
Feature 22: 64 min samples per leaf, 15 max depth: 0.069 +/- 0.024
Feature 22: 64 min samples per leaf, 32 max depth: 0.069 +/- 0.024
Feature 22: 64 min samples per leaf, None max depth: 0.069 +/- 0.024
Feature 23: 1 min samples per leaf, 2 max depth: 0.154 +/- 0.050
Feature 23: 1 min samples per leaf, 6 max depth: 0.178 +/- 0.055
Feature 23: 1 min samples per leaf, 15 max depth: 0.156 +/- 0.049
Feature 23: 1 min samples per leaf, 32 max depth: 0.155 +/- 0.049
Feature 23: 1 min samples per leaf, None max depth: 0.155 +/- 0.049
Feature 23: 4 min samples per leaf, 2 max depth: 0.152 +/- 0.049
Feature 23: 4 min samples per leaf, 6 max depth: 0.177 +/- 0.055
Feature 23: 4 min samples per leaf, 15 max depth: 0.158 +/- 0.050
Feature 23: 4 min samples per leaf, 32 max depth: 0.156 +/- 0.049
Feature 23: 4 min samples per leaf, None max depth: 0.156 +/- 0.049
Feature 23: 16 min samples per leaf, 2 max depth: 0.128 +/- 0.043
Feature 23: 16 min samples per leaf, 6 max depth: 0.146 +/- 0.046
Feature 23: 16 min samples per leaf, 15 max depth: 0.140 +/- 0.044
Feature 23: 16 min samples per leaf, 32 max depth: 0.140 +/- 0.044
Feature 23: 16 min samples per leaf, None max depth: 0.140 +/- 0.044
Feature 23: 64 min samples per leaf, 2 max depth: 0.128 +/- 0.043
Feature 23: 64 min samples per leaf, 6 max depth: 0.128 +/- 0.042
Feature 23: 64 min samples per leaf, 15 max depth: 0.128 +/- 0.042
Feature 23: 64 min samples per leaf, 32 max depth: 0.128 +/- 0.042
Feature 23: 64 min samples per leaf, None max depth: 0.128 +/- 0.042
Feature 24: 1 min samples per leaf, 2 max depth: 0.199 +/- 0.061
Feature 24: 1 min samples per leaf, 6 max depth: 0.233 +/- 0.071
Feature 24: 1 min samples per leaf, 15 max depth: 0.229 +/- 0.070
Feature 24: 1 min samples per leaf, 32 max depth: 0.230 +/- 0.070
Feature 24: 1 min samples per leaf, None max depth: 0.230 +/- 0.070
Feature 24: 4 min samples per leaf, 2 max depth: 0.198 +/- 0.061
Feature 24: 4 min samples per leaf, 6 max depth: 0.233 +/- 0.071
Feature 24: 4 min samples per leaf, 15 max depth: 0.231 +/- 0.070
Feature 24: 4 min samples per leaf, 32 max depth: 0.231 +/- 0.070
Feature 24: 4 min samples per leaf, None max depth: 0.231 +/- 0.070
Feature 24: 16 min samples per leaf, 2 max depth: 0.200 +/- 0.061
Feature 24: 16 min samples per leaf, 6 max depth: 0.230 +/- 0.070
Feature 24: 16 min samples per leaf, 15 max depth: 0.231 +/- 0.070
Feature 24: 16 min samples per leaf, 32 max depth: 0.231 +/- 0.070
Feature 24: 16 min samples per leaf, None max depth: 0.231 +/- 0.070
Feature 24: 64 min samples per leaf, 2 max depth: 0.194 +/- 0.059
Feature 24: 64 min samples per leaf, 6 max depth: 0.200 +/- 0.061
Feature 24: 64 min samples per leaf, 15 max depth: 0.200 +/- 0.061
Feature 24: 64 min samples per leaf, 32 max depth: 0.200 +/- 0.061
Feature 24: 64 min samples per leaf, None max depth: 0.200 +/- 0.061
Feature 25: 1 min samples per leaf, 2 max depth: 0.223 +/- 0.069
Feature 25: 1 min samples per leaf, 6 max depth: 0.250 +/- 0.076
Feature 25: 1 min samples per leaf, 15 max depth: 0.236 +/- 0.072
Feature 25: 1 min samples per leaf, 32 max depth: 0.238 +/- 0.073
Feature 25: 1 min samples per leaf, None max depth: 0.238 +/- 0.073
Feature 25: 4 min samples per leaf, 2 max depth: 0.223 +/- 0.069
Feature 25: 4 min samples per leaf, 6 max depth: 0.248 +/- 0.076
Feature 25: 4 min samples per leaf, 15 max depth: 0.240 +/- 0.073
Feature 25: 4 min samples per leaf, 32 max depth: 0.240 +/- 0.073
Feature 25: 4 min samples per leaf, None max depth: 0.240 +/- 0.073
Feature 25: 16 min samples per leaf, 2 max depth: 0.223 +/- 0.069
Feature 25: 16 min samples per leaf, 6 max depth: 0.242 +/- 0.074
Feature 25: 16 min samples per leaf, 15 max depth: 0.240 +/- 0.073
Feature 25: 16 min samples per leaf, 32 max depth: 0.240 +/- 0.073
Feature 25: 16 min samples per leaf, None max depth: 0.240 +/- 0.073
Feature 25: 64 min samples per leaf, 2 max depth: 0.207 +/- 0.064
Feature 25: 64 min samples per leaf, 6 max depth: 0.210 +/- 0.064
Feature 25: 64 min samples per leaf, 15 max depth: 0.210 +/- 0.064
Feature 25: 64 min samples per leaf, 32 max depth: 0.210 +/- 0.064
Feature 25: 64 min samples per leaf, None max depth: 0.210 +/- 0.064
Feature 26: 1 min samples per leaf, 2 max depth: 0.186 +/- 0.059
Feature 26: 1 min samples per leaf, 6 max depth: 0.186 +/- 0.057
Feature 26: 1 min samples per leaf, 15 max depth: 0.176 +/- 0.054
Feature 26: 1 min samples per leaf, 32 max depth: 0.173 +/- 0.054
Feature 26: 1 min samples per leaf, None max depth: 0.174 +/- 0.054
Feature 26: 4 min samples per leaf, 2 max depth: 0.186 +/- 0.059
Feature 26: 4 min samples per leaf, 6 max depth: 0.185 +/- 0.057
Feature 26: 4 min samples per leaf, 15 max depth: 0.176 +/- 0.054
Feature 26: 4 min samples per leaf, 32 max depth: 0.175 +/- 0.054
Feature 26: 4 min samples per leaf, None max depth: 0.175 +/- 0.054
Feature 26: 16 min samples per leaf, 2 max depth: 0.188 +/- 0.059
Feature 26: 16 min samples per leaf, 6 max depth: 0.192 +/- 0.059
Feature 26: 16 min samples per leaf, 15 max depth: 0.191 +/- 0.059
Feature 26: 16 min samples per leaf, 32 max depth: 0.191 +/- 0.059
Feature 26: 16 min samples per leaf, None max depth: 0.191 +/- 0.059
Feature 26: 64 min samples per leaf, 2 max depth: 0.164 +/- 0.052
Feature 26: 64 min samples per leaf, 6 max depth: 0.163 +/- 0.051
Feature 26: 64 min samples per leaf, 15 max depth: 0.163 +/- 0.051
Feature 26: 64 min samples per leaf, 32 max depth: 0.163 +/- 0.051
Feature 26: 64 min samples per leaf, None max depth: 0.163 +/- 0.051
Feature 27: 1 min samples per leaf, 2 max depth: 0.219 +/- 0.068
Feature 27: 1 min samples per leaf, 6 max depth: 0.229 +/- 0.070
Feature 27: 1 min samples per leaf, 15 max depth: 0.218 +/- 0.066
Feature 27: 1 min samples per leaf, 32 max depth: 0.220 +/- 0.067
Feature 27: 1 min samples per leaf, None max depth: 0.220 +/- 0.067
Feature 27: 4 min samples per leaf, 2 max depth: 0.220 +/- 0.068
Feature 27: 4 min samples per leaf, 6 max depth: 0.229 +/- 0.070
Feature 27: 4 min samples per leaf, 15 max depth: 0.221 +/- 0.067
Feature 27: 4 min samples per leaf, 32 max depth: 0.221 +/- 0.067
Feature 27: 4 min samples per leaf, None max depth: 0.221 +/- 0.067
Feature 27: 16 min samples per leaf, 2 max depth: 0.214 +/- 0.065
Feature 27: 16 min samples per leaf, 6 max depth: 0.224 +/- 0.068
Feature 27: 16 min samples per leaf, 15 max depth: 0.224 +/- 0.068
Feature 27: 16 min samples per leaf, 32 max depth: 0.224 +/- 0.068
Feature 27: 16 min samples per leaf, None max depth: 0.224 +/- 0.068
Feature 27: 64 min samples per leaf, 2 max depth: 0.199 +/- 0.061
Feature 27: 64 min samples per leaf, 6 max depth: 0.202 +/- 0.062
Feature 27: 64 min samples per leaf, 15 max depth: 0.202 +/- 0.062
Feature 27: 64 min samples per leaf, 32 max depth: 0.202 +/- 0.062
Feature 27: 64 min samples per leaf, None max depth: 0.202 +/- 0.062
Feature 28: 1 min samples per leaf, 2 max depth: 0.125 +/- 0.041
Feature 28: 1 min samples per leaf, 6 max depth: 0.112 +/- 0.036
Feature 28: 1 min samples per leaf, 15 max depth: 0.101 +/- 0.032
Feature 28: 1 min samples per leaf, 32 max depth: 0.101 +/- 0.032
Feature 28: 1 min samples per leaf, None max depth: 0.101 +/- 0.032
Feature 28: 4 min samples per leaf, 2 max depth: 0.125 +/- 0.041
Feature 28: 4 min samples per leaf, 6 max depth: 0.112 +/- 0.035
Feature 28: 4 min samples per leaf, 15 max depth: 0.100 +/- 0.031
Feature 28: 4 min samples per leaf, 32 max depth: 0.100 +/- 0.031
Feature 28: 4 min samples per leaf, None max depth: 0.100 +/- 0.031
Feature 28: 16 min samples per leaf, 2 max depth: 0.123 +/- 0.040
Feature 28: 16 min samples per leaf, 6 max depth: 0.120 +/- 0.038
Feature 28: 16 min samples per leaf, 15 max depth: 0.114 +/- 0.037
Feature 28: 16 min samples per leaf, 32 max depth: 0.114 +/- 0.037
Feature 28: 16 min samples per leaf, None max depth: 0.114 +/- 0.037
Feature 28: 64 min samples per leaf, 2 max depth: 0.103 +/- 0.033
Feature 28: 64 min samples per leaf, 6 max depth: 0.103 +/- 0.032
Feature 28: 64 min samples per leaf, 15 max depth: 0.103 +/- 0.032
Feature 28: 64 min samples per leaf, 32 max depth: 0.103 +/- 0.032
Feature 28: 64 min samples per leaf, None max depth: 0.103 +/- 0.032
Feature 29: 1 min samples per leaf, 2 max depth: 0.105 +/- 0.034
Feature 29: 1 min samples per leaf, 6 max depth: 0.119 +/- 0.037
Feature 29: 1 min samples per leaf, 15 max depth: 0.127 +/- 0.039
Feature 29: 1 min samples per leaf, 32 max depth: 0.129 +/- 0.040
Feature 29: 1 min samples per leaf, None max depth: 0.129 +/- 0.040
Feature 29: 4 min samples per leaf, 2 max depth: 0.106 +/- 0.034
Feature 29: 4 min samples per leaf, 6 max depth: 0.119 +/- 0.037
Feature 29: 4 min samples per leaf, 15 max depth: 0.124 +/- 0.038
Feature 29: 4 min samples per leaf, 32 max depth: 0.125 +/- 0.039
Feature 29: 4 min samples per leaf, None max depth: 0.125 +/- 0.039
Feature 29: 16 min samples per leaf, 2 max depth: 0.108 +/- 0.035
Feature 29: 16 min samples per leaf, 6 max depth: 0.117 +/- 0.037
Feature 29: 16 min samples per leaf, 15 max depth: 0.120 +/- 0.038
Feature 29: 16 min samples per leaf, 32 max depth: 0.120 +/- 0.038
Feature 29: 16 min samples per leaf, None max depth: 0.120 +/- 0.038
Feature 29: 64 min samples per leaf, 2 max depth: 0.117 +/- 0.038
Feature 29: 64 min samples per leaf, 6 max depth: 0.122 +/- 0.039
Feature 29: 64 min samples per leaf, 15 max depth: 0.122 +/- 0.039
Feature 29: 64 min samples per leaf, 32 max depth: 0.122 +/- 0.039
Feature 29: 64 min samples per leaf, None max depth: 0.122 +/- 0.039
Feature 30: 1 min samples per leaf, 2 max depth: 0.180 +/- 0.057
Feature 30: 1 min samples per leaf, 6 max depth: 0.211 +/- 0.066
Feature 30: 1 min samples per leaf, 15 max depth: 0.210 +/- 0.065
Feature 30: 1 min samples per leaf, 32 max depth: 0.210 +/- 0.065
Feature 30: 1 min samples per leaf, None max depth: 0.210 +/- 0.065
Feature 30: 4 min samples per leaf, 2 max depth: 0.180 +/- 0.057
Feature 30: 4 min samples per leaf, 6 max depth: 0.209 +/- 0.065
Feature 30: 4 min samples per leaf, 15 max depth: 0.206 +/- 0.064
Feature 30: 4 min samples per leaf, 32 max depth: 0.206 +/- 0.064
Feature 30: 4 min samples per leaf, None max depth: 0.206 +/- 0.064
Feature 30: 16 min samples per leaf, 2 max depth: 0.182 +/- 0.058
Feature 30: 16 min samples per leaf, 6 max depth: 0.202 +/- 0.064
Feature 30: 16 min samples per leaf, 15 max depth: 0.203 +/- 0.064
Feature 30: 16 min samples per leaf, 32 max depth: 0.203 +/- 0.064
Feature 30: 16 min samples per leaf, None max depth: 0.203 +/- 0.064
Feature 30: 64 min samples per leaf, 2 max depth: 0.161 +/- 0.052
Feature 30: 64 min samples per leaf, 6 max depth: 0.165 +/- 0.053
Feature 30: 64 min samples per leaf, 15 max depth: 0.165 +/- 0.053
Feature 30: 64 min samples per leaf, 32 max depth: 0.165 +/- 0.053
Feature 30: 64 min samples per leaf, None max depth: 0.165 +/- 0.053
Feature 31: 1 min samples per leaf, 2 max depth: 0.155 +/- 0.049
Feature 31: 1 min samples per leaf, 6 max depth: 0.173 +/- 0.054
Feature 31: 1 min samples per leaf, 15 max depth: 0.161 +/- 0.050
Feature 31: 1 min samples per leaf, 32 max depth: 0.160 +/- 0.050
Feature 31: 1 min samples per leaf, None max depth: 0.160 +/- 0.050
Feature 31: 4 min samples per leaf, 2 max depth: 0.156 +/- 0.049
Feature 31: 4 min samples per leaf, 6 max depth: 0.168 +/- 0.052
Feature 31: 4 min samples per leaf, 15 max depth: 0.153 +/- 0.048
Feature 31: 4 min samples per leaf, 32 max depth: 0.152 +/- 0.048
Feature 31: 4 min samples per leaf, None max depth: 0.152 +/- 0.048
Feature 31: 16 min samples per leaf, 2 max depth: 0.148 +/- 0.047
Feature 31: 16 min samples per leaf, 6 max depth: 0.159 +/- 0.049
Feature 31: 16 min samples per leaf, 15 max depth: 0.157 +/- 0.049
Feature 31: 16 min samples per leaf, 32 max depth: 0.157 +/- 0.049
Feature 31: 16 min samples per leaf, None max depth: 0.157 +/- 0.049
Feature 31: 64 min samples per leaf, 2 max depth: 0.137 +/- 0.044
Feature 31: 64 min samples per leaf, 6 max depth: 0.139 +/- 0.045
Feature 31: 64 min samples per leaf, 15 max depth: 0.139 +/- 0.045
Feature 31: 64 min samples per leaf, 32 max depth: 0.139 +/- 0.045
Feature 31: 64 min samples per leaf, None max depth: 0.139 +/- 0.045
Feature 32: 1 min samples per leaf, 2 max depth: 0.172 +/- 0.053
Feature 32: 1 min samples per leaf, 6 max depth: 0.165 +/- 0.051
Feature 32: 1 min samples per leaf, 15 max depth: 0.163 +/- 0.051
Feature 32: 1 min samples per leaf, 32 max depth: 0.164 +/- 0.051
Feature 32: 1 min samples per leaf, None max depth: 0.164 +/- 0.051
Feature 32: 4 min samples per leaf, 2 max depth: 0.172 +/- 0.053
Feature 32: 4 min samples per leaf, 6 max depth: 0.166 +/- 0.052
Feature 32: 4 min samples per leaf, 15 max depth: 0.163 +/- 0.051
Feature 32: 4 min samples per leaf, 32 max depth: 0.163 +/- 0.051
Feature 32: 4 min samples per leaf, None max depth: 0.163 +/- 0.051
Feature 32: 16 min samples per leaf, 2 max depth: 0.174 +/- 0.054
Feature 32: 16 min samples per leaf, 6 max depth: 0.169 +/- 0.053
Feature 32: 16 min samples per leaf, 15 max depth: 0.169 +/- 0.053
Feature 32: 16 min samples per leaf, 32 max depth: 0.169 +/- 0.053
Feature 32: 16 min samples per leaf, None max depth: 0.169 +/- 0.053
Feature 32: 64 min samples per leaf, 2 max depth: 0.173 +/- 0.053
Feature 32: 64 min samples per leaf, 6 max depth: 0.174 +/- 0.054
Feature 32: 64 min samples per leaf, 15 max depth: 0.174 +/- 0.054
Feature 32: 64 min samples per leaf, 32 max depth: 0.174 +/- 0.054
Feature 32: 64 min samples per leaf, None max depth: 0.174 +/- 0.054
Feature 33: 1 min samples per leaf, 2 max depth: 0.052 +/- 0.022
Feature 33: 1 min samples per leaf, 6 max depth: 0.047 +/- 0.016
Feature 33: 1 min samples per leaf, 15 max depth: 0.047 +/- 0.017
Feature 33: 1 min samples per leaf, 32 max depth: 0.053 +/- 0.018
Feature 33: 1 min samples per leaf, None max depth: 0.053 +/- 0.018
Feature 33: 4 min samples per leaf, 2 max depth: 0.052 +/- 0.022
Feature 33: 4 min samples per leaf, 6 max depth: 0.045 +/- 0.016
Feature 33: 4 min samples per leaf, 15 max depth: 0.046 +/- 0.018
Feature 33: 4 min samples per leaf, 32 max depth: 0.047 +/- 0.018
Feature 33: 4 min samples per leaf, None max depth: 0.047 +/- 0.018
Feature 33: 16 min samples per leaf, 2 max depth: 0.043 +/- 0.021
Feature 33: 16 min samples per leaf, 6 max depth: 0.042 +/- 0.017
Feature 33: 16 min samples per leaf, 15 max depth: 0.043 +/- 0.017
Feature 33: 16 min samples per leaf, 32 max depth: 0.043 +/- 0.017
Feature 33: 16 min samples per leaf, None max depth: 0.043 +/- 0.017
Feature 33: 64 min samples per leaf, 2 max depth: 0.046 +/- 0.021
Feature 33: 64 min samples per leaf, 6 max depth: 0.046 +/- 0.020
Feature 33: 64 min samples per leaf, 15 max depth: 0.046 +/- 0.020
Feature 33: 64 min samples per leaf, 32 max depth: 0.046 +/- 0.020
Feature 33: 64 min samples per leaf, None max depth: 0.046 +/- 0.020
Feature 34: 1 min samples per leaf, 2 max depth: 0.148 +/- 0.046
Feature 34: 1 min samples per leaf, 6 max depth: 0.183 +/- 0.056
Feature 34: 1 min samples per leaf, 15 max depth: 0.182 +/- 0.055
Feature 34: 1 min samples per leaf, 32 max depth: 0.181 +/- 0.055
Feature 34: 1 min samples per leaf, None max depth: 0.181 +/- 0.055
Feature 34: 4 min samples per leaf, 2 max depth: 0.149 +/- 0.046
Feature 34: 4 min samples per leaf, 6 max depth: 0.182 +/- 0.056
Feature 34: 4 min samples per leaf, 15 max depth: 0.178 +/- 0.054
Feature 34: 4 min samples per leaf, 32 max depth: 0.178 +/- 0.054
Feature 34: 4 min samples per leaf, None max depth: 0.178 +/- 0.054
Feature 34: 16 min samples per leaf, 2 max depth: 0.147 +/- 0.046
Feature 34: 16 min samples per leaf, 6 max depth: 0.169 +/- 0.052
Feature 34: 16 min samples per leaf, 15 max depth: 0.171 +/- 0.053
Feature 34: 16 min samples per leaf, 32 max depth: 0.171 +/- 0.053
Feature 34: 16 min samples per leaf, None max depth: 0.171 +/- 0.053
Feature 34: 64 min samples per leaf, 2 max depth: 0.143 +/- 0.044
Feature 34: 64 min samples per leaf, 6 max depth: 0.148 +/- 0.046
Feature 34: 64 min samples per leaf, 15 max depth: 0.148 +/- 0.046
Feature 34: 64 min samples per leaf, 32 max depth: 0.148 +/- 0.046
Feature 34: 64 min samples per leaf, None max depth: 0.148 +/- 0.046
Feature 35: 1 min samples per leaf, 2 max depth: 0.162 +/- 0.050
Feature 35: 1 min samples per leaf, 6 max depth: 0.196 +/- 0.060
Feature 35: 1 min samples per leaf, 15 max depth: 0.195 +/- 0.060
Feature 35: 1 min samples per leaf, 32 max depth: 0.197 +/- 0.061
Feature 35: 1 min samples per leaf, None max depth: 0.197 +/- 0.061
Feature 35: 4 min samples per leaf, 2 max depth: 0.161 +/- 0.050
Feature 35: 4 min samples per leaf, 6 max depth: 0.196 +/- 0.060
Feature 35: 4 min samples per leaf, 15 max depth: 0.193 +/- 0.059
Feature 35: 4 min samples per leaf, 32 max depth: 0.192 +/- 0.059
Feature 35: 4 min samples per leaf, None max depth: 0.192 +/- 0.059
Feature 35: 16 min samples per leaf, 2 max depth: 0.163 +/- 0.050
Feature 35: 16 min samples per leaf, 6 max depth: 0.187 +/- 0.057
Feature 35: 16 min samples per leaf, 15 max depth: 0.187 +/- 0.057
Feature 35: 16 min samples per leaf, 32 max depth: 0.187 +/- 0.057
Feature 35: 16 min samples per leaf, None max depth: 0.187 +/- 0.057
Feature 35: 64 min samples per leaf, 2 max depth: 0.157 +/- 0.048
Feature 35: 64 min samples per leaf, 6 max depth: 0.160 +/- 0.049
Feature 35: 64 min samples per leaf, 15 max depth: 0.160 +/- 0.049
Feature 35: 64 min samples per leaf, 32 max depth: 0.160 +/- 0.049
Feature 35: 64 min samples per leaf, None max depth: 0.160 +/- 0.049
Feature 36: 1 min samples per leaf, 2 max depth: 0.135 +/- 0.044
Feature 36: 1 min samples per leaf, 6 max depth: 0.119 +/- 0.040
Feature 36: 1 min samples per leaf, 15 max depth: 0.115 +/- 0.039
Feature 36: 1 min samples per leaf, 32 max depth: 0.115 +/- 0.038
Feature 36: 1 min samples per leaf, None max depth: 0.115 +/- 0.038
Feature 36: 4 min samples per leaf, 2 max depth: 0.134 +/- 0.044
Feature 36: 4 min samples per leaf, 6 max depth: 0.115 +/- 0.039
Feature 36: 4 min samples per leaf, 15 max depth: 0.107 +/- 0.037
Feature 36: 4 min samples per leaf, 32 max depth: 0.107 +/- 0.037
Feature 36: 4 min samples per leaf, None max depth: 0.107 +/- 0.037
Feature 36: 16 min samples per leaf, 2 max depth: 0.134 +/- 0.043
Feature 36: 16 min samples per leaf, 6 max depth: 0.131 +/- 0.043
Feature 36: 16 min samples per leaf, 15 max depth: 0.129 +/- 0.042
Feature 36: 16 min samples per leaf, 32 max depth: 0.129 +/- 0.042
Feature 36: 16 min samples per leaf, None max depth: 0.129 +/- 0.042
Feature 36: 64 min samples per leaf, 2 max depth: 0.133 +/- 0.042
Feature 36: 64 min samples per leaf, 6 max depth: 0.132 +/- 0.042
Feature 36: 64 min samples per leaf, 15 max depth: 0.132 +/- 0.042
Feature 36: 64 min samples per leaf, 32 max depth: 0.132 +/- 0.042
Feature 36: 64 min samples per leaf, None max depth: 0.132 +/- 0.042
Feature 37: 1 min samples per leaf, 2 max depth: 0.162 +/- 0.053
Feature 37: 1 min samples per leaf, 6 max depth: 0.180 +/- 0.057
Feature 37: 1 min samples per leaf, 15 max depth: 0.169 +/- 0.053
Feature 37: 1 min samples per leaf, 32 max depth: 0.171 +/- 0.054
Feature 37: 1 min samples per leaf, None max depth: 0.171 +/- 0.054
Feature 37: 4 min samples per leaf, 2 max depth: 0.162 +/- 0.053
Feature 37: 4 min samples per leaf, 6 max depth: 0.179 +/- 0.057
Feature 37: 4 min samples per leaf, 15 max depth: 0.169 +/- 0.053
Feature 37: 4 min samples per leaf, 32 max depth: 0.168 +/- 0.053
Feature 37: 4 min samples per leaf, None max depth: 0.168 +/- 0.053
Feature 37: 16 min samples per leaf, 2 max depth: 0.161 +/- 0.053
Feature 37: 16 min samples per leaf, 6 max depth: 0.176 +/- 0.056
Feature 37: 16 min samples per leaf, 15 max depth: 0.172 +/- 0.055
Feature 37: 16 min samples per leaf, 32 max depth: 0.172 +/- 0.055
Feature 37: 16 min samples per leaf, None max depth: 0.172 +/- 0.055
Feature 37: 64 min samples per leaf, 2 max depth: 0.134 +/- 0.043
Feature 37: 64 min samples per leaf, 6 max depth: 0.136 +/- 0.044
Feature 37: 64 min samples per leaf, 15 max depth: 0.136 +/- 0.044
Feature 37: 64 min samples per leaf, 32 max depth: 0.136 +/- 0.044
Feature 37: 64 min samples per leaf, None max depth: 0.136 +/- 0.044
Feature 38: 1 min samples per leaf, 2 max depth: 0.129 +/- 0.042
Feature 38: 1 min samples per leaf, 6 max depth: 0.126 +/- 0.042
Feature 38: 1 min samples per leaf, 15 max depth: 0.115 +/- 0.038
Feature 38: 1 min samples per leaf, 32 max depth: 0.115 +/- 0.038
Feature 38: 1 min samples per leaf, None max depth: 0.115 +/- 0.038
Feature 38: 4 min samples per leaf, 2 max depth: 0.130 +/- 0.042
Feature 38: 4 min samples per leaf, 6 max depth: 0.130 +/- 0.042
Feature 38: 4 min samples per leaf, 15 max depth: 0.122 +/- 0.040
Feature 38: 4 min samples per leaf, 32 max depth: 0.122 +/- 0.040
Feature 38: 4 min samples per leaf, None max depth: 0.122 +/- 0.040
Feature 38: 16 min samples per leaf, 2 max depth: 0.125 +/- 0.040
Feature 38: 16 min samples per leaf, 6 max depth: 0.132 +/- 0.043
Feature 38: 16 min samples per leaf, 15 max depth: 0.128 +/- 0.041
Feature 38: 16 min samples per leaf, 32 max depth: 0.128 +/- 0.041
Feature 38: 16 min samples per leaf, None max depth: 0.128 +/- 0.041
Feature 38: 64 min samples per leaf, 2 max depth: 0.121 +/- 0.039
Feature 38: 64 min samples per leaf, 6 max depth: 0.124 +/- 0.041
Feature 38: 64 min samples per leaf, 15 max depth: 0.124 +/- 0.041
Feature 38: 64 min samples per leaf, 32 max depth: 0.124 +/- 0.041
Feature 38: 64 min samples per leaf, None max depth: 0.124 +/- 0.041
Feature 39: 1 min samples per leaf, 2 max depth: 0.092 +/- 0.032
Feature 39: 1 min samples per leaf, 6 max depth: 0.088 +/- 0.031
Feature 39: 1 min samples per leaf, 15 max depth: 0.072 +/- 0.025
Feature 39: 1 min samples per leaf, 32 max depth: 0.069 +/- 0.024
Feature 39: 1 min samples per leaf, None max depth: 0.069 +/- 0.024
Feature 39: 4 min samples per leaf, 2 max depth: 0.090 +/- 0.031
Feature 39: 4 min samples per leaf, 6 max depth: 0.082 +/- 0.029
Feature 39: 4 min samples per leaf, 15 max depth: 0.063 +/- 0.023
Feature 39: 4 min samples per leaf, 32 max depth: 0.063 +/- 0.023
Feature 39: 4 min samples per leaf, None max depth: 0.063 +/- 0.023
Feature 39: 16 min samples per leaf, 2 max depth: 0.091 +/- 0.031
Feature 39: 16 min samples per leaf, 6 max depth: 0.079 +/- 0.028
Feature 39: 16 min samples per leaf, 15 max depth: 0.072 +/- 0.026
Feature 39: 16 min samples per leaf, 32 max depth: 0.072 +/- 0.026
Feature 39: 16 min samples per leaf, None max depth: 0.072 +/- 0.026
Feature 39: 64 min samples per leaf, 2 max depth: 0.074 +/- 0.025
Feature 39: 64 min samples per leaf, 6 max depth: 0.074 +/- 0.025
Feature 39: 64 min samples per leaf, 15 max depth: 0.074 +/- 0.025
Feature 39: 64 min samples per leaf, 32 max depth: 0.074 +/- 0.025
Feature 39: 64 min samples per leaf, None max depth: 0.074 +/- 0.025
Feature 40: 1 min samples per leaf, 2 max depth: 0.197 +/- 0.061
Feature 40: 1 min samples per leaf, 6 max depth: 0.188 +/- 0.058
Feature 40: 1 min samples per leaf, 15 max depth: 0.176 +/- 0.055
Feature 40: 1 min samples per leaf, 32 max depth: 0.174 +/- 0.054
Feature 40: 1 min samples per leaf, None max depth: 0.174 +/- 0.054
Feature 40: 4 min samples per leaf, 2 max depth: 0.198 +/- 0.061
Feature 40: 4 min samples per leaf, 6 max depth: 0.194 +/- 0.060
Feature 40: 4 min samples per leaf, 15 max depth: 0.186 +/- 0.057
Feature 40: 4 min samples per leaf, 32 max depth: 0.186 +/- 0.057
Feature 40: 4 min samples per leaf, None max depth: 0.186 +/- 0.057
Feature 40: 16 min samples per leaf, 2 max depth: 0.197 +/- 0.061
Feature 40: 16 min samples per leaf, 6 max depth: 0.202 +/- 0.062
Feature 40: 16 min samples per leaf, 15 max depth: 0.202 +/- 0.062
Feature 40: 16 min samples per leaf, 32 max depth: 0.202 +/- 0.062
Feature 40: 16 min samples per leaf, None max depth: 0.202 +/- 0.062
Feature 40: 64 min samples per leaf, 2 max depth: 0.189 +/- 0.059
Feature 40: 64 min samples per leaf, 6 max depth: 0.193 +/- 0.060
Feature 40: 64 min samples per leaf, 15 max depth: 0.193 +/- 0.060
Feature 40: 64 min samples per leaf, 32 max depth: 0.193 +/- 0.060
Feature 40: 64 min samples per leaf, None max depth: 0.193 +/- 0.060
Feature 41: 1 min samples per leaf, 2 max depth: 0.214 +/- 0.065
Feature 41: 1 min samples per leaf, 6 max depth: 0.209 +/- 0.064
Feature 41: 1 min samples per leaf, 15 max depth: 0.211 +/- 0.065
Feature 41: 1 min samples per leaf, 32 max depth: 0.211 +/- 0.065
Feature 41: 1 min samples per leaf, None max depth: 0.211 +/- 0.065
Feature 41: 4 min samples per leaf, 2 max depth: 0.214 +/- 0.065
Feature 41: 4 min samples per leaf, 6 max depth: 0.212 +/- 0.065
Feature 41: 4 min samples per leaf, 15 max depth: 0.206 +/- 0.063
Feature 41: 4 min samples per leaf, 32 max depth: 0.206 +/- 0.063
Feature 41: 4 min samples per leaf, None max depth: 0.206 +/- 0.063
Feature 41: 16 min samples per leaf, 2 max depth: 0.213 +/- 0.065
Feature 41: 16 min samples per leaf, 6 max depth: 0.215 +/- 0.065
Feature 41: 16 min samples per leaf, 15 max depth: 0.215 +/- 0.065
Feature 41: 16 min samples per leaf, 32 max depth: 0.215 +/- 0.065
Feature 41: 16 min samples per leaf, None max depth: 0.215 +/- 0.065
Feature 41: 64 min samples per leaf, 2 max depth: 0.205 +/- 0.062
Feature 41: 64 min samples per leaf, 6 max depth: 0.207 +/- 0.063
Feature 41: 64 min samples per leaf, 15 max depth: 0.207 +/- 0.063
Feature 41: 64 min samples per leaf, 32 max depth: 0.207 +/- 0.063
Feature 41: 64 min samples per leaf, None max depth: 0.207 +/- 0.063



In [45]:

    
import pickle
with open('data.pickle','wb') as f:
    pickle.dump(results,f)



In [59]:

    
results2[:,0,3]









    Out[59]:





array([  0.00000000e+00,   0.00000000e+00,  -1.69723722e-04,
         0.00000000e+00,   1.69704999e-03,  -1.61904458e-03,
         0.00000000e+00,   1.37203856e-04,   1.99234317e-04,
         0.00000000e+00,   3.85266740e-05,  -1.39141356e-04,
        -3.18332595e-04,   0.00000000e+00,   0.00000000e+00,
         0.00000000e+00,  -2.32184524e-03,  -2.05099956e-05,
         2.70573941e-04,   0.00000000e+00,   0.00000000e+00,
        -1.02199441e-04,  -3.67148617e-04,  -2.51297232e-05,
         0.00000000e+00,   0.00000000e+00,  -2.10769807e-03,
         0.00000000e+00,  -1.48029305e-04,   2.95277257e-04,
         3.69270851e-05,   0.00000000e+00,   0.00000000e+00,
        -9.05272440e-05,   0.00000000e+00,   0.00000000e+00,
         0.00000000e+00,   9.19217603e-05,  -1.43068347e-04,
        -2.40110541e-04,   0.00000000e+00,   0.00000000e+00])



In [78]:

    
results2 = results - np.tile(results[:,0:1,4:5,:],(1,4,5,1)) # Subtract the no max depth, no min sample condition.  
results2 = results2[:,:,:,:n_splits] # Only 12 splits used.  
results2 = results2.mean(axis=3) / (results2.std(axis=3)/np.sqrt(n_splits)) 
results2[np.where(np.isnan(results2))] = 0

fig,axes = plt.subplots(14,3,sharex=True,sharey=True,figsize=(10,20))
for col,ax in enumerate(axes.flat):
    im = ax.pcolormesh(results2[col,:,:],vmin=-5,vmax=5,cmap='RdBu')
    ax.set_xticks(np.arange(5)+0.5)
    ax.set_xticklabels(n_depth)
    ax.set_yticks(np.arange(4)+0.5)
    ax.set_yticklabels(n_samples_leaf)
    ax.set_title('Feature %d' % col)
cbar_ax = fig.add_axes([1.05, 0.15, 0.05, 0.7])
cbar_ax.set_xlabel('Z-Score for correlation vs default condition')
plt.colorbar(im, cax=cbar_ax)
fig.text(0.5, -0.02, 'max_depth', ha='center')
fig.text(-0.02, 0.5, 'min_samples_leaf', va='center', rotation='vertical')
plt.tight_layout()



In [76]:

    
np.where(np.isnan(results2))









    Out[76]:





(array([ 0,  0,  1,  1,  2,  3,  3,  4,  5,  6,  6,  7,  8,  9,  9, 10, 11,
        12, 13, 13, 14, 14, 15, 15, 16, 17, 18, 19, 19, 20, 20, 21, 22, 23,
        24, 24, 25, 25, 26, 27, 27, 28, 29, 30, 31, 31, 32, 32, 33, 34, 34,
        35, 35, 36, 36, 37, 38, 39, 40, 40, 41, 41]),
 array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]),
 array([3, 4, 3, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4, 4, 3, 4, 3, 4, 3,
        4, 4, 4, 4, 3, 4, 3, 4, 4, 4, 4, 3, 4, 3, 4, 4, 3, 4, 4, 4, 4, 3, 4,
        3, 4, 4, 3, 4, 3, 4, 3, 4, 4, 4, 4, 3, 4, 3, 4]))



In [70]:









    Out[70]:





array([ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
        0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
        0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
        0.,  0.,  0.])



In [46]:

    
import matplotlib.pyplot as plt
fig,axes = plt.subplots(14,3,sharex=True,sharey=True,figsize=(10,20))
for col,ax in enumerate(axes.flat):
    im = ax.pcolormesh(results[col,:,:,:].mean(axis=2)-results[col,:,:,:].mean(axis=2).max(),vmin=-0.3,vmax=0.0,cmap='gray')
    ax.set_xticks(np.arange(5)+0.5)
    ax.set_xticklabels(n_depth)
    ax.set_yticks(np.arange(4)+0.5)
    ax.set_yticklabels(n_samples_leaf)
    ax.set_title('Feature %d' % col)
cbar_ax = fig.add_axes([1.05, 0.15, 0.05, 0.7])
cbar_ax.set_xlabel('Correlation - \nBest Correlation')
plt.colorbar(im, cax=cbar_ax)
fig.text(0.5, -0.02, 'max_depth', ha='center')
fig.text(-0.02, 0.5, 'min_samples_leaf', va='center', rotation='vertical')
plt.tight_layout()









    



/anaconda/lib/python3.4/site-packages/matplotlib/figure.py:1653: UserWarning: This figure includes Axes that are not compatible with tight_layout, so its results might be incorrect.
  warnings.warn("This figure includes Axes that are not "



In [44]:

    
# Now we focus just on max_depth?  
n_splits = 10
n_depth = [2,5,8,12,16,25,None]
results = np.zeros((42,len(n_depth),n_splits))
shuffle_split = ShuffleSplit(n_obs,n_splits,test_size=0.2,random_state=0)
for col in range(42):
    for j,max_depth in enumerate(n_depth):
        rfc = RandomForestRegressor(n_estimators=10,max_features=None,
                                        min_samples_leaf=1,
                                        max_depth=max_depth,oob_score=False,
                                        n_jobs=-1,random_state=0)
        X = X_all['dragon+all']
        Y = Y_all_mask['mean_std'][:,col]
        for k,(train,test) in enumerate(shuffle_split):
            observed = Y[test]
            rfc.fit(X[train,:],Y[train])
            predicted = rfc.predict(X[test,:])
            results[col,j,k] = np.corrcoef(predicted,observed)[1,0]
    means = results[col,:,:].mean(axis=1)
    sems = results[col,:,:].std(axis=1)/np.sqrt(n_splits)
    print(('Desc. %d:'+len(n_depth)*' [%.3f],') % \
          tuple([col]+[means[i] for i in range(len(n_depth))]))









    



Desc. 0: [0.509], [0.566], [0.577], [0.581], [0.579], [0.576], [0.576],
Desc. 1: [0.502], [0.600], [0.619], [0.623], [0.622], [0.618], [0.617],
Desc. 2: [0.547], [0.588], [0.593], [0.591], [0.584], [0.576], [0.573],
Desc. 3: [0.524], [0.624], [0.642], [0.640], [0.649], [0.641], [0.641],
Desc. 4: [0.548], [0.651], [0.664], [0.675], [0.663], [0.667], [0.672],
Desc. 5: [0.393], [0.415], [0.399], [0.407], [0.409], [0.399], [0.402],
Desc. 6: [0.589], [0.630], [0.606], [0.604], [0.604], [0.605], [0.604],
Desc. 7: [0.312], [0.330], [0.304], [0.283], [0.280], [0.290], [0.285],
Desc. 8: [0.216], [0.267], [0.282], [0.291], [0.287], [0.281], [0.286],
Desc. 9: [0.452], [0.541], [0.542], [0.537], [0.535], [0.534], [0.531],
Desc. 10: [0.335], [0.437], [0.449], [0.444], [0.451], [0.451], [0.447],
Desc. 11: [0.380], [0.394], [0.380], [0.389], [0.384], [0.383], [0.382],
Desc. 12: [0.257], [0.232], [0.209], [0.207], [0.204], [0.207], [0.208],
Desc. 13: [0.347], [0.414], [0.416], [0.402], [0.400], [0.404], [0.402],
Desc. 14: [0.379], [0.462], [0.472], [0.478], [0.476], [0.478], [0.478],
Desc. 15: [0.261], [0.295], [0.298], [0.292], [0.291], [0.292], [0.292],
Desc. 16: [0.399], [0.479], [0.493], [0.482], [0.490], [0.483], [0.489],
Desc. 17: [0.299], [0.274], [0.276], [0.271], [0.275], [0.273], [0.274],
Desc. 18: [0.251], [0.279], [0.267], [0.264], [0.252], [0.245], [0.243],
Desc. 19: [0.441], [0.500], [0.517], [0.509], [0.507], [0.504], [0.504],
Desc. 20: [0.514], [0.571], [0.573], [0.572], [0.564], [0.569], [0.570],
Desc. 21: [0.019], [-0.041], [-0.054], [-0.066], [-0.065], [-0.067], [-0.066],
Desc. 22: [0.109], [0.192], [0.198], [0.218], [0.219], [0.221], [0.222],
Desc. 23: [0.308], [0.349], [0.347], [0.316], [0.296], [0.305], [0.305],
Desc. 24: [0.398], [0.459], [0.464], [0.457], [0.453], [0.451], [0.451],
Desc. 25: [0.465], [0.514], [0.501], [0.491], [0.491], [0.486], [0.486],
Desc. 26: [0.386], [0.406], [0.381], [0.370], [0.366], [0.364], [0.366],
Desc. 27: [0.462], [0.462], [0.441], [0.434], [0.440], [0.431], [0.431],
Desc. 28: [0.239], [0.203], [0.201], [0.190], [0.189], [0.190], [0.190],
Desc. 29: [0.184], [0.214], [0.242], [0.239], [0.240], [0.241], [0.241],
Desc. 30: [0.357], [0.426], [0.434], [0.434], [0.435], [0.434], [0.433],
Desc. 31: [0.290], [0.342], [0.348], [0.329], [0.329], [0.332], [0.331],
Desc. 32: [0.365], [0.334], [0.343], [0.341], [0.336], [0.337], [0.337],
Desc. 33: [0.120], [0.098], [0.087], [0.100], [0.103], [0.103], [0.103],
Desc. 34: [0.294], [0.369], [0.373], [0.368], [0.375], [0.369], [0.369],
Desc. 35: [0.324], [0.404], [0.412], [0.402], [0.409], [0.414], [0.414],
Desc. 36: [0.253], [0.247], [0.220], [0.219], [0.224], [0.223], [0.223],
Desc. 37: [0.327], [0.368], [0.366], [0.343], [0.340], [0.334], [0.334],
Desc. 38: [0.229], [0.243], [0.236], [0.223], [0.221], [0.211], [0.213],
Desc. 39: [0.166], [0.171], [0.158], [0.133], [0.129], [0.126], [0.124],
Desc. 40: [0.397], [0.379], [0.345], [0.333], [0.337], [0.332], [0.333],
Desc. 41: [0.453], [0.426], [0.422], [0.419], [0.423], [0.423], [0.423],



In [45]:

    
# Visualizing the dependence on n_depth
plt.figure(figsize=(9,7))
plt.pcolormesh(results.mean(axis=2) - np.tile(results.mean(axis=2).max(axis=1),(7,1)).T, vmin=-0.2, vmax=0.0)
plt.xticks(np.arange(len(n_depth))+0.5,n_depth)
plt.yticks(np.arange(42)+0.5,np.arange(42))
plt.ylim(0,42)
plt.ylabel('Feature #')
plt.set_cmap('gray')
ax = plt.colorbar()
ax.set_label('Correlation - \nBest Correlation')
plt.figure()
n_depth_ = [_ if _ is not None else 100 for _ in n_depth]
plt.plot(n_depth_,results[:,:,:].mean(axis=2).T)
plt.xscale('log')
plt.xlabel('Max Features')
plt.ylabel('Correlation')
_ = plt.xlim(2,100)



In [30]:

    
def f_transformation(x, k0=1.0, k1=1.0):
    return 100*(k0*(x/100)**(k1*0.5) - k0*(x/100)**(k1*2))
    
def sse(x, mean, stdev):
    predicted_stdev = f_transformation(mean, k0=x[0], k1=x[1])
    #predicted_mean = f_transformation2(predicted[i], k0=x[0], k1=x[1], k2=x[2])
    sse = np.sum((predicted_stdev - stdev)**2)
    return sse
    
fig,axes = plt.subplots(7,3,figsize=(7,12))
ax = axes.flat
f_coefs = {col:None for col in range(21)}
from scipy.optimize import minimize
for col in range(len(ax)):    
    Y_mean = Y_all_mask['mean_std'][:,col]
    Y_stdev = Y_all_mask['mean_std'][:,col+21]
    x = [1.0,1.0]
    res = minimize(sse, x, args=(Y_mean,Y_stdev), method='L-BFGS-B')
    print(col,res.x)
    f_coefs[col] = res.x
    ax[col].scatter(Y_mean,Y_stdev,s=0.1)
    x_ = np.linspace(0,100,100)
    ax[col].plot(x_,f_transformation(x_, k0=res.x[0], k1=res.x[1]))#, k2=res.x[2]))









    



0 [ 0.71787708  1.0783493 ]
1 [ 0.53926198  1.11949144]
2 [ 0.92942102  1.16972938]
3 [ 0.77463092  1.13112086]
4 [ 0.86310257  1.14042383]
5 [ 1.14091214  1.30928986]
6 [ 0.90388954  1.14596541]
7 [ 0.92814885  1.21977815]
8 [ 1.01172533  1.22356058]
9 [ 0.89110208  1.19840522]
10 [ 0.94056219  1.21105179]
11 [ 0.89759906  1.23709485]
12 [ 1.00520858  1.27475276]
13 [ 0.91949363  1.23973119]
14 [ 1.02708055  1.27648987]
15 [ 1.05155092  1.26929085]
16 [ 0.95116679  1.19434156]
17 [ 1.13320597  1.3030216 ]
18 [ 1.07812377  1.25676318]
19 [ 0.90582241  1.13672918]
20 [ 0.80440721  1.12741897]



In [31]:

    
# Balance between directly fitting stdev and applying a function to the fit of the mean.  

n_splits = 10
shuffle_split = ShuffleSplit(n_obs,n_splits,test_size=0.2,random_state=0)
#predictions_mean = {i:[None]*n_splits for i in range(21)}
#predictions_stdev = {i:[None]*n_splits for i in range(21)}
for col in range(21):
    X = X_all['dragon+all']
    Y_mean = Y_all_mask['mean_std'][:,col]
    Y_stdev = Y_all_mask['mean_std'][:,col+21]
              
    for k,(train,test) in enumerate(shuffle_split):
        print(col,k)
        rfc_mean = RandomForestRegressor(n_estimators=30,max_features=None,
                                        min_samples_leaf=1,
                                        max_depth=None,
                                        n_jobs=-1,random_state=0)
        rfc_stdev = RandomForestRegressor(n_estimators=30,max_features=None,
                                        min_samples_leaf=1,
                                        max_depth=None,
                                        n_jobs=-1,random_state=0)
    
        rfc_mean.fit(X[train,:],Y_mean[train])
        rfc_stdev.fit(X[train,:],Y_stdev[train])
        predictions_mean[col][k] = rfc_mean.predict(X[test,:])
        predictions_stdev[col][k] = rfc_stdev.predict(X[test,:])



In [40]:

    
# Balance between directly fitting stdev and applying a function to the fit of the mean.  

results = np.zeros((21,n_splits,35))
#predictions_mean = {i:[None]*n_splits for i in range(21)}
#predictions_stdev = {i:[None]*n_splits for i in range(21)}
for col in range(21):
    X = X_all['dragon+all']
    Y_mean = Y_all_mask['mean_std'][:,col]
    Y_stdev = Y_all_mask['mean_std'][:,col+21]
              
    for k,(train,test) in enumerate(shuffle_split):
        observed = Y_stdev[test]
        p_m = predictions_mean[col][k]
        p_s = predictions_stdev[col][k]
        for i,a in enumerate(np.linspace(0,1,35)):
            p_s_transformed = f_transformation(p_m, k0=f_coefs[col][0], k1=f_coefs[col][0])
            predicted = a*p_s_transformed + (1-a)*p_s
            results[col,k,i] = np.corrcoef(predicted,observed)[1,0]
plt.pcolormesh(np.linspace(0,1,35),np.arange(21),results.mean(axis=1))# - np.tile(results.mean(axis=1).max(axis=1),(35,1)).T)
plt.xlabel('Weight given to transformation of the mean')
plt.ylabel('Feature #')
plt.colorbar()
plt.figure()
plt.plot(np.linspace(0,1,35),results.mean(axis=1).T-results.mean(axis=1)[:,0])
plt.xlabel('Weight given to transformation of the mean')
for col in range(21):
    max_loc = np.argmax(results.mean(axis=1)[col,:])
    print(col,np.linspace(0,1,35)[max_loc])









    



0 1.0
1 0.0294117647059
2 1.0
3 0.617647058824
4 0.794117647059
5 0.970588235294
6 0.676470588235
7 0.882352941176
8 1.0
9 0.764705882353
10 0.911764705882
11 0.5
12 1.0
13 0.352941176471
14 0.676470588235
15 1.0
16 1.0
17 1.0
18 1.0
19 1.0
20 0.382352941176



In [26]:

    
write = True # Set to True to actually generate the prediction files.  
n_estimators = 1000 # Set this to a high number (e.g. 1000) to get a good fit.  

# Best parameters, determined independently.  
max_features = {'int':{'mean':None,'sigma':None},
                'ple':{'mean':None,'sigma':None},
                'dec':{'mean':None,'sigma':None}}
min_samples_leaf = {'int':{'mean':1,'sigma':4},
                'ple':{'mean':1,'sigma':1},
                'dec':{'mean':1,'sigma':1}}
max_depth = {'int':{'mean':None,'sigma':2},
                'ple':{'mean':10,'sigma':10},
                'dec':{'mean':10,'sigma':10}}
et = {'int':{'mean':True,'sigma':True},
                'ple':{'mean':False,'sigma':False},
                'dec':{'mean':False,'sigma':False}}
#et['int'] = {'mean':False,'sigma':False} # Uncomment to get a correct score estimate, or leave commented to get best fit.  
use_mask = {'int':{'mean':False,'sigma':True},
                'ple':{'mean':False,'sigma':True},
                'dec':{'mean':False,'sigma':True}}



In [27]:

    
for mdt in molecular_data_types:
    print(mdt)
    loading.make_prediction_files(rfcs_leaderboard[mdt],X_leaderboard_int[mdt],X_leaderboard_other[mdt],
                                  'leaderboard_%s' % mdt,2,Y_test=Y_leaderboard_noimpute,write=False)









    



dragon
Score: 8.614029; rs = 0.658,0.517,0.522,0.385,0.244,0.479
dragon+episuite
Score: 8.716942; rs = 0.665,0.509,0.535,0.372,0.238,0.487
dragon+verbal
Score: 8.299890; rs = 0.662,0.498,0.506,0.260,0.274,0.468
dragon+morgan
Score: 8.746169; rs = 0.650,0.532,0.535,0.330,0.272,0.492
dragon+nspdk
Score: 9.138397; rs = 0.684,0.577,0.551,0.447,0.256,0.500
dragon+nspdk_gramian
Score: 8.953301; rs = 0.633,0.571,0.543,0.429,0.233,0.499
dragon+all
Score: 9.055417; rs = 0.648,0.573,0.555,0.371,0.273,0.504



In [28]:

    
rs_ = [[0.658,0.517,0.522,0.385,0.244,0.479],[0.665,0.509,0.535,0.372,0.238,0.487],[0.662,0.498,0.506,0.260,0.274,0.468],
[0.650,0.532,0.535,0.330,0.272,0.492],[0.684,0.577,0.551,0.447,0.256,0.500],[0.655,0.551,0.535,0.269,0.311,0.486]]
for i,challenge in enumerate(rs_):
    print(molecular_data_types[i]+'\t'.join(str(x) for x in challenge))









    



dragon0.658	0.517	0.522	0.385	0.244	0.479
dragon+episuite0.665	0.509	0.535	0.372	0.238	0.487
dragon+verbal0.662	0.498	0.506	0.26	0.274	0.468
dragon+morgan0.65	0.532	0.535	0.33	0.272	0.492
dragon+nspdk0.684	0.577	0.551	0.447	0.256	0.5
dragon+all0.655	0.551	0.535	0.269	0.311	0.486



In [29]:

    
rfcs,score,rs = fit2.rfc_final(X_all,Y_all_imp['mean_std'],Y_all_mask['mean_std'],
                            max_features,min_samples_leaf,max_depth,et,use_mask,
                            n_estimators=n_estimators)









    



For subchallenge 2:
	Score = 2.83
	int_mean = 1.000
	int_sigma = 0.126
	ple_mean = 0.009
	ple_sigma = -0.010
	dec_mean = 0.133
	dec_sigma = 0.072



In [30]:

    
loading.make_prediction_files(rfcs,X_testset_int,X_testset_other,'testset',2,write=False)









    Out[30]:





{'mean_std': array([[ 33.26530612,  44.62244898,   2.12244898, ...,   9.93389033,
          10.11045229,  21.72142009],
        [ 12.95918367,  48.15306122,   0.91836735, ...,   7.59583474,
          18.41829388,  22.26213039],
        [ 21.16326531,  44.62244898,   8.12244898, ...,   4.7887756 ,
           8.37787436,  24.78012562],
        ..., 
        [ 39.53061224,  59.50204082,   3.82780612, ...,  12.27196668,
          22.87507041,  19.62084018],
        [ 18.3877551 ,  57.82653061,   1.42212675, ...,   8.39165909,
          14.20939341,  23.09079336],
        [ 29.08163265,  49.21836735,   3.95918367, ...,  11.30602357,
          31.18954828,  22.55003619]]), 'subject': {}}