Facies classification using Machine Learning

Bird Team: PG+AC


In [42]:
%matplotlib inline
import pandas as pd
from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier, GradientBoostingClassifier, VotingClassifier
from sklearn.multiclass import OneVsOneClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import KFold, cross_val_score, cross_val_predict, LeaveOneGroupOut, LeavePGroupsOut
from sklearn.metrics import confusion_matrix, make_scorer, f1_score, accuracy_score, recall_score, precision_score
from sklearn.svm import LinearSVC
from sklearn.feature_selection import SelectFromModel, RFECV
from sklearn.pipeline import Pipeline, make_pipeline
from sklearn.model_selection import GridSearchCV
from sklearn.base import clone
import numpy as np
import xgboost as xgb
from xgboost.sklearn import XGBClassifier
from stacking_classifiers import *
import matplotlib as mpl
import matplotlib.pyplot as plt
import matplotlib.colors as colors
from mpl_toolkits.axes_grid1 import make_axes_locatable
pd.options.mode.chained_assignment = None

In [2]:
filename = '../facies_vectors.csv'
training_data = pd.read_csv(filename)
print(set(training_data["Well Name"]))
training_data.head()


set(['SHRIMPLIN', 'Recruit F9', 'ALEXANDER D', 'SHANKLE', 'CHURCHMAN BIBLE', 'NOLAN', 'KIMZEY A', 'NEWBY', 'LUKE G U', 'CROSS H CATTLE'])
Out[2]:
Facies Formation Well Name Depth GR ILD_log10 DeltaPHI PHIND PE NM_M RELPOS
0 3 A1 SH SHRIMPLIN 2793.0 77.45 0.664 9.9 11.915 4.6 1 1.000
1 3 A1 SH SHRIMPLIN 2793.5 78.26 0.661 14.2 12.565 4.1 1 0.979
2 3 A1 SH SHRIMPLIN 2794.0 79.05 0.658 14.8 13.050 3.6 1 0.957
3 3 A1 SH SHRIMPLIN 2794.5 86.10 0.655 13.9 13.115 3.5 1 0.936
4 3 A1 SH SHRIMPLIN 2795.0 74.58 0.647 13.5 13.300 3.4 1 0.915

In [3]:
well_data = pd.read_csv('./../validation_data_nofacies.csv')
print(set(well_data["Well Name"]))
print(well_data.shape)
well_data.head()


set(['CRAWFORD', 'STUART'])
(830, 10)
Out[3]:
Formation Well Name Depth GR ILD_log10 DeltaPHI PHIND PE NM_M RELPOS
0 A1 SH STUART 2808.0 66.276 0.630 3.3 10.65 3.591 1 1.000
1 A1 SH STUART 2808.5 77.252 0.585 6.5 11.95 3.341 1 0.978
2 A1 SH STUART 2809.0 82.899 0.566 9.4 13.60 3.064 1 0.956
3 A1 SH STUART 2809.5 80.671 0.593 9.5 13.25 2.977 1 0.933
4 A1 SH STUART 2810.0 75.971 0.638 8.7 12.35 3.020 1 0.911

In [4]:
# concat train and test for processing 
well_data["origin"] = 'test'
training_data["origin"] = 'train'
df = pd.concat([well_data,training_data],axis=0,ignore_index=True)[list(training_data.columns)]
df['Well Name'] = df['Well Name'].astype('category')
df.head(10)


Out[4]:
Facies Formation Well Name Depth GR ILD_log10 DeltaPHI PHIND PE NM_M RELPOS origin
0 NaN A1 SH STUART 2808.0 66.276 0.630 3.3 10.65 3.591 1 1.000 test
1 NaN A1 SH STUART 2808.5 77.252 0.585 6.5 11.95 3.341 1 0.978 test
2 NaN A1 SH STUART 2809.0 82.899 0.566 9.4 13.60 3.064 1 0.956 test
3 NaN A1 SH STUART 2809.5 80.671 0.593 9.5 13.25 2.977 1 0.933 test
4 NaN A1 SH STUART 2810.0 75.971 0.638 8.7 12.35 3.020 1 0.911 test
5 NaN A1 SH STUART 2810.5 73.955 0.667 6.9 12.25 3.086 1 0.889 test
6 NaN A1 SH STUART 2811.0 77.962 0.674 6.5 12.45 3.092 1 0.867 test
7 NaN A1 SH STUART 2811.5 83.894 0.667 6.3 12.65 3.123 1 0.844 test
8 NaN A1 SH STUART 2812.0 84.424 0.653 6.7 13.05 3.121 1 0.822 test
9 NaN A1 SH STUART 2812.5 83.160 0.642 7.3 12.95 3.127 1 0.800 test

In [5]:
# add some features based on the well data. 

# number of samples per formation: could this correlate with how soft the formation is?
print("session")
sessionsize = df.groupby(["Well Name",'Formation']).size().reset_index()
sessionsize.columns =  ["Well Name",'Formation','formation_size']
df = pd.merge(df,sessionsize,how='left',on = ["Well Name",'Formation'])

# depth : 
print("depth")
sessionsize = df.groupby(["Well Name",'Formation'])["Depth"].min().reset_index()
sessionsize.columns =  ["Well Name",'Formation','minimum_depth']
df = pd.merge(df,sessionsize,how='left',on = ["Well Name",'Formation'])

sessionsize = df.groupby(["Well Name",'Formation'])["Depth"].max().reset_index()
sessionsize.columns =  ["Well Name",'Formation','maximum_depth']
df = pd.merge(df,sessionsize,how='left',on = ["Well Name",'Formation'])

df['formation_depth'] = df["maximum_depth"] - df["minimum_depth"]

df["soft_indic"] = df['formation_depth'] / df["formation_size"]

# add avgs of feat
print("add avgs of feat")
list_to_avg = ['Depth', 'GR', 'ILD_log10', 'DeltaPHI', 'PHIND', 'PE', 'NM_M', 'RELPOS']
for val in list_to_avg : 
    df[val + "_min"] = df.groupby(["Well Name",'Formation'])[val].transform(np.min)
    df[val + "_max"] = df.groupby(["Well Name",'Formation'])[val].transform(np.max)
    df[val + "_mean"] = df.groupby(["Well Name",'Formation'])[val].transform(np.mean)
    df[val + "_var"] = df.groupby(["Well Name",'Formation'])[val].transform(np.var) 

# add distance features (value minus its group min/max/mean), an attempt at regularization
print("add distances feat.")
for val in list_to_avg : 
    df[val + "_min_dist"] = df[val] -df[val + "_min"]
    df[val + "_max_dist"] =  df[val] -df[val + "_max"]
    df[val + "_mean_dist"] =  df[val] -df[val + "_mean"]
    
# add lag and lead !
print("lag lead")
list_to_lag = ['Depth', 'GR', 'ILD_log10', 'DeltaPHI', 'PHIND', 'PE', 'NM_M', 'RELPOS']
for val in list_to_lag:
    for lag in range(1,11):
        df[val+'_lag_'+str(lag)]=df[val]-df.groupby("Well Name")[val].shift(periods=lag)
        df[val+'_lead_'+str(lag)]=df[val]-df.groupby("Well Name")[val].shift(periods=-lag)

# adding some Formation lag and lead. 
for lag in range(1,3):
    df['Formation'+'_lag_'+str(lag)]=df.groupby("Well Name")['Formation'].shift(periods=lag)
    df['Formation'+'_lead_'+str(lag)]=df.groupby("Well Name")['Formation'].shift(periods=-lag)
    df['Formation'+'_lag_'+str(lag) + 'equal'] = (df['Formation'+'_lag_'+str(lag)] == df["Formation"]).astype(int)
    df['Formation'+'_lead_'+str(lag) + 'equal'] = (df['Formation'+'_lead_'+str(lag)] == df["Formation"]).astype(int) 

print("rolling")
#Add rolling features
list_to_roll = ['Depth', 'GR', 'ILD_log10', 'DeltaPHI', 'PHIND', 'PE', 'NM_M','RELPOS']
window_size = [5,10,15,20,50]
for w in window_size:
    for val in list_to_roll:
        df[val+'_rollingmean_'+str(w)]=df.groupby("Well Name")[val].apply(
            lambda x:x.rolling(window=w,center=True).mean())
        df[val+'_rollingmax_'+str(w)]=df.groupby("Well Name")[val].apply(
            lambda x:x.rolling(window=w,center=True).max())
        df[val+'_rollingmin_'+str(w)]=df.groupby("Well Name")[val].apply(
            lambda x:x.rolling(window=w,center=True).min())
        df[val+'_rollingstd_'+str(w)]=df.groupby("Well Name")[val].apply(
            lambda x:x.rolling(window=w,center=True).std())
        
print("special window features for NM_M")
def NM_M_distance(x, how, target):
    # counts how many samples separate each row from the most recent NM_M == target row
    # in the scan direction ("up" = top-down, "down" = bottom-up); target rows get 0,
    # rows seen before any target get -1
    length = len(x)
    rank = np.zeros(length)
    count = -1
    NM_M = x["NM_M"].values
    if how == "up":
        order = range(length)
    elif how == "down":
        order = range(length - 1, -1, -1)
    for i in order:
        if (NM_M[i] != target) & (count > -1):
            count += 1
            rank[i] = count
        elif NM_M[i] == target:
            count = 0
        else:
            rank[i] = count
    rank = pd.DataFrame(rank.astype(int),
                        columns=["NM_M_Rank_Target_" + str(target) + "_" + how],
                        index=x.index)
    return rank
df["NM_M_Rank_Target_1_up"]=df.groupby(["Well Name"]).apply(NM_M_distance,how="up",target=1)
df["NM_M_Rank_Target_2_up"]=df.groupby(["Well Name"]).apply(NM_M_distance,how="up",target=2)
df["NM_M_Rank_Target_1_down"]=df.groupby(["Well Name"]).apply(NM_M_distance,how="down",target=1)
df["NM_M_Rank_Target_2_down"]=df.groupby(["Well Name"]).apply(NM_M_distance,how="down",target=2)

print("filling na")
df = df.groupby(["Well Name"], as_index=False).apply(lambda group: group.bfill())
df = df.groupby(["Well Name"], as_index=False).apply(lambda group: group.ffill())
df = df.fillna(df.mean())

print("Vectorizing Formation text data")
from sklearn.feature_extraction.text import CountVectorizer
list_formation = ['Formation',
 'Formation_lag_1',
 'Formation_lead_1',
 'Formation_lag_2',
 'Formation_lead_2']
for l in list_formation:
    cv = CountVectorizer()
    counts = cv.fit_transform(df[l].values)
    cols = [c+"_"+l for c in cv.get_feature_names()]
    counts = pd.DataFrame(counts.toarray(),columns = cols)
    df = df.drop(l,axis = 1)
    df = pd.concat([df,counts],axis=1)

print("Finished preparing data. Now ready for ML ignition!")


session
depth
add avgs of feat
add distances feat.
lag lead
rolling
special window features for NM_M
filling na
Vectorizing Formation text data
Finished preparing data. Now ready for ML ignition!
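
The lag/lead and rolling blocks above rely on pandas groupby so that differencing and windows never cross well boundaries. A minimal sketch of that pattern on a hypothetical two-well frame (the 'demo' data below is illustrative only, not part of the contest dataset, and reuses the pandas import from the first cell):

demo = pd.DataFrame({
    "Well Name": ["A", "A", "A", "B", "B"],
    "GR":        [10.0, 12.0, 15.0, 50.0, 47.0],
})
# shift within each well, so the first sample of well B gets NaN instead of
# being differenced against the last sample of well A
demo["GR_lag_1"]  = demo["GR"] - demo.groupby("Well Name")["GR"].shift(1)
demo["GR_lead_1"] = demo["GR"] - demo.groupby("Well Name")["GR"].shift(-1)
print(demo)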

Fitting


In [39]:
# this time, use the whole training set (every training well)
groups = df[(df['origin']=='train')]["Well Name"]
ytrain = df[(df['origin']=='train')]['Facies']
yvalid = df[(df['origin']=='test')]['Facies']
xtrain = df[(df['origin']=='train')].drop(['Well Name','origin','Facies'],axis=1)
xvalid = df[(df['origin']=='test')].drop(['Well Name','origin','Facies'],axis=1)
custom_cv = LeavePGroupsOut(n_groups=2)
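
LeavePGroupsOut(n_groups=2) keyed on the well name holds out every pair of whole wells, so no fold ever sees samples from its validation wells during training. With the 10 training wells this gives C(10, 2) = 45 splits, which is why folds 0 through 44 appear in the stacking logs further down. A quick sanity check:

# number of leave-two-wells-out splits (45 for the 10 training wells)
print(custom_cv.get_n_splits(xtrain, ytrain, groups))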

In [80]:
# the validation wells have no Facies labels; their NaNs were filled with the overall mean earlier
set(yvalid.values)


Out[80]:
{4.5032537960954446}

In [7]:
clf_rfe = RandomForestClassifier(
    n_estimators=100,
    criterion="entropy",
    class_weight='balanced',
    min_samples_leaf=5,
    min_samples_split=25,
)

In [8]:
custom_cv_1 = custom_cv.split(xtrain, ytrain, groups)
fs = RFECV(clf_rfe,cv=custom_cv_1,scoring="f1_micro",step=0.1,verbose=2,n_jobs=4)
fs.fit(xtrain, ytrain)


[RFECV verbose output: each of the 45 leave-two-wells-out splits fits estimators with 437, 394, 351, 308, 265, 222, 179, 136, 93, 50 and 7 features; messages from the 4 parallel jobs are interleaved]
Out[8]:
RFECV(cv=<generator object split at 0x10436eaf0>,
   estimator=RandomForestClassifier(bootstrap=True, class_weight='balanced',
            criterion='entropy', max_depth=None, max_features='auto',
            max_leaf_nodes=None, min_impurity_split=1e-07,
            min_samples_leaf=5, min_samples_split=25,
            min_weight_fraction_leaf=0.0, n_estimators=100, n_jobs=1,
            oob_score=False, random_state=None, verbose=0,
            warm_start=False),
   n_jobs=4, scoring='f1_micro', step=0.1, verbose=2)

In [34]:
support = fs.support_
feature = pd.Series(xtrain.columns.values)
selected_features = list(feature[support])
print(len(selected_features))
xtrain_fs = xtrain[selected_features].copy()
xvalid_fs = xvalid[selected_features].copy()


351
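
Beyond the selected-feature mask, the fitted RFECV object also records how the cross-validated score changed as features were pruned. A small sketch of how to look at it (grid_scores_ is the attribute name in the scikit-learn version used here; newer releases expose cv_results_ instead):

print(fs.n_features_)          # number of features kept by RFECV (351 above)
plt.plot(fs.grid_scores_)      # CV f1_micro score at each feature-elimination step
plt.xlabel("elimination step")
plt.ylabel("CV f1_micro")
plt.show()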

In [67]:
rf = RandomForestClassifier(
            n_estimators=100,
            criterion="entropy",
            class_weight='balanced',
            min_samples_leaf=5,
            min_samples_split=25,
            max_features=10,
            random_state=42
)

xtc =  ExtraTreesClassifier(
            n_estimators=100,
            criterion="entropy",
            class_weight='balanced',
            min_samples_leaf=5,
            min_samples_split=25,
            max_features=10,
            random_state=42
)

gbt = GradientBoostingClassifier(
            loss='deviance',
            n_estimators = 100, 
            learning_rate = 0.1, 
            max_depth = 3,
            max_features = 10,
            min_samples_leaf = 5,
            min_samples_split = 25,
            random_state = 42, 
            max_leaf_nodes = None
)

xgb = XGBClassifier(
            learning_rate = 0.1, 
            max_depth = 3, 
            min_child_weight = 10, 
            n_estimators = 150, 
            colsample_bytree = 0.9,
            seed = 42
)

custom_cv_2 = list(LeavePGroupsOut(n_groups=2).split(xtrain, ytrain, groups))
stacked = StackedClassifier(clfs = [rf, xtc, gbt, xgb],
                            level2_learner= LogisticRegression(),
                            skf = custom_cv_2
                            )
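
StackedClassifier comes from the local stacking_classifiers module, which is not reproduced in this notebook. For readers without that file, here is a minimal sketch of the interface that seems to be assumed: out-of-fold probabilities from each level-1 model feed a level-2 logistic regression, and the per-fold models are reused at prediction time. It is an illustration only, not the actual implementation (which also caches fold models and reports per-fold accuracy), and it reuses the numpy and sklearn.base.clone imports from the first cell.

class SimpleStacker:
    # illustrative stand-in for stacking_classifiers.StackedClassifier
    def __init__(self, clfs, level2_learner, skf):
        self.clfs = clfs                    # level-1 classifiers
        self.level2_learner = level2_learner
        self.skf = skf                      # list of (train_idx, test_idx) pairs

    def fit(self, X, y):
        # X, y are numpy arrays (the notebook passes .values)
        n_classes = len(np.unique(y))
        oof = np.zeros((len(y), len(self.clfs) * n_classes))
        self.fold_models_ = []
        for j, base in enumerate(self.clfs):
            models = []
            cols = slice(j * n_classes, (j + 1) * n_classes)
            for train_idx, test_idx in self.skf:
                m = clone(base).fit(X[train_idx], y[train_idx])
                # note: with overlapping folds (LeavePGroupsOut) later folds
                # overwrite earlier out-of-fold rows
                oof[test_idx, cols] = m.predict_proba(X[test_idx])
                models.append(m)
            self.fold_models_.append(models)
        self.level2_learner.fit(oof, y)     # level-2 learner on out-of-fold predictions
        return self

    def predict_proba(self, X):
        # average each classifier's fold models, then stack for the level-2 learner
        blocks = [np.mean([m.predict_proba(X) for m in models], axis=0)
                  for models in self.fold_models_]
        return self.level2_learner.predict_proba(np.hstack(blocks))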

In [68]:
stacked.fit(xtrain_fs.values, ytrain.values)


Training classifier [0]
Fold [0]
Fitting the model !
accuracy_score on the 0-th fold: 0.549425287356
Fold [1]
Fitting the model !
accuracy_score on the 1-th fold: 0.449844881075
Fold [2]
Fitting the model !
accuracy_score on the 2-th fold: 0.53591160221
Fold [3]
Fitting the model !
accuracy_score on the 3-th fold: 0.556634304207
Fold [4]
Fitting the model !
accuracy_score on the 4-th fold: 0.482238966631
Fold [5]
Fitting the model !
accuracy_score on the 5-th fold: 0.517593643587
Fold [6]
Fitting the model !
accuracy_score on the 6-th fold: 0.567765567766
Fold [7]
Fitting the model !
accuracy_score on the 7-th fold: 0.520218579235
Fold [8]
Fitting the model !
accuracy_score on the 8-th fold: 0.590181430096
Fold [9]
Fitting the model !
accuracy_score on the 9-th fold: 0.432044198895
Fold [10]
Fitting the model !
accuracy_score on the 10-th fold: 0.571767497034
Fold [11]
Fitting the model !
accuracy_score on the 11-th fold: 0.598843930636
Fold [12]
Fitting the model !
accuracy_score on the 12-th fold: 0.491349480969
Fold [13]
Fitting the model !
accuracy_score on the 13-th fold: 0.520146520147
Fold [14]
Fitting the model !
accuracy_score on the 14-th fold: 0.407024793388
Fold [15]
Fitting the model !
accuracy_score on the 15-th fold: 0.505275498242
Fold [16]
Fitting the model !
accuracy_score on the 16-th fold: 0.582857142857
Fold [17]
Fitting the model !
accuracy_score on the 17-th fold: 0.491489361702
Fold [18]
Fitting the model !
accuracy_score on the 18-th fold: 0.597713097713
Fold [19]
Fitting the model !
accuracy_score on the 19-th fold: 0.453319502075
Fold [20]
Fitting the model !
accuracy_score on the 20-th fold: 0.445414847162
Fold [21]
Fitting the model !
accuracy_score on the 21-th fold: 0.440619621343
Fold [22]
Fitting the model !
accuracy_score on the 22-th fold: 0.368421052632
Fold [23]
Fitting the model !
accuracy_score on the 23-th fold: 0.485596707819
Fold [24]
Fitting the model !
accuracy_score on the 24-th fold: 0.597777777778
Fold [25]
Fitting the model !
accuracy_score on the 25-th fold: 0.516629711752
Fold [26]
Fitting the model !
accuracy_score on the 26-th fold: 0.538641686183
Fold [27]
Fitting the model !
accuracy_score on the 27-th fold: 0.566473988439
Fold [28]
Fitting the model !
accuracy_score on the 28-th fold: 0.522522522523
Fold [29]
Fitting the model !
accuracy_score on the 29-th fold: 0.594505494505
Fold [30]
Fitting the model !
accuracy_score on the 30-th fold: 0.559523809524
Fold [31]
Fitting the model !
accuracy_score on the 31-th fold: 0.590182648402
Fold [32]
Fitting the model !
accuracy_score on the 32-th fold: 0.624768946396
Fold [33]
Fitting the model !
accuracy_score on the 33-th fold: 0.478021978022
Fold [34]
Fitting the model !
accuracy_score on the 34-th fold: 0.596566523605
Fold [35]
Fitting the model !
accuracy_score on the 35-th fold: 0.506833712984
Fold [36]
Fitting the model !
accuracy_score on the 36-th fold: 0.521178637201
Fold [37]
Fitting the model !
accuracy_score on the 37-th fold: 0.480263157895
Fold [38]
Fitting the model !
accuracy_score on the 38-th fold: 0.581370449679
Fold [39]
Fitting the model !
accuracy_score on the 39-th fold: 0.563636363636
Fold [40]
Fitting the model !
accuracy_score on the 40-th fold: 0.523148148148
Fold [41]
Fitting the model !
accuracy_score on the 41-th fold: 0.504514672686
Fold [42]
Fitting the model !
accuracy_score on the 42-th fold: 0.523629489603
Fold [43]
Fitting the model !
accuracy_score on the 43-th fold: 0.553539019964
Fold [44]
Fitting the model !
accuracy_score on the 44-th fold: 0.486956521739
Training classifier [1]
Fold [0]
Fitting the model !
accuracy_score on the 0-th fold: 0.558620689655
Fold [1]
Fitting the model !
accuracy_score on the 1-th fold: 0.481902792141
Fold [2]
Fitting the model !
accuracy_score on the 2-th fold: 0.53591160221
Fold [3]
Fitting the model !
accuracy_score on the 3-th fold: 0.517799352751
Fold [4]
Fitting the model !
accuracy_score on the 4-th fold: 0.510226049516
Fold [5]
Fitting the model !
accuracy_score on the 5-th fold: 0.530079455165
Fold [6]
Fitting the model !
accuracy_score on the 6-th fold: 0.617216117216
Fold [7]
Fitting the model !
accuracy_score on the 7-th fold: 0.502732240437
Fold [8]
Fitting the model !
accuracy_score on the 8-th fold: 0.600853788687
Fold [9]
Fitting the model !
accuracy_score on the 9-th fold: 0.459668508287
Fold [10]
Fitting the model !
accuracy_score on the 10-th fold: 0.55871886121
Fold [11]
Fitting the model !
accuracy_score on the 11-th fold: 0.524855491329
Fold [12]
Fitting the model !
accuracy_score on the 12-th fold: 0.506343713956
Fold [13]
Fitting the model !
accuracy_score on the 13-th fold: 0.527472527473
Fold [14]
Fitting the model !
accuracy_score on the 14-th fold: 0.390495867769
Fold [15]
Fitting the model !
accuracy_score on the 15-th fold: 0.511137162954
Fold [16]
Fitting the model !
accuracy_score on the 16-th fold: 0.536
Fold [17]
Fitting the model !
accuracy_score on the 17-th fold: 0.48829787234
Fold [18]
Fitting the model !
accuracy_score on the 18-th fold: 0.607068607069
Fold [19]
Fitting the model !
accuracy_score on the 19-th fold: 0.485477178423
Fold [20]
Fitting the model !
accuracy_score on the 20-th fold: 0.485807860262
Fold [21]
Fitting the model !
accuracy_score on the 21-th fold: 0.459552495697
Fold [22]
Fitting the model !
accuracy_score on the 22-th fold: 0.334736842105
Fold [23]
Fitting the model !
accuracy_score on the 23-th fold: 0.502057613169
Fold [24]
Fitting the model !
accuracy_score on the 24-th fold: 0.526666666667
Fold [25]
Fitting the model !
accuracy_score on the 25-th fold: 0.513303769401
Fold [26]
Fitting the model !
accuracy_score on the 26-th fold: 0.555035128806
Fold [27]
Fitting the model !
accuracy_score on the 27-th fold: 0.568400770713
Fold [28]
Fitting the model !
accuracy_score on the 28-th fold: 0.484234234234
Fold [29]
Fitting the model !
accuracy_score on the 29-th fold: 0.59010989011
Fold [30]
Fitting the model !
accuracy_score on the 30-th fold: 0.489177489177
Fold [31]
Fitting the model !
accuracy_score on the 31-th fold: 0.51598173516
Fold [32]
Fitting the model !
accuracy_score on the 32-th fold: 0.543438077634
Fold [33]
Fitting the model !
accuracy_score on the 33-th fold: 0.492307692308
Fold [34]
Fitting the model !
accuracy_score on the 34-th fold: 0.507510729614
Fold [35]
Fitting the model !
accuracy_score on the 35-th fold: 0.521640091116
Fold [36]
Fitting the model !
accuracy_score on the 36-th fold: 0.515653775322
Fold [37]
Fitting the model !
accuracy_score on the 37-th fold: 0.468201754386
Fold [38]
Fitting the model !
accuracy_score on the 38-th fold: 0.601713062099
Fold [39]
Fitting the model !
accuracy_score on the 39-th fold: 0.569696969697
Fold [40]
Fitting the model !
accuracy_score on the 40-th fold: 0.52662037037
Fold [41]
Fitting the model !
accuracy_score on the 41-th fold: 0.51467268623
Fold [42]
Fitting the model !
accuracy_score on the 42-th fold: 0.491493383743
Fold [43]
Fitting the model !
accuracy_score on the 43-th fold: 0.620689655172
Fold [44]
Fitting the model !
accuracy_score on the 44-th fold: 0.515217391304
Training classifier [2]
Fold [0]
Fitting the model !
accuracy_score on the 0-th fold: 0.54367816092
Fold [1]
Fitting the model !
accuracy_score on the 1-th fold: 0.486039296794
Fold [2]
Fitting the model !
accuracy_score on the 2-th fold: 0.570165745856
Fold [3]
Fitting the model !
accuracy_score on the 3-th fold: 0.615965480043
Fold [4]
Fitting the model !
accuracy_score on the 4-th fold: 0.539289558665
Fold [5]
Fitting the model !
accuracy_score on the 5-th fold: 0.564131668558
Fold [6]
Fitting the model !
accuracy_score on the 6-th fold: 0.615384615385
Fold [7]
Fitting the model !
accuracy_score on the 7-th fold: 0.591256830601
Fold [8]
Fitting the model !
accuracy_score on the 8-th fold: 0.616862326574
Fold [9]
Fitting the model !
accuracy_score on the 9-th fold: 0.394475138122
Fold [10]
Fitting the model !
accuracy_score on the 10-th fold: 0.511269276394
Fold [11]
Fitting the model !
accuracy_score on the 11-th fold: 0.543352601156
Fold [12]
Fitting the model !
accuracy_score on the 12-th fold: 0.495963091119
Fold [13]
Fitting the model !
accuracy_score on the 13-th fold: 0.547008547009
Fold [14]
Fitting the model !
accuracy_score on the 14-th fold: 0.371900826446
Fold [15]
Fitting the model !
accuracy_score on the 15-th fold: 0.539273153576
Fold [16]
Fitting the model !
accuracy_score on the 16-th fold: 0.516571428571
Fold [17]
Fitting the model !
accuracy_score on the 17-th fold: 0.423404255319
Fold [18]
Fitting the model !
accuracy_score on the 18-th fold: 0.5841995842
Fold [19]
Fitting the model !
accuracy_score on the 19-th fold: 0.427385892116
Fold [20]
Fitting the model !
accuracy_score on the 20-th fold: 0.446506550218
Fold [21]
Fitting the model !
accuracy_score on the 21-th fold: 0.36660929432
Fold [22]
Fitting the model !
accuracy_score on the 22-th fold: 0.350526315789
Fold [23]
Fitting the model !
accuracy_score on the 23-th fold: 0.475308641975
Fold [24]
Fitting the model !
accuracy_score on the 24-th fold: 0.581111111111
Fold [25]
Fitting the model !
accuracy_score on the 25-th fold: 0.49889135255
Fold [26]
Fitting the model !
accuracy_score on the 26-th fold: 0.550351288056
Fold [27]
Fitting the model !
accuracy_score on the 27-th fold: 0.522157996146
Fold [28]
Fitting the model !
accuracy_score on the 28-th fold: 0.523648648649
Fold [29]
Fitting the model !
accuracy_score on the 29-th fold: 0.582417582418
Fold [30]
Fitting the model !
accuracy_score on the 30-th fold: 0.504329004329
Fold [31]
Fitting the model !
accuracy_score on the 31-th fold: 0.584474885845
Fold [32]
Fitting the model !
accuracy_score on the 32-th fold: 0.608133086876
Fold [33]
Fitting the model !
accuracy_score on the 33-th fold: 0.57032967033
Fold [34]
Fitting the model !
accuracy_score on the 34-th fold: 0.582618025751
Fold [35]
Fitting the model !
accuracy_score on the 35-th fold: 0.514806378132
Fold [36]
Fitting the model !
accuracy_score on the 36-th fold: 0.51197053407
Fold [37]
Fitting the model !
accuracy_score on the 37-th fold: 0.536184210526
Fold [38]
Fitting the model !
accuracy_score on the 38-th fold: 0.597430406852
Fold [39]
Fitting the model !
accuracy_score on the 39-th fold: 0.579797979798
Fold [40]
Fitting the model !
accuracy_score on the 40-th fold: 0.570601851852
Fold [41]
Fitting the model !
accuracy_score on the 41-th fold: 0.540632054176
Fold [42]
Fitting the model !
accuracy_score on the 42-th fold: 0.603024574669
Fold [43]
Fitting the model !
accuracy_score on the 43-th fold: 0.569872958258
Fold [44]
Fitting the model !
accuracy_score on the 44-th fold: 0.565217391304
Training classifier [3]
Fold [0]
Fitting the model !
accuracy_score on the 0-th fold: 0.56091954023
Fold [1]
Fitting the model !
accuracy_score on the 1-th fold: 0.506721820062
Fold [2]
Fitting the model !
accuracy_score on the 2-th fold: 0.572375690608
Fold [3]
Fitting the model !
accuracy_score on the 3-th fold: 0.607335490831
Fold [4]
Fitting the model !
accuracy_score on the 4-th fold: 0.539289558665
Fold [5]
Fitting the model !
accuracy_score on the 5-th fold: 0.5561861521
Fold [6]
Fitting the model !
accuracy_score on the 6-th fold: 0.635531135531
Fold [7]
Fitting the model !
accuracy_score on the 7-th fold: 0.609836065574
Fold [8]
Fitting the model !
accuracy_score on the 8-th fold: 0.581643543223
Fold [9]
Fitting the model !
accuracy_score on the 9-th fold: 0.420994475138
Fold [10]
Fitting the model !
accuracy_score on the 10-th fold: 0.506524317912
Fold [11]
Fitting the model !
accuracy_score on the 11-th fold: 0.575722543353
Fold [12]
Fitting the model !
accuracy_score on the 12-th fold: 0.528258362168
Fold [13]
Fitting the model !
accuracy_score on the 13-th fold: 0.529914529915
Fold [14]
Fitting the model !
accuracy_score on the 14-th fold: 0.394628099174
Fold [15]
Fitting the model !
accuracy_score on the 15-th fold: 0.57796014068
Fold [16]
Fitting the model !
accuracy_score on the 16-th fold: 0.540571428571
Fold [17]
Fitting the model !
accuracy_score on the 17-th fold: 0.470212765957
Fold [18]
Fitting the model !
accuracy_score on the 18-th fold: 0.546777546778
Fold [19]
Fitting the model !
accuracy_score on the 19-th fold: 0.438796680498
Fold [20]
Fitting the model !
accuracy_score on the 20-th fold: 0.454148471616
Fold [21]
Fitting the model !
accuracy_score on the 21-th fold: 0.421686746988
Fold [22]
Fitting the model !
accuracy_score on the 22-th fold: 0.349473684211
Fold [23]
Fitting the model !
accuracy_score on the 23-th fold: 0.491769547325
Fold [24]
Fitting the model !
accuracy_score on the 24-th fold: 0.543333333333
Fold [25]
Fitting the model !
accuracy_score on the 25-th fold: 0.519955654102
Fold [26]
Fitting the model !
accuracy_score on the 26-th fold: 0.531615925059
Fold [27]
Fitting the model !
accuracy_score on the 27-th fold: 0.543352601156
Fold [28]
Fitting the model !
accuracy_score on the 28-th fold: 0.560810810811
Fold [29]
Fitting the model !
accuracy_score on the 29-th fold: 0.543956043956
Fold [30]
Fitting the model !
accuracy_score on the 30-th fold: 0.522727272727
Fold [31]
Fitting the model !
accuracy_score on the 31-th fold: 0.5399543379
Fold [32]
Fitting the model !
accuracy_score on the 32-th fold: 0.619223659889
Fold [33]
Fitting the model !
accuracy_score on the 33-th fold: 0.540659340659
Fold [34]
Fitting the model !
accuracy_score on the 34-th fold: 0.554721030043
Fold [35]
Fitting the model !
accuracy_score on the 35-th fold: 0.53416856492
Fold [36]
Fitting the model !
accuracy_score on the 36-th fold: 0.469613259669
Fold [37]
Fitting the model !
accuracy_score on the 37-th fold: 0.571271929825
Fold [38]
Fitting the model !
accuracy_score on the 38-th fold: 0.571734475375
Fold [39]
Fitting the model !
accuracy_score on the 39-th fold: 0.527272727273
Fold [40]
Fitting the model !
accuracy_score on the 40-th fold: 0.542824074074
Fold [41]
Fitting the model !
accuracy_score on the 41-th fold: 0.511286681716
Fold [42]
Fitting the model !
accuracy_score on the 42-th fold: 0.631379962193
Fold [43]
Fitting the model !
accuracy_score on the 43-th fold: 0.551724137931
Fold [44]
Fitting the model !
accuracy_score on the 44-th fold: 0.579347826087
Out[68]:
StackedClassifier(clfs=[RandomForestClassifier(bootstrap=True, class_weight='balanced',
            criterion='entropy', max_depth=None, max_features=10,
            max_leaf_nodes=None, min_impurity_split=1e-07,
            min_samples_leaf=5, min_samples_split=25,
            min_weight_fraction_leaf=0.0, n_estimat...logistic', reg_alpha=0, reg_lambda=1,
       scale_pos_weight=1, seed=42, silent=True, subsample=1)],
         level2_learner=LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=100, multi_class='ovr', n_jobs=1,
          penalty='l2', random_state=None, solver='liblinear', tol=0.0001,
          verbose=0, warm_start=False),
         oob_metrics=<function accuracy_score at 0x114abc2a8>,
         save_dir=None,
         skf=[(array([   0,    1, ..., 3743, 3744]), array([ 471,  472, ..., 4147, 4148])), (array([   0,    1, ..., 4147, 4148]), array([ 471,  472, ..., 2785, 2786])), (array([   0,    1, ..., 4147, 4148]), array([ 471,  472, ..., 2284, 2285])), (array([   0,    1, ..., 4147, 4148]), array([ 471,  472, ......1, ..., 3280, 3281])), (array([ 471,  472, ..., 4147, 4148]), array([   0,    1, ..., 1384, 1385]))])

Apply to test


In [69]:
well_name_valid = df.loc[(df['origin']=='test'),"Well Name"]

In [76]:
preds = stacked.predict_proba(xvalid_fs)


[predict_proba log: for each of the 4 level-1 classifiers, the saved model from each of the 45 folds is loaded and applied to the validation wells]

In [94]:
# sorted class labels, so the argmax column index maps back to the right facies
classes = sorted(set(ytrain))
preds_hard = [classes[i] for i in np.argmax(preds, axis=1)]

In [95]:
well = "CRAWFORD"
depth = xvalid.loc[well_name_valid== well ,"Depth"]
predictions = pd.Series(preds_hard).loc[well_name_valid==well]
plt.plot(depth,predictions)
plt.axis([2950,3175, 1, 9])
plt.grid(b=True, which='major', color='r', linestyle='--')
plt.show()



In [96]:
well = "STUART"
depth = xvalid.loc[well_name_valid== well ,"Depth"]
predictions = pd.Series(preds_hard).loc[well_name_valid==well]
plt.plot(depth,predictions)
plt.axis([2800,3050, 1, 9])
plt.grid(b=True, which='major', color='r', linestyle='--')
plt.show()



In [97]:
xvalid['Facies']=preds_hard
xvalid.to_csv('XmasPreds_6.csv')
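
The CSV written above carries the full engineered feature matrix. If a slimmer file with just the identifying columns and the predicted facies is preferred, something along these lines would work (the file name and column choice here are this sketch's assumptions, not part of the original workflow):

submission = pd.DataFrame({
    "Well Name": well_name_valid.values,
    "Depth": xvalid["Depth"].values,
    "Facies": preds_hard,
})
submission.to_csv('XmasPreds_6_slim.csv', index=False)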