In [1]:
%matplotlib inline
import numpy as np
import pandas as pd

from sklearn import preprocessing
from sklearn.metrics import f1_score, accuracy_score, make_scorer
from sklearn.model_selection import LeaveOneGroupOut

Loading data

In [2]:
# Read the labelled training logs and the unlabelled blind-well logs.
training_data = pd.read_csv("training_data.csv")
blind = pd.read_csv('validation_data_nofacies.csv')

# Both frames get the same treatment: the two label-like columns are stored
# as pandas categoricals.
for frame in (training_data, blind):
    for label_column in ('Well Name', 'Formation'):
        frame[label_column] = frame[label_column].astype('category')

Feature engineering

In [3]:
# training data
#
# Augment every sample with the log values of its two depth-adjacent samples
# from the same well, then standardise the resulting feature matrix.

# One frame of neighbouring-row values per well, concatenated after the loop.
above = []
below = []

cols = ['GR', 'ILD_log10', 'DeltaPHI', 'PHIND', 'PE', 'NM_M', 'RELPOS']

for i, group in training_data.groupby('Well Name'):
    df = group.sort_values('Depth')
    # shift(-1) pulls in the next row in depth order, shift(1) the previous
    # one. The row at either end of a well has no such neighbour, so the gap
    # is filled with the nearest value from the same well.
    dfa = df[cols].shift(-1).ffill()
    dfb = df[cols].shift(1).bfill()
    # Collect the per-well results; without these appends both lists stay
    # empty and pd.concat below raises "No objects to concatenate".
    above.append(dfa)
    below.append(dfb)

above_df = pd.concat(above)
below_df = pd.concat(below)

above_df.columns = ['above_'+ column for column in above_df.columns]
below_df.columns = ['below_'+ column for column in below_df.columns]

# Column-wise concat aligns on the row index, which sort/shift left untouched.
training_data = pd.concat((training_data, above_df, below_df), axis=1)
y = training_data['Facies'].values
X = training_data.drop(['Formation', 'Well Name','Facies'], axis=1)
# The scaler is fitted on the training features only and kept for later reuse.
scaler = preprocessing.StandardScaler().fit(X)
X = scaler.transform(X)

In [4]:
# validation data
#
# Build the same neighbouring-sample features for the blind wells and scale
# them with the scaler that was fitted on the training features.

# One frame of neighbouring-row values per well, concatenated after the loop.
above = []
below = []

cols = ['GR', 'ILD_log10', 'DeltaPHI', 'PHIND', 'PE', 'NM_M', 'RELPOS']

for i, group in blind.groupby('Well Name'):
    df = group.sort_values('Depth')
    # shift(-1) pulls in the next row in depth order, shift(1) the previous
    # one. The row at either end of a well has no such neighbour, so the gap
    # is filled with the nearest value from the same well.
    dfa = df[cols].shift(-1).ffill()
    dfb = df[cols].shift(1).bfill()
    # Collect the per-well results; without these appends both lists stay
    # empty and pd.concat below raises "No objects to concatenate".
    above.append(dfa)
    below.append(dfb)

above_df = pd.concat(above)
below_df = pd.concat(below)

above_df.columns = ['above_'+ column for column in above_df.columns]
below_df.columns = ['below_'+ column for column in below_df.columns]

# Column-wise concat aligns on the row index, which sort/shift left untouched.
blind = pd.concat((blind, above_df, below_df), axis=1)

# There is no 'Facies' column to drop here: it is what gets predicted.
X_blind = np.array(blind.drop(['Formation', 'Well Name'], axis=1))
X_blind = scaler.transform(X_blind)

Multi Layer Perceptron

In [5]:
from sklearn.neural_network import MLPClassifier as mlp

# One hidden layer of 100 units, Adam solver, fixed seed.
# NOTE(review): scikit-learn documents `learning_rate` as applying to
# solver='sgd' only -- confirm whether 'adaptive' has any effect with Adam.
ML_classifier = mlp(
    hidden_layer_sizes=(100, ),
    solver='adam',
    alpha=1e-5,
    learning_rate='adaptive',
    learning_rate_init=0.001,
    max_iter=1000,
    random_state=49,
)

Random Forest

In [6]:
from sklearn.ensemble import RandomForestClassifier as rfc

# 200 trees with no depth limit; a node needs at least two samples to split.
RF_classifier = rfc(
    n_estimators=200,
    max_depth=None,
    min_samples_split=2,
    random_state=10,
)


In [7]:
from sklearn.ensemble import AdaBoostClassifier as abc

# AdaBoost whose base learner is a random forest built with default settings
# (`rfc` is the RandomForestClassifier alias imported in the previous cell).
AB_classifier = abc(
    rfc(),
    n_estimators=200,
    learning_rate=0.01,
    algorithm='SAMME.R',
    random_state=0,
)

Ensemble of ML, RF, and AB classifiers

In [8]:
from sklearn.ensemble import VotingClassifier

# Soft-voting ensemble; the weights are listed in the same order as the
# estimators (MLP, random forest, AdaBoost).
V_classifier = VotingClassifier(
    estimators=[
        ('MLP', ML_classifier),
        ('RFC', RF_classifier),
        ('ABC', AB_classifier),
    ],
    voting='soft',
    weights=[0.15, 0.425, 0.425],
)

In [9]:
# Leave-one-well-out cross-validation of the voting ensemble: each well is
# held out in turn, the ensemble is refitted on the others, and the micro
# F1 score on the held-out well is recorded.
f1_vc = []

wells = training_data["Well Name"].values
logo = LeaveOneGroupOut()

for train, test in logo.split(X, y, groups=wells):
    # All rows of a test fold share one group, so the first row names the well.
    well_name = wells[test[0]]
    # Refit from scratch on the remaining wells before scoring this one.
    V_classifier.fit(X[train], y[train])
    pred_vc = V_classifier.predict(X[test])
    sc = f1_score(y[test], pred_vc, labels = np.arange(10), average = 'micro')
    # Keep the score so the average below has something to divide.
    f1_vc.append(sc)
    print("{:>20s}  {:.3f}".format(well_name, sc))
# Parenthesised single-argument prints run on both Python 2 and Python 3.
print("-Average leave-one-well-out F1 Score: %6f" % (sum(f1_vc)/(1.0*(len(f1_vc)))))
print(" *** methodogy and code borrowed from MandMs *** ")

      CROSS H CATTLE  0.383
            LUKE G U  0.527
               NEWBY  0.441
               NOLAN  0.506
          Recruit F9  0.882
             SHANKLE  0.523
           SHRIMPLIN  0.626
-Average leave-one-well-out F1 Score: 0.559150
 *** methodogy and code borrowed from MandMs *** 


In [10]:
# Refit the ensemble on the complete training set, then predict facies for
# the blind wells and store the predictions alongside their logs.
y_blind = V_classifier.fit(X, y).predict(X_blind)
blind['Facies'] = y_blind

In [ ]: