In [1]:
%matplotlib inline
import numpy as np
import pandas as pd
from sklearn import preprocessing
from sklearn.metrics import f1_score, accuracy_score, make_scorer
from sklearn.model_selection import LeaveOneGroupOut
In [2]:
# Load the labelled training logs and the unlabelled validation ("blind") logs.
# 'Well Name' and 'Formation' are cast to pandas categoricals in both frames.
training_data = pd.read_csv("training_data.csv").astype(
    {'Well Name': 'category', 'Formation': 'category'}
)
blind = pd.read_csv('validation_data_nofacies.csv').astype(
    {'Well Name': 'category', 'Formation': 'category'}
)
In [3]:
# training data
# Feature augmentation: every sample is extended with the log readings of the
# neighbouring rows of the same well (rows ordered by Depth). The augmented
# frame is then split into labels `y` and a standardised feature matrix `X`.
cols = ['GR', 'ILD_log10', 'DeltaPHI', 'PHIND', 'PE', 'NM_M', 'RELPOS']
above = []
below = []
for i, group in training_data.groupby('Well Name'):
    df = group.sort_values('Depth')[cols]
    # shift(-1) brings in the next row (greater Depth). The last row of a well
    # has no successor and is forward-filled from the shifted row before it.
    # .ffill()/.bfill() are used because fillna(method=...) is deprecated in
    # recent pandas releases.
    dfa = df.shift(-1).ffill()
    # shift(1) brings in the previous row (smaller Depth). The first row of a
    # well has no predecessor and is back-filled from the shifted row after it.
    dfb = df.shift(1).bfill()
    above.append(dfa)
    below.append(dfb)
above_df = pd.concat(above).add_prefix('above_')
below_df = pd.concat(below).add_prefix('below_')
# Column-wise concat aligns on the row index, so the per-well ordering of
# above_df / below_df does not have to match training_data.
training_data = pd.concat((training_data, above_df, below_df), axis=1)
y = training_data['Facies'].values
X = training_data.drop(['Formation', 'Well Name', 'Facies'], axis=1)
# The scaler is fitted on the training features only; the validation cell
# reuses this fitted `scaler` to transform the blind data.
scaler = preprocessing.StandardScaler().fit(X)
X = scaler.transform(X)
In [4]:
# validation data
# Same neighbour-feature augmentation as for the training data, applied to the
# unlabelled validation wells, followed by scaling with the already fitted
# `scaler`.
cols = ['GR', 'ILD_log10', 'DeltaPHI', 'PHIND', 'PE', 'NM_M', 'RELPOS']
above = []
below = []
for i, group in blind.groupby('Well Name'):
    df = group.sort_values('Depth')[cols]
    # shift(-1) brings in the next row (greater Depth). The last row of a well
    # has no successor and is forward-filled from the shifted row before it.
    # .ffill()/.bfill() are used because fillna(method=...) is deprecated in
    # recent pandas releases.
    dfa = df.shift(-1).ffill()
    # shift(1) brings in the previous row (smaller Depth). The first row of a
    # well has no predecessor and is back-filled from the shifted row after it.
    dfb = df.shift(1).bfill()
    above.append(dfa)
    below.append(dfb)
above_df = pd.concat(above).add_prefix('above_')
below_df = pd.concat(below).add_prefix('below_')
# Column-wise concat aligns on the row index, so the per-well ordering of
# above_df / below_df does not have to match blind.
blind = pd.concat((blind, above_df, below_df), axis=1)
# No 'Facies' column is dropped here; only the two categorical columns go.
X_blind = np.array(blind.drop(['Formation', 'Well Name'], axis=1))
# Transform only: the scaler statistics come from the training features.
X_blind = scaler.transform(X_blind)
In [5]:
from sklearn.neural_network import MLPClassifier as mlp
# Multi-layer perceptron member of the ensemble: one hidden layer of 100 units,
# Adam solver, fixed seed.
# NOTE(review): per the scikit-learn docs, learning_rate='adaptive' appears to
# apply only to solver='sgd' -- confirm whether it has any effect with 'adam'.
ML_classifier = mlp(
    hidden_layer_sizes=(100,),
    solver='adam',
    alpha=1e-5,
    learning_rate='adaptive',
    learning_rate_init=0.001,
    max_iter=1000,
    random_state=49,
)
In [6]:
from sklearn.ensemble import RandomForestClassifier as rfc
# Random-forest member of the ensemble: 200 trees with no depth limit,
# fixed seed.
RF_classifier = rfc(
    n_estimators=200,
    max_depth=None,
    min_samples_split=2,
    random_state=10,
)
In [7]:
from sklearn.ensemble import AdaBoostClassifier as abc
# AdaBoost member of the ensemble: boosts a default-configured random forest
# (`rfc` is imported in the random-forest cell) for 200 rounds with a small
# learning rate and a fixed seed.
# NOTE(review): algorithm='SAMME.R' appears to be deprecated in recent
# scikit-learn releases -- confirm against the installed version.
AB_classifier = abc(
    rfc(),
    n_estimators=200,
    learning_rate=0.01,
    algorithm='SAMME.R',
    random_state=0,
)
In [8]:
from sklearn.ensemble import VotingClassifier
# Soft-voting ensemble of the three classifiers defined in the previous cells.
# The weights follow the order of `estimators`: MLP 0.15, RFC 0.425, ABC 0.425.
V_classifier = VotingClassifier(
    estimators=[
        ('MLP', ML_classifier),
        ('RFC', RF_classifier),
        ('ABC', AB_classifier),
    ],
    voting='soft',
    weights=[0.15, 0.425, 0.425],
)
In [9]:
# Leave-one-well-out cross-validation of the voting ensemble: each split holds
# out the samples of one well, refits on the rest, and records the
# micro-averaged F1 score for the held-out well.
f1_vc = []
wells = training_data["Well Name"].values
logo = LeaveOneGroupOut()
for train, test in logo.split(X, y, groups=wells):
    # The held-out rows belong to a single group, so the first test row
    # identifies the well.
    well_name = wells[test[0]]
    V_classifier.fit(X[train], y[train])
    pred_vc = V_classifier.predict(X[test])
    sc = f1_score(y[test], pred_vc, labels=np.arange(10), average='micro')
    print("{:>20s} {:.3f}".format(well_name, sc))
    f1_vc.append(sc)
# print() calls instead of Python 2 print statements, so the cell parses on
# Python 3; the emitted text is unchanged.
print("-Average leave-one-well-out F1 Score: %6f" % (sum(f1_vc) / (1.0 * len(f1_vc))))
print(" *** methodogy and code borrowed from MandMs *** ")
In [10]:
# Refit the ensemble on the full training set, then predict the blind samples.
V_classifier.fit(X, y)
y_blind = V_classifier.predict(X_blind)
# Store the predictions as a new 'Facies' column and write the frame to disk.
blind['Facies'] = y_blind
blind.to_csv('Predicted_Facies_3.csv')
In [ ]: