In [1]:
import numpy as np
np.random.seed(1337)
import warnings
warnings.filterwarnings("ignore")
import time as tm
import pandas as pd
from keras.models import Sequential, Model
from keras.constraints import maxnorm
from keras.layers import Dense, Dropout, Activation
from keras.utils import np_utils
from sklearn.metrics import f1_score, recall_score, accuracy_score, confusion_matrix
from sklearn.model_selection import LeaveOneGroupOut
from sklearn import preprocessing
import matplotlib as mpl
import matplotlib.pyplot as plt
import matplotlib.colors as colors
from mpl_toolkits.axes_grid1 import make_axes_locatable
from scipy.signal import medfilt
%matplotlib notebook
In [2]:
training_data = pd.read_csv('../data/training_data.csv')
blind_data = pd.read_csv('../data/nofacies_data.csv')
In [3]:
def accuracy(conf):
total_correct = 0.
nb_classes = conf.shape[0]
for i in np.arange(0,nb_classes):
total_correct += conf[i][i]
acc = total_correct/sum(sum(conf))
return acc
adjacent_facies = np.array([[1], [0, 2], [1], [4], [3, 5], [4, 6, 7], [5, 7], [5, 6, 8], [6, 7]])
def accuracy_adjacent(conf, adjacent_facies):
nb_classes = conf.shape[0]
total_correct = 0.
for i in np.arange(0,nb_classes):
total_correct += conf[i][i]
for j in adjacent_facies[i]:
total_correct += conf[i][j]
return total_correct / sum(sum(conf))
In [4]:
# 1=sandstone 2=c_siltstone 3=f_siltstone
# 4=marine_silt_shale 5=mudstone 6=wackestone 7=dolomite
# 8=packstone 9=bafflestone
facies_colors = ['#F4D03F', '#F5B041','#DC7633','#6E2C00',
'#1B4F72','#2E86C1', '#AED6F1', '#A569BD', '#196F3D']
facies_labels = ['SS', 'CSiS', 'FSiS', 'SiSh', 'MS',
'WS', 'D','PS', 'BS']
#facies_color_map is a dictionary that maps facies labels
#to their respective colors
facies_color_map = {}
for ind, label in enumerate(facies_labels):
facies_color_map[label] = facies_colors[ind]
def label_facies(row, labels):
return labels[ row['Facies'] -1]
training_data.loc[:,'FaciesLabels'] = training_data.apply(lambda row: label_facies(row, facies_labels), axis=1)
In [5]:
def make_facies_log_plot(logs, facies_colors):
#make sure logs are sorted by depth
logs = logs.sort_values(by='Depth')
cmap_facies = colors.ListedColormap(
facies_colors[0:len(facies_colors)], 'indexed')
ztop=logs.Depth.min(); zbot=logs.Depth.max()
cluster=np.repeat(np.expand_dims(logs['Facies'].values,1), 100, 1)
f, ax = plt.subplots(nrows=1, ncols=6, figsize=(8, 12))
ax[0].plot(logs.GR, logs.Depth, '-g')
ax[1].plot(logs.ILD_log10, logs.Depth, '-')
ax[2].plot(logs.DeltaPHI, logs.Depth, '-', color='0.5')
ax[3].plot(logs.PHIND, logs.Depth, '-', color='r')
ax[4].plot(logs.PE, logs.Depth, '-', color='black')
im=ax[5].imshow(cluster, interpolation='none', aspect='auto',
cmap=cmap_facies,vmin=1,vmax=9)
divider = make_axes_locatable(ax[5])
cax = divider.append_axes("right", size="20%", pad=0.05)
cbar=plt.colorbar(im, cax=cax)
cbar.set_label((17*' ').join([' SS ', 'CSiS', 'FSiS',
'SiSh', ' MS ', ' WS ', ' D ',
' PS ', ' BS ']))
cbar.set_ticks(range(0,1)); cbar.set_ticklabels('')
for i in range(len(ax)-1):
ax[i].set_ylim(ztop,zbot)
ax[i].invert_yaxis()
ax[i].grid()
ax[i].locator_params(axis='x', nbins=3)
ax[0].set_xlabel("GR")
ax[0].set_xlim(logs.GR.min(),logs.GR.max())
ax[1].set_xlabel("ILD_log10")
ax[1].set_xlim(logs.ILD_log10.min(),logs.ILD_log10.max())
ax[2].set_xlabel("DeltaPHI")
ax[2].set_xlim(logs.DeltaPHI.min(),logs.DeltaPHI.max())
ax[3].set_xlabel("PHIND")
ax[3].set_xlim(logs.PHIND.min(),logs.PHIND.max())
ax[4].set_xlabel("PE")
ax[4].set_xlim(logs.PE.min(),logs.PE.max())
ax[5].set_xlabel('Facies')
ax[1].set_yticklabels([]); ax[2].set_yticklabels([]); ax[3].set_yticklabels([])
ax[4].set_yticklabels([]); ax[5].set_yticklabels([])
ax[5].set_xticklabels([])
f.suptitle('Well: %s'%logs.iloc[0]['Well Name'], fontsize=14,y=0.94)
In [6]:
X = training_data.drop(['Formation', 'Well Name', 'Depth', 'Facies', 'FaciesLabels'], axis=1).values
y = training_data['Facies'].values - 1
X_blind = blind_data.drop(['Formation', 'Well Name', 'Depth'], axis=1).values
wells = training_data["Well Name"].values
In [7]:
def fDNN(in_dim, out_dim):
# Model
model = Sequential()
model.add(Dense(210, input_dim=in_dim, activation='relu'))
model.add(Dense(70, activation='relu'))
model.add(Dense(out_dim, activation='softmax'))
# Compilation
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
return model
In [8]:
logo = LeaveOneGroupOut()
nb_classes = 9
t0 = tm.time()
In [9]:
f1s_ls = []
acc_ls = []
adj_ls = []
for train, test in logo.split(X, y, groups=wells):
well_name = wells[test[0]]
# Scaling
scaler = preprocessing.RobustScaler().fit(X)
X_tr = scaler.transform(X[train])
X_te = scaler.transform(X[test])
Y_tr = np_utils.to_categorical(y[train], nb_classes)
in_dim = len(X_tr[0])
# Method initialization
mlp = fDNN(in_dim, nb_classes)
# Training
mlp.fit(X_tr, Y_tr, nb_epoch=10, batch_size=20, verbose=0)
# Predict
y_hat = mlp.predict_classes(X_te, verbose=0)
y_hat = medfilt(y_hat, kernel_size=7)
try:
f1s = f1_score(y[test], y_hat, average="weighted", labels=[0, 1, 2, 3, 4, 5, 6, 7, 8])
except:
f1s = 0
try:
conf = confusion_matrix(y[test], y_hat, labels=[0, 1, 2, 3, 4, 5, 6, 7, 8])
acc = accuracy(conf) # similar to f1 micro
except:
acc = 0
try:
acc_adj = accuracy_adjacent(conf, adjacent_facies)
except:
acc_adj = 0
f1s_ls += [f1s]
acc_ls += [acc]
adj_ls += [acc_adj]
print("{:>20s} f1_weigthted:{:.3f} | acc:{:.3f} | acc_adj:{:.3f}".format(well_name, f1s, acc, acc_adj))
t1 = tm.time()
print("Avg F1", np.average(f1s_ls)*100, "Avg Acc", np.average(acc_ls)*100, "Avg Adj", np.average(adj_ls)*100)
print((t1-t0), "seconds")
In [10]:
X_train = scaler.transform(X)
Y_train = np_utils.to_categorical(y, nb_classes)
mlp = fDNN(in_dim, nb_classes)
mlp.fit(X_train, Y_train, nb_epoch=10, batch_size=20, verbose=0)
Out[10]:
In [11]:
x_blind = scaler.transform(X_blind)
y_blind = mlp.predict_classes(x_blind, verbose=0)
y_blind = medfilt(y_blind, kernel_size=7)
blind_data["Facies"] = y_blind + 1 # return the original value (1-9)
In [12]:
blind_data.head()
Out[12]:
In [13]:
blind_data.to_csv("PA_Team_Submission_0.csv")
In [14]:
make_facies_log_plot(
blind_data[blind_data['Well Name'] == 'STUART'],
facies_colors)
In [15]:
make_facies_log_plot(
blind_data[blind_data['Well Name'] == 'CRAWFORD'],
facies_colors)
In [ ]: