In [164]:
%matplotlib inline
import pandas as pd
import numpy as np
from pandas import set_option
from sklearn import preprocessing
from sklearn.metrics import confusion_matrix, f1_score
from collections import Counter
import operator
from keras.models import Model, Sequential
from keras.layers import Convolution2D, Dense, Input, Dropout, Flatten, MaxPooling2D, Activation
from keras.optimizers import Nadam
from keras.utils import np_utils
from keras.utils.np_utils import to_categorical
from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import KFold
np.random.seed(42)
def accuracy(conf):
    """Overall accuracy from a square confusion matrix.

    Parameters
    ----------
    conf : 2-D numpy array, shape (n_classes, n_classes)
        Confusion matrix (rows = true class, columns = predicted class).

    Returns
    -------
    float
        Fraction of correctly classified samples: trace / total count.
    """
    # np.trace sums the diagonal (correct predictions); float() guards
    # against integer floor-division under Python 2.
    total_correct = float(np.trace(conf))
    return total_correct / conf.sum()
def label_facies(row, labels):
    """Translate a row's 1-based 'Facies' code into its string label.

    Parameters
    ----------
    row : mapping with a 'Facies' entry (e.g. a DataFrame row)
    labels : sequence of label strings, indexed by facies code - 1

    Returns
    -------
    str
        The label corresponding to row['Facies'].
    """
    facies_index = row['Facies'] - 1  # facies codes start at 1
    return labels[facies_index]
set_option("display.max_rows", 10)
pd.options.mode.chained_assignment = None

# --- Load training data; mark the categorical columns. ---
filename = 'facies_vectors.csv'
training_data = pd.read_csv(filename)
training_data['Well Name'] = training_data['Well Name'].astype('category')
training_data['Formation'] = training_data['Formation'].astype('category')
training_data['Well Name'].unique()

# 1=sandstone 2=c_siltstone 3=f_siltstone
# 4=marine_silt_shale 5=mudstone 6=wackestone 7=dolomite
# 8=packstone 9=bafflestone
facies_colors = ['#F4D03F', '#F5B041', '#DC7633', '#6E2C00',
                 '#1B4F72', '#2E86C1', '#AED6F1', '#A569BD', '#196F3D']
facies_labels = ['SS', 'CSiS', 'FSiS', 'SiSh', 'MS',
                 'WS', 'D', 'PS', 'BS']
# facies_color_map maps each facies label to its plotting colour.
facies_color_map = {}
for ind, label in enumerate(facies_labels):
    facies_color_map[label] = facies_colors[ind]
training_data.loc[:, 'FaciesLabels'] = training_data.apply(
    lambda row: label_facies(row, facies_labels), axis=1)

# --- Standardise PE, then zero-fill missing entries. ---
# (0 is the mean after standardisation, so NaNs become "average PE".)
# FIX: the original computed PE_mask twice; the first assignment was dead
# (overwritten before any use), so only the post-scaling mask is kept.
mean_pe = training_data['PE'].mean()
std_pe = training_data['PE'].std()
training_data['PE'] = (training_data['PE'] - mean_pe) / std_pe
PE_mask = training_data['PE'].notnull().values  # True where PE was measured
training_data['PE'] = training_data['PE'].fillna(value=0)

# --- Assemble the feature matrix. ---
correct_facies_labels = training_data['Facies'].values
feature_vectors = training_data.drop(['Formation', 'FaciesLabels'], axis=1)
well_labels = training_data[['Well Name', 'Facies']].values
data_vectors = feature_vectors.drop(['Well Name', 'Facies'], axis=1).values
scaler = preprocessing.StandardScaler().fit(data_vectors)
scaled_features = scaler.transform(data_vectors)
# Columns of data_out: [well name, facies code, scaled log features...]
data_out = np.hstack([well_labels, scaled_features])
In [75]:
data = data_out
well_data = {}
well_names = list(set(data[:, 0]))
for name in well_names:
well_data[name] = [[], []]
for row in data:
well_data[row[0]][1].append(row[1])
well_data[row[0]][0].append(list(row[2::]))
positive_lag = 10
negative_lag = 11
chunks_cnn = []
chunks_cnn_test = []
chunk_length = positive_lag+negative_lag+1 #were gonna predict middle facies
chunks_facies_cnn = []
for name in well_names:
if name not in ['STUART', 'CRAWFORD']:
test_well_data = well_data[name]
log_values = np.array(test_well_data[0])
log_values_padded = np.lib.pad(log_values, (negative_lag,positive_lag), 'edge')[:, negative_lag:-positive_lag]
facies_values = np.array(test_well_data[1])
for i in range(log_values.shape[0]):
chunk = log_values_padded[i:i+chunk_length, :]
chunk_trans = chunk.T
chunks_cnn.append(chunk_trans)
chunks_facies_cnn.append(facies_values[i])
else:
test_well_data = well_data[name]
log_values = np.array(test_well_data[0])
log_values_padded = np.lib.pad(log_values, (negative_lag,positive_lag), 'edge')[:, negative_lag:-positive_lag]
facies_values = np.array(test_well_data[1])
for i in range(log_values.shape[0]):
chunk = log_values_padded[i:i+chunk_length, :]
chunk_trans = chunk.T
chunks_cnn_test.append(chunk_trans)
chunks_cnn = np.array(chunks_cnn)
chunks_cnn_test = np.array(chunks_cnn_test)
chunks_facies_cnn = np.array(chunks_facies_cnn, dtype=np.int32)-1
unique_facies = len(set(chunks_facies_cnn))
print unique_facies, set(chunks_facies_cnn)
print chunks_cnn.shape, chunks_cnn_test.shape
print chunks_facies_cnn.shape, chunks_facies_cnn_test.shape
In [165]:
X = chunks_cnn
y = chunks_facies_cnn
X = X.reshape((chunks_cnn.shape[0], chunks_cnn.shape[1], chunks_cnn.shape[2], 1))
y = np_utils.to_categorical(y)
N = 128
cnn = Sequential()
cnn.add(Convolution2D(N, 1, 5, border_mode="same",activation="relu",input_shape=(chunks_cnn.shape[1], chunks_cnn.shape[2], 1)))
cnn.add(MaxPooling2D(pool_size=(1, 2)))
cnn.add(Dropout(0.25))
cnn.add(Convolution2D(N, 1, 3, border_mode="same",activation="relu",input_shape=(chunks_cnn.shape[1], chunks_cnn.shape[2], 1)))
cnn.add(MaxPooling2D(pool_size=(1, 2)))
#cnn.add(Dropout(0.5))
cnn.add(Convolution2D(N, 2, 2, border_mode="same", activation="relu"))
#cnn.add(Convolution2D(N, 3, 1, border_mode="same", activation="relu"))
cnn.add(MaxPooling2D(pool_size=(2, 2)))
cnn.add(Dropout(0.8))
cnn.add(Flatten())
cnn.add(Dense(128, activation="relu"))
cnn.add(Dropout(0.5))
cnn.add(Dense(64, activation="relu"))
cnn.add(Dropout(0.5))
cnn.add(Dense(9, activation="softmax"))
cnn.compile(loss="categorical_crossentropy", optimizer="adam", metrics=['acc'])
In [166]:
cnn.fit(X, y, nb_epoch=50, validation_split=0.33, batch_size=32, verbose=1, show_accuracy=True, shuffle=True)
Out[166]:
In [167]:
y_predicted = cnn.predict(X, batch_size=32, verbose=1)
y_preds = []
for row in y_predicted:
index, value = max(enumerate(row), key=operator.itemgetter(1))
y_preds.append(index)
print ""
print confusion_matrix(chunks_facies_cnn, y_preds)
print f1_score(chunks_facies_cnn, y_preds, average='weighted')
In [168]:
X_test = chunks_cnn_test
X_test = X_test.reshape((chunks_cnn_test.shape[0], chunks_cnn_test.shape[1], chunks_cnn_test.shape[2], 1))
y_predicted = cnn.predict(X_test, batch_size=32, verbose=1)
y_preds = []
for row in y_predicted:
index, value = max(enumerate(row), key=operator.itemgetter(1))
y_preds.append(index)
y_preds = np.array(y_preds)+1
In [169]:
print y_preds
In [170]:
test_data = pd.read_csv("validation_data_nofacies_online.csv")
test_data['Facies'] = pd.Series(y_preds)
test_data.to_csv("validation_data_with_facies_new.csv")
In [171]:
print test_data.head()
In [ ]: