In [164]:
%matplotlib inline
import pandas as pd
import numpy as np
from pandas import set_option

from sklearn import preprocessing
from sklearn.metrics import confusion_matrix, f1_score

from collections import Counter
import operator

from keras.models import Model, Sequential
from keras.layers import Convolution2D, Dense, Input, Dropout, Flatten, MaxPooling2D, Activation
from keras.optimizers import Nadam
from keras.utils import np_utils
from keras.utils.np_utils import to_categorical

from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import KFold
np.random.seed(42)

def accuracy(conf):
    total_correct = 0.
    nb_classes = conf.shape[0]
    for i in np.arange(0,nb_classes):
        total_correct += conf[i][i]
    acc = total_correct/sum(sum(conf))
    return acc

def label_facies(row, labels):
    return labels[ row['Facies'] -1]
    

set_option("display.max_rows", 10)
pd.options.mode.chained_assignment = None

filename = 'facies_vectors.csv'
training_data = pd.read_csv(filename)


training_data['Well Name'] = training_data['Well Name'].astype('category')
training_data['Formation'] = training_data['Formation'].astype('category')
training_data['Well Name'].unique()

# 1=sandstone  2=c_siltstone   3=f_siltstone 
# 4=marine_silt_shale 5=mudstone 6=wackestone 7=dolomite
# 8=packstone 9=bafflestone
facies_colors = ['#F4D03F', '#F5B041','#DC7633','#6E2C00',
       '#1B4F72','#2E86C1', '#AED6F1', '#A569BD', '#196F3D']

facies_labels = ['SS', 'CSiS', 'FSiS', 'SiSh', 'MS',
                 'WS', 'D','PS', 'BS']
#facies_color_map is a dictionary that maps facies labels
#to their respective colors
facies_color_map = {}
for ind, label in enumerate(facies_labels):
    facies_color_map[label] = facies_colors[ind]

training_data.loc[:,'FaciesLabels'] = training_data.apply(lambda row: label_facies(row, facies_labels), axis=1)

PE_mask = training_data['PE'].notnull().values

# standardise PE with its own mean/std, then fill the missing values with 0 (the post-standardisation mean)
mean_pe = training_data['PE'].mean()
std_pe = training_data['PE'].std()
training_data['PE'] = (training_data['PE'] - mean_pe)/std_pe

training_data['PE'] = training_data['PE'].fillna(value=0)

correct_facies_labels = training_data['Facies'].values

feature_vectors = training_data.drop(['Formation', 'FaciesLabels'], axis=1)

well_labels = training_data[['Well Name', 'Facies']].values
data_vectors = feature_vectors.drop(['Well Name', 'Facies'], axis=1).values

scaler = preprocessing.StandardScaler().fit(data_vectors)
scaled_features = scaler.transform(data_vectors)

data_out = np.hstack([well_labels, scaled_features])
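# note: well_labels mixes category/int columns, so data_out is an object-dtype array;
# the per-well feature lists are cast back to float when np.array() is applied to them below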

In [75]:
data = data_out
well_data = {}
well_names = list(set(data[:, 0]))
for name in well_names:
    well_data[name] = [[], []]
    
for row in data:
    well_data[row[0]][1].append(row[1])            # facies label
    well_data[row[0]][0].append(list(row[2::]))    # scaled feature vector

positive_lag = 10
negative_lag = 11

chunks_cnn = []
chunks_cnn_test = []
chunk_length = positive_lag+negative_lag+1 #we're going to predict the facies of the middle sample of each chunk
chunks_facies_cnn = []
chunks_facies_cnn_test = []

for name in well_names:
    well = well_data[name]
    log_values = np.array(well[0])
    # edge-pad along depth so every sample gets a full window; np.lib.pad also pads
    # the feature axis, so that padding is sliced off again
    log_values_padded = np.lib.pad(log_values, (negative_lag,positive_lag), 'edge')[:, negative_lag:-positive_lag]
    facies_values = np.array(well[1])
    for i in range(log_values.shape[0]):
        chunk = log_values_padded[i:i+chunk_length, :]
        chunk_trans = chunk.T
        if name not in ['STUART', 'CRAWFORD']:
            chunks_cnn.append(chunk_trans)
            chunks_facies_cnn.append(facies_values[i])
        else:
            chunks_cnn_test.append(chunk_trans)
            chunks_facies_cnn_test.append(facies_values[i])

chunks_cnn = np.array(chunks_cnn)
chunks_cnn_test = np.array(chunks_cnn_test)

chunks_facies_cnn = np.array(chunks_facies_cnn, dtype=np.int32)-1
# kept only so its shape can be inspected below; these wells are the ones we predict for
chunks_facies_cnn_test = np.array(chunks_facies_cnn_test)

unique_facies = len(set(chunks_facies_cnn))
print unique_facies, set(chunks_facies_cnn)
print chunks_cnn.shape, chunks_cnn_test.shape
print chunks_facies_cnn.shape, chunks_facies_cnn_test.shape


9 set([0, 1, 2, 3, 4, 5, 6, 7, 8])
(4149, 8, 22) (830, 8, 22)
(4149,) (830,)
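
To make the windowing above concrete: each depth sample becomes one (n_logs, chunk_length) "image" built from the 22 edge-padded rows starting at its index, so neighbouring samples share most of their window. A minimal sketch on a toy array (not part of the original workflow, reusing the lags defined above), plus a quick class-balance check with the Counter that was imported but never used:

In [ ]:
toy = np.arange(10, dtype=float).reshape(5, 2)     # a toy "well": 5 depth samples, 2 log curves
toy_padded = np.lib.pad(toy, (negative_lag, positive_lag), 'edge')[:, negative_lag:-positive_lag]
print toy_padded.shape                             # (5 + 11 + 10, 2)
print toy_padded[0:chunk_length, :].T.shape        # one chunk: (2, 22)
print Counter(chunks_facies_cnn)                   # class balance of the training chunks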

In [165]:
X = chunks_cnn
y = chunks_facies_cnn

X = X.reshape((chunks_cnn.shape[0], chunks_cnn.shape[1], chunks_cnn.shape[2], 1))

y = np_utils.to_categorical(y)

N = 128
cnn = Sequential()
cnn.add(Convolution2D(N, 1, 5, border_mode="same", activation="relu", input_shape=(chunks_cnn.shape[1], chunks_cnn.shape[2], 1)))
cnn.add(MaxPooling2D(pool_size=(1, 2)))
cnn.add(Dropout(0.25))
cnn.add(Convolution2D(N, 1, 3, border_mode="same", activation="relu"))
cnn.add(MaxPooling2D(pool_size=(1, 2)))
#cnn.add(Dropout(0.5))
cnn.add(Convolution2D(N, 2, 2, border_mode="same", activation="relu"))
#cnn.add(Convolution2D(N, 3, 1, border_mode="same", activation="relu"))
cnn.add(MaxPooling2D(pool_size=(2, 2)))
cnn.add(Dropout(0.8))
cnn.add(Flatten())
cnn.add(Dense(128, activation="relu"))
cnn.add(Dropout(0.5))
cnn.add(Dense(64, activation="relu"))
cnn.add(Dropout(0.5))
cnn.add(Dense(9, activation="softmax"))
cnn.compile(loss="categorical_crossentropy", optimizer="adam", metrics=['acc'])
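
To sanity-check the architecture before training, cnn.summary() prints each layer's output shape and confirms the (8, 22, 1) input still has spatial extent left after the three pooling stages (optional; this call is not part of the original run):

In [ ]:
cnn.summary()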

In [166]:
cnn.fit(X, y, nb_epoch=50, validation_split=0.33, batch_size=32, verbose=1, shuffle=True)


Train on 2779 samples, validate on 1370 samples
Epoch 1/50
2779/2779 [==============================] - 11s - loss: 2.0676 - acc: 0.2008 - val_loss: 1.9267 - val_acc: 0.3066
Epoch 2/50
2779/2779 [==============================] - 6s - loss: 1.8057 - acc: 0.3203 - val_loss: 1.5487 - val_acc: 0.3832
Epoch 3/50
2779/2779 [==============================] - 6s - loss: 1.5788 - acc: 0.3692 - val_loss: 1.4074 - val_acc: 0.4139
Epoch 4/50
2779/2779 [==============================] - 6s - loss: 1.4892 - acc: 0.3908 - val_loss: 1.3265 - val_acc: 0.4380
Epoch 5/50
2779/2779 [==============================] - 6s - loss: 1.4125 - acc: 0.4070 - val_loss: 1.2742 - val_acc: 0.5029
Epoch 6/50
2779/2779 [==============================] - 6s - loss: 1.3712 - acc: 0.4293 - val_loss: 1.2515 - val_acc: 0.5372
Epoch 7/50
2779/2779 [==============================] - 6s - loss: 1.3365 - acc: 0.4430 - val_loss: 1.2231 - val_acc: 0.5343
Epoch 8/50
2779/2779 [==============================] - 6s - loss: 1.3182 - acc: 0.4574 - val_loss: 1.1931 - val_acc: 0.5380
Epoch 9/50
2779/2779 [==============================] - 6s - loss: 1.2711 - acc: 0.4833 - val_loss: 1.1941 - val_acc: 0.5358
Epoch 10/50
2779/2779 [==============================] - 6s - loss: 1.2746 - acc: 0.4682 - val_loss: 1.2026 - val_acc: 0.5482
Epoch 11/50
2779/2779 [==============================] - 6s - loss: 1.2436 - acc: 0.4919 - val_loss: 1.1779 - val_acc: 0.5460
Epoch 12/50
2779/2779 [==============================] - 6s - loss: 1.2378 - acc: 0.4811 - val_loss: 1.1976 - val_acc: 0.5518
Epoch 13/50
2779/2779 [==============================] - 6s - loss: 1.2172 - acc: 0.5005 - val_loss: 1.1914 - val_acc: 0.5569
Epoch 14/50
2779/2779 [==============================] - 6s - loss: 1.1914 - acc: 0.5211 - val_loss: 1.1953 - val_acc: 0.5307
Epoch 15/50
2779/2779 [==============================] - 6s - loss: 1.1973 - acc: 0.5099 - val_loss: 1.1707 - val_acc: 0.5387
Epoch 16/50
2779/2779 [==============================] - 6s - loss: 1.1863 - acc: 0.5121 - val_loss: 1.1600 - val_acc: 0.5460
Epoch 17/50
2779/2779 [==============================] - 7s - loss: 1.1794 - acc: 0.5052 - val_loss: 1.1864 - val_acc: 0.5343
Epoch 18/50
2779/2779 [==============================] - 8s - loss: 1.1485 - acc: 0.5225 - val_loss: 1.1630 - val_acc: 0.5584
Epoch 19/50
2779/2779 [==============================] - 6s - loss: 1.1250 - acc: 0.5268 - val_loss: 1.1774 - val_acc: 0.5628
Epoch 20/50
2779/2779 [==============================] - 6s - loss: 1.1287 - acc: 0.5549 - val_loss: 1.1717 - val_acc: 0.5606
Epoch 21/50
2779/2779 [==============================] - 7s - loss: 1.1214 - acc: 0.5434 - val_loss: 1.1863 - val_acc: 0.5577
Epoch 22/50
2779/2779 [==============================] - 8s - loss: 1.1256 - acc: 0.5365 - val_loss: 1.1934 - val_acc: 0.5423
Epoch 23/50
2779/2779 [==============================] - 7s - loss: 1.0938 - acc: 0.5524 - val_loss: 1.1816 - val_acc: 0.5409
Epoch 24/50
2779/2779 [==============================] - 7s - loss: 1.0834 - acc: 0.5538 - val_loss: 1.1815 - val_acc: 0.5591
Epoch 25/50
2779/2779 [==============================] - 7s - loss: 1.0741 - acc: 0.5750 - val_loss: 1.1758 - val_acc: 0.5562
Epoch 26/50
2779/2779 [==============================] - 6s - loss: 1.0684 - acc: 0.5574 - val_loss: 1.1744 - val_acc: 0.5620
Epoch 27/50
2779/2779 [==============================] - 6s - loss: 1.0508 - acc: 0.5711 - val_loss: 1.1804 - val_acc: 0.5628
Epoch 28/50
2779/2779 [==============================] - 6s - loss: 1.0948 - acc: 0.5650 - val_loss: 1.1872 - val_acc: 0.5526
Epoch 29/50
2779/2779 [==============================] - 6s - loss: 1.0508 - acc: 0.5761 - val_loss: 1.1804 - val_acc: 0.5474
Epoch 30/50
2779/2779 [==============================] - 7s - loss: 1.0421 - acc: 0.5855 - val_loss: 1.1765 - val_acc: 0.5547
Epoch 31/50
2779/2779 [==============================] - 7s - loss: 1.0406 - acc: 0.5783 - val_loss: 1.1687 - val_acc: 0.5547
Epoch 32/50
2779/2779 [==============================] - 6s - loss: 1.0440 - acc: 0.5804 - val_loss: 1.1656 - val_acc: 0.5562
Epoch 33/50
2779/2779 [==============================] - 7s - loss: 1.0124 - acc: 0.5891 - val_loss: 1.1640 - val_acc: 0.5569
Epoch 34/50
2779/2779 [==============================] - 7s - loss: 1.0154 - acc: 0.5873 - val_loss: 1.1506 - val_acc: 0.5650
Epoch 35/50
2779/2779 [==============================] - 7s - loss: 1.0295 - acc: 0.5822 - val_loss: 1.1766 - val_acc: 0.5307
Epoch 36/50
2779/2779 [==============================] - 7s - loss: 1.0101 - acc: 0.5912 - val_loss: 1.2071 - val_acc: 0.5737
Epoch 37/50
2779/2779 [==============================] - 6s - loss: 1.0122 - acc: 0.6027 - val_loss: 1.1740 - val_acc: 0.5591
Epoch 38/50
2779/2779 [==============================] - 9s - loss: 0.9949 - acc: 0.6024 - val_loss: 1.2234 - val_acc: 0.5555
Epoch 39/50
2779/2779 [==============================] - 8s - loss: 0.9835 - acc: 0.6107 - val_loss: 1.1746 - val_acc: 0.5606
Epoch 40/50
2779/2779 [==============================] - 7s - loss: 0.9670 - acc: 0.6038 - val_loss: 1.1837 - val_acc: 0.5496
Epoch 41/50
2779/2779 [==============================] - 7s - loss: 0.9685 - acc: 0.6042 - val_loss: 1.1635 - val_acc: 0.5526
Epoch 42/50
2779/2779 [==============================] - 7s - loss: 0.9589 - acc: 0.6168 - val_loss: 1.1747 - val_acc: 0.5409
Epoch 43/50
2779/2779 [==============================] - 7s - loss: 0.9515 - acc: 0.6171 - val_loss: 1.1808 - val_acc: 0.5723
Epoch 44/50
2779/2779 [==============================] - 7s - loss: 0.9571 - acc: 0.6125 - val_loss: 1.1746 - val_acc: 0.5540
Epoch 45/50
2779/2779 [==============================] - 7s - loss: 0.9522 - acc: 0.6204 - val_loss: 1.1930 - val_acc: 0.5380
Epoch 46/50
2779/2779 [==============================] - 7s - loss: 0.9368 - acc: 0.6211 - val_loss: 1.1892 - val_acc: 0.5533
Epoch 47/50
2779/2779 [==============================] - 7s - loss: 0.9311 - acc: 0.6319 - val_loss: 1.2045 - val_acc: 0.5460
Epoch 48/50
2779/2779 [==============================] - 7s - loss: 0.9255 - acc: 0.6254 - val_loss: 1.1603 - val_acc: 0.5438
Epoch 49/50
2779/2779 [==============================] - 7s - loss: 0.9231 - acc: 0.6232 - val_loss: 1.1794 - val_acc: 0.5489
Epoch 50/50
2779/2779 [==============================] - 8s - loss: 0.9153 - acc: 0.6222 - val_loss: 1.2401 - val_acc: 0.5555
Out[166]:
<keras.callbacks.History at 0x7f222204cad0>

In [167]:
y_predicted = cnn.predict(X, batch_size=32, verbose=1)

y_preds = []
for row in y_predicted:
    index, value = max(enumerate(row), key=operator.itemgetter(1))
    y_preds.append(index)

print ""    
print confusion_matrix(chunks_facies_cnn, y_preds)
print f1_score(chunks_facies_cnn, y_preds, average='weighted')


4149/4149 [==============================] - 3s     

[[171  85  12   0   0   0   0   0   0]
 [ 27 741 165   0   0   5   0   2   0]
 [  0 205 563   0   0   6   1   5   0]
 [  0   2   3 157   9  80   1  19   0]
 [  0   4   8  23  81 108   4  68   0]
 [  0   0   2  23  35 350   8 164   0]
 [  0   0   1   0   0   6  83  51   0]
 [  0   0   9   4   1 133  10 523   6]
 [  0   0   0   0   0  19   1  31 134]]
0.671977638709
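
The accuracy() helper defined at the top of the notebook is never actually called; applied to the same confusion matrix it gives the overall fraction of training chunks classified correctly (a small sketch reusing the variables above):

In [ ]:
conf = confusion_matrix(chunks_facies_cnn, y_preds)
print accuracy(conf)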

Test Our Model


In [168]:
X_test = chunks_cnn_test

X_test = X_test.reshape((chunks_cnn_test.shape[0], chunks_cnn_test.shape[1], chunks_cnn_test.shape[2], 1))


y_predicted = cnn.predict(X_test, batch_size=32, verbose=1)

y_preds = []
for row in y_predicted:
    index, value = max(enumerate(row), key=operator.itemgetter(1))
    y_preds.append(index)
y_preds = np.array(y_preds)+1


830/830 [==============================] - 0s     
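
The enumerate/max loop above is a hand-rolled argmax; an equivalent one-liner (a sketch, assuming y_predicted is still the softmax output for the test chunks) is:

In [ ]:
y_preds_alt = np.argmax(y_predicted, axis=1) + 1
print np.array_equal(y_preds_alt, y_preds)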

In [169]:
print y_preds


[2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
 2 2 3 3 3 3 8 8 8 8 8 8 8 8 6 6 6 6 4 4 4 6 4 6 6 6 6 6 6 6 6 6 8 8 8 8 8
 8 8 8 8 8 8 8 8 6 6 6 6 6 8 8 8 8 8 8 6 6 6 6 6 6 6 6 6 6 6 4 4 6 8 8 8 8
 8 8 8 6 6 6 6 6 6 3 3 3 3 3 3 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 3
 3 3 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 6 8 8 8 8 8 8 8 8 6 6 6 6 6 6 3 3 3
 3 3 3 2 2 2 2 2 2 2 2 2 3 3 3 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8
 8 8 8 8 8 3 3 3 3 3 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 3 8 8 8 8 8 8 8 8 8 8
 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 3 3 8 8 8 8 8 8 8 8 8 6 6 6 6 3 3 3 3 6
 7 8 8 8 8 8 8 8 8 8 8 8 7 7 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 6 6 6 6 6 6
 6 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3
 3 3 3 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 6 6 6 6 6 6 6 6 6 6 6 6 6
 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 5 4 4 4 4 4 4
 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 6 8 8 8 8 8 8
 8 8 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 4 4 4 6 6 6 6 4 4 6 6 4 4 4 6
 6 6 6 6 6 8 8 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 2 2 2 2 1 1 1 1 1 1 1 1 1
 1 6 6 6 6 6 8 8 8 8 8 8 8 8 8 8 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 8 8 8 8
 8 8 8 8 8 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 8 8 8 6 6 6 3 3 2 2 2 2 2 2 2
 2 2 2 2 2 2 2 2 2 2 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8
 8 8 3 3 3 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 3 8 8 8 8 8 8 8 8 8 8 8 8 8 8 6 6
 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 3 8 8 8 8 8 8 8 8 8 8 6 6 6 6 6 6 6
 3 3 3 3 2 2 2 2 2 3 3 3 3 8 8 8 8 8 8 8 8 8 6 7 7 7 7 7 7 7 7 7 7 7 7 7 8
 8 8 8 8 8 8 8 8 8 8 8 7 7 7 7 7 7 7 7 7 7 7 8 8 8 8 8 8 8 8 8 8 8 6 6 3 3
 3 3 3 3 3 2 2 2 2 2 2 2 2 2 2 2]

In [170]:
test_data = pd.read_csv("validation_data_nofacies_online.csv")
# note: this assumes the order of the test chunks built from well_data above matches the row order of this file
test_data['Facies'] = pd.Series(y_preds)
test_data.to_csv("validation_data_with_facies_new.csv")

In [171]:
print test_data.head()


  Formation Well Name   Depth      GR  ILD_log10  DeltaPHI  PHIND     PE  \
0     A1 SH    STUART  2808.0  66.276      0.630       3.3  10.65  3.591   
1     A1 SH    STUART  2808.5  77.252      0.585       6.5  11.95  3.341   
2     A1 SH    STUART  2809.0  82.899      0.566       9.4  13.60  3.064   
3     A1 SH    STUART  2809.5  80.671      0.593       9.5  13.25  2.977   
4     A1 SH    STUART  2810.0  75.971      0.638       8.7  12.35  3.020   

   NM_M  RELPOS  Facies  
0     1   1.000       2  
1     1   0.978       2  
2     1   0.956       2  
3     1   0.933       2  
4     1   0.911       2  
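
The numeric facies can be mapped back to their mnemonics with the facies_labels list defined at the top; a small sketch (the FaciesLabels column added here is illustrative, not part of the original export):

In [ ]:
test_data['FaciesLabels'] = test_data['Facies'].apply(lambda f: facies_labels[int(f) - 1])
print test_data[['Well Name', 'Facies', 'FaciesLabels']].head()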

In [ ]: