Houmath's excellent notebook + median filtering of the augmented features and of the predicted facies


In [2]:
from __future__ import division
%matplotlib inline
import matplotlib as mpl
import matplotlib.pyplot as plt
mpl.rcParams['figure.figsize']=(20.0,10.0)
inline_rc = dict(mpl.rcParams)

import pandas as pd
import numpy as np
import seaborn as sns

from sklearn import preprocessing, metrics
from sklearn.model_selection import LeavePGroupsOut, GridSearchCV, validation_curve
from sklearn.metrics import confusion_matrix, f1_score, accuracy_score
from sklearn.multiclass import OneVsOneClassifier, OneVsRestClassifier
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from sklearn.datasets import load_svmlight_files

from scipy.signal import medfilt, gaussian
from scipy.sparse import vstack
from scipy.spatial.distance import correlation

from pandas.plotting import scatter_matrix  # pandas.tools.plotting was removed in newer pandas

import matplotlib.colors as colors

import xgboost as xgb
from xgboost.sklearn import XGBClassifier
from classification_utilities import display_cm, display_adj_cm
seed = 123
np.random.seed(seed)

In [3]:
# Parameters
feature_names = ['GR', 'ILD_log10', 'DeltaPHI', 'PHIND', 'PE', 'NM_M', 'RELPOS']
facies_names = ['SS', 'CSiS', 'FSiS', 'SiSh', 'MS', 'WS', 'D', 'PS', 'BS']
facies_colors = ['#F4D03F', '#F5B041','#DC7633','#6E2C00', '#1B4F72','#2E86C1', '#AED6F1', '#A569BD', '#196F3D']
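For reference, a minimal sketch (not part of the original notebook) assuming the standard contest encoding, in which the Facies codes in facies_vectors.csv run from 1 to 9 in the order listed above:

# Hypothetical lookup from facies code (1-9) to its name and plot colour
facies_lookup = {code + 1: (name, colour)
                 for code, (name, colour) in enumerate(zip(facies_names, facies_colors))}
print(facies_lookup[1])  # ('SS', '#F4D03F')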

In [4]:
# Load data from file
data = pd.read_csv('facies_vectors.csv')

In [5]:
X = data[feature_names].values
y = data['Facies'].values

In [6]:
well = data['Well Name'].values
depth = data['Depth'].values

In [7]:
# Define function for plotting feature statistics
def plot_feature_stats(X, y, feature_names, facies_colors, facies_names):
    
    # Remove NaN
    nan_idx = np.any(np.isnan(X), axis=1)
    X = X[np.logical_not(nan_idx), :]
    y = y[np.logical_not(nan_idx)]
    
    # Merge features and labels into a single DataFrame
    features = pd.DataFrame(X, columns=feature_names)
    labels = pd.DataFrame(y, columns=['Facies'])
    # Facies codes are 1-based (1-9), so offset the enumeration index
    for f_idx, facies in enumerate(facies_names):
        labels[labels[:] == f_idx + 1] = facies
    data = pd.concat((labels, features), axis=1)

    # Plot features statistics
    facies_color_map = {}
    for ind, label in enumerate(facies_names):
        facies_color_map[label] = facies_colors[ind]

    sns.pairplot(data, hue='Facies', palette=facies_color_map, hue_order=list(reversed(facies_names)))

In [8]:
# Feature distribution
plot_feature_stats(X, y, feature_names, facies_colors, facies_names)
mpl.rcParams.update(inline_rc)



In [9]:
# Facies per well
for w_idx, w in enumerate(np.unique(well)):
    ax = plt.subplot(3, 4, w_idx+1)
    hist = np.histogram(y[well == w], bins=np.arange(len(facies_names)+1)+.5)
    plt.bar(np.arange(len(hist[0])), hist[0], color=facies_colors, align='center')
    ax.set_xticks(np.arange(len(hist[0])))
    ax.set_xticklabels(facies_names)
    ax.set_title(w)



In [10]:
# Features per well
for w_idx, w in enumerate(np.unique(well)):
    ax = plt.subplot(3, 4, w_idx+1)
    hist = np.logical_not(np.any(np.isnan(X[well == w, :]), axis=0))
    plt.bar(np.arange(len(hist)), hist, color=facies_colors, align='center')
    ax.set_xticks(np.arange(len(hist)))
    ax.set_xticklabels(feature_names)
    ax.set_yticks([0, 1])
    ax.set_yticklabels(['miss', 'hit'])
    ax.set_title(w)



In [11]:
# Impute missing PE values with a random forest trained on the complete rows
reg = RandomForestRegressor(max_features='sqrt', n_estimators=50)
DataImpAll = data[feature_names].copy()
DataImp = DataImpAll.dropna(axis=0, inplace=False)
Ximp = DataImp.loc[:, DataImp.columns != 'PE']
Yimp = DataImp.loc[:, 'PE']
reg.fit(Ximp, Yimp)

# Column 4 of X is PE; fill its NaN entries with the regressor's predictions
pe_missing = np.array(DataImpAll.PE.isnull())
X[pe_missing, 4] = reg.predict(DataImpAll.loc[pe_missing, :].drop('PE', axis=1, inplace=False))
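As a quick optional sanity check (not part of the original run; reg_check is a hypothetical helper), the out-of-bag R² of an equivalent forest gives a rough idea of how well PE can be reconstructed from the other logs:

# Hypothetical check, assuming the cells above have run: out-of-bag R^2 of the PE imputer
reg_check = RandomForestRegressor(max_features='sqrt', n_estimators=50,
                                  oob_score=True, random_state=seed)
reg_check.fit(Ximp, Yimp)
print('PE imputation OOB R^2: %.3f' % reg_check.oob_score_)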

In [12]:
# Feature windows concatenation function
def augment_features_window(X, N_neig):
    
    # Parameters
    N_row = X.shape[0]
    N_feat = X.shape[1]

    # Zero padding
    X = np.vstack((np.zeros((N_neig, N_feat)), X, (np.zeros((N_neig, N_feat)))))

    # Loop over windows
    X_aug = np.zeros((N_row, N_feat*(2*N_neig+1)))
    for r in np.arange(N_row)+N_neig:
        this_row = []
        for c in np.arange(-N_neig,N_neig+1):
            this_row = np.hstack((this_row, X[r+c]))
        X_aug[r-N_neig] = this_row

    return X_aug
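A minimal toy illustration (not from the original run) of what the windowing produces:

# Toy data: 4 samples, 2 features; one neighbour on each side gives 2*(2*1+1) = 6 columns
X_toy = np.arange(8, dtype=float).reshape(4, 2)
X_toy_aug = augment_features_window(X_toy, N_neig=1)
print(X_toy_aug.shape)  # (4, 6)
print(X_toy_aug[0])     # first row: zero padding, then samples 0 and 1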

In [13]:
# Feature gradient computation function
def augment_features_gradient(X, depth):
    
    # Compute features gradient
    d_diff = np.diff(depth).reshape((-1, 1))
    d_diff[d_diff==0] = 0.001
    X_diff = np.diff(X, axis=0)
    X_grad = X_diff / d_diff
        
    # Compensate for last missing value
    X_grad = np.concatenate((X_grad, np.zeros((1, X_grad.shape[1]))))
    
    return X_grad

# Median filtering function (smooths each feature column along depth)
def augment_features_median(X, dist=3):
    X_out = np.zeros((X.shape[0], X.shape[1]))
    for i in range(X.shape[1]):
        X_out[:, i] = medfilt(X[:, i], dist)
    return X_out
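For intuition (toy data, not from the notebook), a small odd kernel suppresses isolated spikes while leaving longer runs intact; note that scipy's medfilt zero-pads at the edges:

spiky = np.array([0., 0., 10., 0., 0., 1., 1., 1.])
print(medfilt(spiky, 3))  # the single-sample spike at index 2 is removed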

In [14]:
# Feature augmentation function
def augment_features(X, well, depth, N_neig=1):
    
    # Output width: windowed features (2*N_neig+1 blocks) + gradient + median-filtered
    # features + median-filtered gradient (42 columns for 7 features and N_neig=1)
    N_feat = X.shape[1]
    X_aug = np.zeros((X.shape[0], N_feat * (2 * N_neig + 1) + 3 * N_feat))
    
    # Augment features well by well
    for w in np.unique(well):
        w_idx = np.where(well == w)[0]
        X_aug_win = augment_features_window(X[w_idx, :], N_neig)
        X_aug_grad = augment_features_gradient(X[w_idx, :], depth[w_idx])
        X_aug_med = augment_features_median(X[w_idx, :])
        X_aug_grad_med = augment_features_median(X_aug_grad, dist=7)
        X_aug[w_idx, :] = np.concatenate((X_aug_win, X_aug_grad, X_aug_med, X_aug_grad_med), axis=1)
    
    # Find padded rows
    padded_rows = np.unique(np.where(X_aug[:, 0:N_feat] == np.zeros((1, N_feat)))[0])
    
    return X_aug, padded_rows

In [15]:
X_aug, padded_rows = augment_features(X, well, depth)



In [16]:
# Initialize model selection methods
lpgo = LeavePGroupsOut(2)

# Generate splits
split_list = []
for train, val in lpgo.split(X, y, groups=data['Well Name']):
    hist_tr = np.histogram(y[train], bins=np.arange(len(facies_names)+1)+.5)
    hist_val = np.histogram(y[val], bins=np.arange(len(facies_names)+1)+.5)
    if np.all(hist_tr[0] != 0) & np.all(hist_val[0] != 0):
        split_list.append({'train':train, 'val':val})
            
# Print splits
for s, split in enumerate(split_list):
    print('Split %d' % s)
    print('    training:   %s' % (data['Well Name'][split['train']].unique()))
    print('    validation: %s' % (data['Well Name'][split['val']].unique()))


Split 0
    training:   ['SHRIMPLIN' 'SHANKLE' 'LUKE G U' 'KIMZEY A' 'CROSS H CATTLE' 'NOLAN'
 'Recruit F9' 'NEWBY']
    validation: ['ALEXANDER D' 'CHURCHMAN BIBLE']
Split 1
    training:   ['SHRIMPLIN' 'SHANKLE' 'LUKE G U' 'KIMZEY A' 'NOLAN' 'Recruit F9' 'NEWBY'
 'CHURCHMAN BIBLE']
    validation: ['ALEXANDER D' 'CROSS H CATTLE']
Split 2
    training:   ['SHRIMPLIN' 'SHANKLE' 'LUKE G U' 'CROSS H CATTLE' 'NOLAN' 'Recruit F9'
 'NEWBY' 'CHURCHMAN BIBLE']
    validation: ['ALEXANDER D' 'KIMZEY A']
Split 3
    training:   ['SHRIMPLIN' 'SHANKLE' 'LUKE G U' 'KIMZEY A' 'CROSS H CATTLE' 'Recruit F9'
 'NEWBY' 'CHURCHMAN BIBLE']
    validation: ['ALEXANDER D' 'NOLAN']
Split 4
    training:   ['SHRIMPLIN' 'LUKE G U' 'KIMZEY A' 'CROSS H CATTLE' 'NOLAN' 'Recruit F9'
 'NEWBY' 'CHURCHMAN BIBLE']
    validation: ['ALEXANDER D' 'SHANKLE']
Split 5
    training:   ['SHRIMPLIN' 'ALEXANDER D' 'SHANKLE' 'LUKE G U' 'KIMZEY A' 'NOLAN'
 'Recruit F9' 'NEWBY']
    validation: ['CROSS H CATTLE' 'CHURCHMAN BIBLE']
Split 6
    training:   ['SHRIMPLIN' 'ALEXANDER D' 'SHANKLE' 'LUKE G U' 'CROSS H CATTLE' 'NOLAN'
 'Recruit F9' 'NEWBY']
    validation: ['KIMZEY A' 'CHURCHMAN BIBLE']
Split 7
    training:   ['SHRIMPLIN' 'ALEXANDER D' 'SHANKLE' 'KIMZEY A' 'CROSS H CATTLE' 'NOLAN'
 'Recruit F9' 'NEWBY']
    validation: ['LUKE G U' 'CHURCHMAN BIBLE']
Split 8
    training:   ['SHRIMPLIN' 'ALEXANDER D' 'SHANKLE' 'LUKE G U' 'KIMZEY A' 'CROSS H CATTLE'
 'NOLAN' 'Recruit F9']
    validation: ['NEWBY' 'CHURCHMAN BIBLE']
Split 9
    training:   ['SHRIMPLIN' 'ALEXANDER D' 'SHANKLE' 'LUKE G U' 'KIMZEY A' 'CROSS H CATTLE'
 'Recruit F9' 'NEWBY']
    validation: ['NOLAN' 'CHURCHMAN BIBLE']
Split 10
    training:   ['SHRIMPLIN' 'ALEXANDER D' 'SHANKLE' 'LUKE G U' 'KIMZEY A' 'CROSS H CATTLE'
 'NOLAN' 'NEWBY']
    validation: ['Recruit F9' 'CHURCHMAN BIBLE']
Split 11
    training:   ['SHRIMPLIN' 'ALEXANDER D' 'LUKE G U' 'KIMZEY A' 'CROSS H CATTLE' 'NOLAN'
 'Recruit F9' 'NEWBY']
    validation: ['SHANKLE' 'CHURCHMAN BIBLE']
Split 12
    training:   ['ALEXANDER D' 'SHANKLE' 'LUKE G U' 'KIMZEY A' 'CROSS H CATTLE' 'NOLAN'
 'Recruit F9' 'NEWBY']
    validation: ['SHRIMPLIN' 'CHURCHMAN BIBLE']
Split 13
    training:   ['SHRIMPLIN' 'ALEXANDER D' 'SHANKLE' 'LUKE G U' 'NOLAN' 'Recruit F9'
 'NEWBY' 'CHURCHMAN BIBLE']
    validation: ['KIMZEY A' 'CROSS H CATTLE']
Split 14
    training:   ['SHRIMPLIN' 'ALEXANDER D' 'SHANKLE' 'LUKE G U' 'KIMZEY A' 'NOLAN'
 'Recruit F9' 'CHURCHMAN BIBLE']
    validation: ['CROSS H CATTLE' 'NEWBY']
Split 15
    training:   ['SHRIMPLIN' 'ALEXANDER D' 'SHANKLE' 'LUKE G U' 'KIMZEY A' 'NOLAN' 'NEWBY'
 'CHURCHMAN BIBLE']
    validation: ['CROSS H CATTLE' 'Recruit F9']
Split 16
    training:   ['ALEXANDER D' 'SHANKLE' 'LUKE G U' 'KIMZEY A' 'NOLAN' 'Recruit F9' 'NEWBY'
 'CHURCHMAN BIBLE']
    validation: ['SHRIMPLIN' 'CROSS H CATTLE']
Split 17
    training:   ['SHRIMPLIN' 'ALEXANDER D' 'SHANKLE' 'CROSS H CATTLE' 'NOLAN' 'Recruit F9'
 'NEWBY' 'CHURCHMAN BIBLE']
    validation: ['LUKE G U' 'KIMZEY A']
Split 18
    training:   ['SHRIMPLIN' 'ALEXANDER D' 'SHANKLE' 'LUKE G U' 'CROSS H CATTLE' 'NOLAN'
 'Recruit F9' 'CHURCHMAN BIBLE']
    validation: ['KIMZEY A' 'NEWBY']
Split 19
    training:   ['SHRIMPLIN' 'ALEXANDER D' 'SHANKLE' 'LUKE G U' 'CROSS H CATTLE'
 'Recruit F9' 'NEWBY' 'CHURCHMAN BIBLE']
    validation: ['KIMZEY A' 'NOLAN']
Split 20
    training:   ['SHRIMPLIN' 'ALEXANDER D' 'SHANKLE' 'LUKE G U' 'CROSS H CATTLE' 'NOLAN'
 'NEWBY' 'CHURCHMAN BIBLE']
    validation: ['KIMZEY A' 'Recruit F9']
Split 21
    training:   ['SHRIMPLIN' 'ALEXANDER D' 'LUKE G U' 'CROSS H CATTLE' 'NOLAN' 'Recruit F9'
 'NEWBY' 'CHURCHMAN BIBLE']
    validation: ['SHANKLE' 'KIMZEY A']
Split 22
    training:   ['ALEXANDER D' 'SHANKLE' 'LUKE G U' 'CROSS H CATTLE' 'NOLAN' 'Recruit F9'
 'NEWBY' 'CHURCHMAN BIBLE']
    validation: ['SHRIMPLIN' 'KIMZEY A']
Split 23
    training:   ['SHRIMPLIN' 'ALEXANDER D' 'SHANKLE' 'LUKE G U' 'KIMZEY A' 'CROSS H CATTLE'
 'Recruit F9' 'CHURCHMAN BIBLE']
    validation: ['NOLAN' 'NEWBY']
Split 24
    training:   ['SHRIMPLIN' 'ALEXANDER D' 'LUKE G U' 'KIMZEY A' 'CROSS H CATTLE' 'NOLAN'
 'Recruit F9' 'CHURCHMAN BIBLE']
    validation: ['SHANKLE' 'NEWBY']
Split 25
    training:   ['SHRIMPLIN' 'ALEXANDER D' 'SHANKLE' 'LUKE G U' 'KIMZEY A' 'CROSS H CATTLE'
 'NEWBY' 'CHURCHMAN BIBLE']
    validation: ['NOLAN' 'Recruit F9']
Split 26
    training:   ['ALEXANDER D' 'SHANKLE' 'LUKE G U' 'KIMZEY A' 'CROSS H CATTLE'
 'Recruit F9' 'NEWBY' 'CHURCHMAN BIBLE']
    validation: ['SHRIMPLIN' 'NOLAN']
Split 27
    training:   ['SHRIMPLIN' 'ALEXANDER D' 'LUKE G U' 'KIMZEY A' 'CROSS H CATTLE' 'NOLAN'
 'NEWBY' 'CHURCHMAN BIBLE']
    validation: ['SHANKLE' 'Recruit F9']
Split 28
    training:   ['ALEXANDER D' 'LUKE G U' 'KIMZEY A' 'CROSS H CATTLE' 'NOLAN' 'Recruit F9'
 'NEWBY' 'CHURCHMAN BIBLE']
    validation: ['SHRIMPLIN' 'SHANKLE']
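As a quick check (a sketch, not in the original run): with 10 wells and P=2, LeavePGroupsOut enumerates C(10,2) = 45 candidate splits, and the facies-coverage filter above retains the 29 listed:

from scipy.special import comb
print('candidate splits: %d' % comb(len(np.unique(well)), 2, exact=True))  # 45
print('retained splits:  %d' % len(split_list))                            # 29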

In [17]:
# Parameter search grid (extend these lists for a full grid search... may take a lot of time)
from itertools import product

md_grid = [3]
mcw_grid = [1]
gamma_grid = [0.3]
ss_grid = [0.7]
csb_grid = [0.8]
alpha_grid = [0.2]
lr_grid = [0.05]
ne_grid = [200]
param_grid = [{'maxdepth': N,
               'minchildweight': M,
               'gamma': S,
               'subsample': L,
               'colsamplebytree': K,
               'alpha': P,
               'learningrate': R,
               'n_estimators': E}
              for N, M, S, L, K, P, R, E in product(md_grid, mcw_grid, gamma_grid, ss_grid,
                                                    csb_grid, alpha_grid, lr_grid, ne_grid)]

In [18]:
# Train and test a classifier
# Note: relies on the classifier `clf` being defined in the enclosing (global) scope
def train_and_test(X_tr, y_tr, X_v, well_v):
    
    # Feature normalization
    scaler = preprocessing.RobustScaler(quantile_range=(25.0, 75.0)).fit(X_tr)
    X_tr = scaler.transform(X_tr)
    X_v = scaler.transform(X_v)

    # Train classifier
    clf.fit(X_tr, y_tr)
    
    # Test classifier
    y_v_hat = clf.predict(X_v)
    
    # Clean isolated facies for each well with a median filter on the predictions
    for w in np.unique(well_v):
        y_v_hat[well_v == w] = medfilt(y_v_hat[well_v == w], kernel_size=5)
    
    return y_v_hat
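A toy illustration (not from the original run) of what the kernel_size=5 median filter does to a predicted facies sequence:

# A single mislabelled sample inside a homogeneous interval is smoothed away
y_hat_toy = np.array([3., 3., 3., 7., 3., 3., 3.])
print(medfilt(y_hat_toy, kernel_size=5))  # all values become 3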

In [109]:
# For each set of parameters
score_param = []
for param in param_grid:
    
    clf = OneVsOneClassifier(XGBClassifier(
            learning_rate = param['learningrate'],
            n_estimators=param['n_estimators'],
            max_depth=param['maxdepth'],
            min_child_weight=param['minchildweight'],
            gamma = param['gamma'],
            subsample=param['subsample'],
            colsample_bytree=param['colsamplebytree'],
            reg_alpha = param['alpha'],
            nthread =1,
            seed = seed, silent=True
        ) , n_jobs=1)
    # For each data split
    score_split = []
    for split in split_list:
    
        # Remove padded rows
        split_train_no_pad = np.setdiff1d(split['train'], padded_rows)
        
        # Select training and validation data from current split
        X_tr = X_aug[split_train_no_pad, :]
        X_v = X_aug[split['val'], :]
        y_tr = y[split_train_no_pad]
        y_v = y[split['val']]
        
        # Select well labels for validation data
        well_v = well[split['val']]

        # Train and test
        y_v_hat = train_and_test(X_tr, y_tr, X_v, well_v)
        
        score = f1_score(y_v, y_v_hat, average='micro')
        print("cur score", score)
        score_split.append(score)
        print("running mean", np.mean(score_split))
        
    # Average score for this param
    score_param.append(np.mean(score_split))
    print('F1 score = %.3f %s' % (score_param[-1], param))
          
# Best set of parameters
best_idx = np.argmax(score_param)
param_best = param_grid[best_idx]
score_best = score_param[best_idx]
print('\nBest F1 score = %.3f %s' % (score_best, param_best))


cur score 0.608045977011
running mean 0.608045977011
cur score 0.501551189245
running mean 0.554798583128
cur score 0.569060773481
running mean 0.559552646579
cur score 0.594778660613
running mean 0.568359150088
cur score 0.592349726776
running mean 0.573157265425
cur score 0.500552486188
running mean 0.561056468886
cur score 0.55753262159
running mean 0.560553062129
cur score 0.615028901734
running mean 0.56736254208
cur score 0.516724336794
running mean 0.561736074826
cur score 0.567765567766
running mean 0.56233902412
cur score 0.417355371901
running mean 0.5491586921
cur score 0.586166471278
running mean 0.552242673698
cur score 0.616
running mean 0.557147083413
cur score 0.485106382979
running mean 0.552001319097
cur score 0.505186721992
running mean 0.548880345956
cur score 0.471600688468
running mean 0.544050367363
cur score 0.497942386831
running mean 0.541338133214
cur score 0.605555555556
running mean 0.544905767789
cur score 0.563192904656
running mean 0.545868248677
cur score 0.565573770492
running mean 0.546853524767
cur score 0.576107899807
running mean 0.548246590246
cur score 0.563063063063
running mean 0.548920066283
cur score 0.595604395604
running mean 0.550949819731
cur score 0.571753986333
running mean 0.551816660007
cur score 0.59100877193
running mean 0.553384344483
cur score 0.614141414141
running mean 0.555721154855
cur score 0.581264108352
running mean 0.55666719017
cur score 0.614366729679
running mean 0.558727888009
cur score 0.584782608696
running mean 0.559626326654
F1 score = 0.560 {'gamma': 0.3, 'colsamplebytree': 0.8, 'alpha': 0.2, 'minchildweight': 1, 'n_estimators': 200, 'subsample': 0.7, 'maxdepth': 3, 'learningrate': 0.08}

Best F1 score = 0.560 {'gamma': 0.3, 'colsamplebytree': 0.8, 'alpha': 0.2, 'minchildweight': 1, 'n_estimators': 200, 'subsample': 0.7, 'maxdepth': 3, 'learningrate': 0.08}

In [110]:
print('Min Max', np.min(score_split), np.max(score_split))


Min Max 0.417355371901 0.616

In [19]:
# Re-create the classifier with the selected parameters, using more threads for the
# final fit; train_and_test() below picks up this global `clf`
for param in param_grid:
    
    clf = OneVsOneClassifier(XGBClassifier(
            learning_rate = param['learningrate'],
            n_estimators=param['n_estimators'],
            max_depth=param['maxdepth'],
            min_child_weight=param['minchildweight'],
            gamma = param['gamma'],
            subsample=param['subsample'],
            colsample_bytree=param['colsamplebytree'],
            reg_alpha = param['alpha'],
            nthread=4,
            seed=seed,
        ), n_jobs=-1)

In [20]:
clf


Out[20]:
OneVsOneClassifier(estimator=XGBClassifier(base_score=0.5, colsample_bylevel=1, colsample_bytree=0.8,
       gamma=0.3, learning_rate=0.05, max_delta_step=0, max_depth=3,
       min_child_weight=1, missing=None, n_estimators=200, nthread=4,
       objective='binary:logistic', reg_alpha=0.2, reg_lambda=1,
       scale_pos_weight=1, seed=123, silent=True, subsample=0.7),
          n_jobs=-1)

In [21]:
# Load data from file
test_data = pd.read_csv('../validation_data_nofacies.csv')

In [22]:
# Prepare training data
X_tr = X
y_tr = y

# Augment features
X_tr, padded_rows = augment_features(X_tr, well, depth)

# Remove padded rows
X_tr = np.delete(X_tr, padded_rows, axis=0)
y_tr = np.delete(y_tr, padded_rows, axis=0)



In [23]:
# Prepare test data
well_ts = test_data['Well Name'].values
depth_ts = test_data['Depth'].values
X_ts = test_data[feature_names].values

# Augment features
X_ts, padded_rows = augment_features(X_ts, well_ts, depth_ts)



In [24]:
# Predict test labels
y_ts_hat = train_and_test(X_tr, y_tr, X_ts, well_ts)



In [25]:
# Save predicted labels
test_data['Facies'] = y_ts_hat
test_data.to_csv('Prediction_houmath_edit.csv')

In [26]:
test_data


Out[26]:
Formation Well Name Depth GR ILD_log10 DeltaPHI PHIND PE NM_M RELPOS Facies
0 A1 SH STUART 2808.0 66.276 0.630 3.300 10.650 3.591 1 1.000 3
1 A1 SH STUART 2808.5 77.252 0.585 6.500 11.950 3.341 1 0.978 3
2 A1 SH STUART 2809.0 82.899 0.566 9.400 13.600 3.064 1 0.956 3
3 A1 SH STUART 2809.5 80.671 0.593 9.500 13.250 2.977 1 0.933 3
4 A1 SH STUART 2810.0 75.971 0.638 8.700 12.350 3.020 1 0.911 3
5 A1 SH STUART 2810.5 73.955 0.667 6.900 12.250 3.086 1 0.889 3
6 A1 SH STUART 2811.0 77.962 0.674 6.500 12.450 3.092 1 0.867 3
7 A1 SH STUART 2811.5 83.894 0.667 6.300 12.650 3.123 1 0.844 3
8 A1 SH STUART 2812.0 84.424 0.653 6.700 13.050 3.121 1 0.822 3
9 A1 SH STUART 2812.5 83.160 0.642 7.300 12.950 3.127 1 0.800 3
10 A1 SH STUART 2813.0 79.063 0.651 7.300 12.050 3.147 1 0.778 2
11 A1 SH STUART 2813.5 69.002 0.677 6.200 10.800 3.096 1 0.756 2
12 A1 SH STUART 2814.0 63.983 0.690 4.400 9.700 3.103 1 0.733 2
13 A1 SH STUART 2814.5 61.797 0.675 3.500 9.150 3.101 1 0.711 1
14 A1 SH STUART 2815.0 61.372 0.646 2.800 9.300 3.065 1 0.689 1
15 A1 SH STUART 2815.5 63.535 0.621 2.800 9.800 2.982 1 0.667 1
16 A1 SH STUART 2816.0 65.126 0.600 3.300 10.550 2.914 1 0.644 1
17 A1 SH STUART 2816.5 75.930 0.576 3.400 11.900 2.845 1 0.600 1
18 A1 SH STUART 2817.0 85.077 0.584 4.400 12.900 2.854 1 0.578 2
19 A1 SH STUART 2817.5 89.459 0.598 6.600 13.500 2.986 1 0.556 2
20 A1 SH STUART 2818.0 88.619 0.610 7.200 14.800 2.988 1 0.533 2
21 A1 SH STUART 2818.5 81.593 0.636 6.400 13.900 2.998 1 0.511 2
22 A1 SH STUART 2819.0 66.595 0.702 2.800 11.400 2.988 1 0.489 2
23 A1 SH STUART 2819.5 55.081 0.789 2.700 8.150 3.028 1 0.467 1
24 A1 SH STUART 2820.0 48.112 0.840 1.000 7.500 3.073 1 0.444 1
25 A1 SH STUART 2820.5 43.730 0.846 0.400 7.100 3.146 1 0.422 1
26 A1 SH STUART 2821.0 44.097 0.840 0.700 6.650 3.205 1 0.400 1
27 A1 SH STUART 2821.5 46.839 0.842 0.800 6.600 3.254 1 0.378 1
28 A1 SH STUART 2822.0 50.348 0.843 1.100 6.750 3.230 1 0.356 1
29 A1 SH STUART 2822.5 57.129 0.822 2.200 7.300 3.237 1 0.333 1
... ... ... ... ... ... ... ... ... ... ... ...
800 B5 LM CRAWFORD 3146.0 167.803 -0.219 4.270 23.370 3.810 2 0.190 7
801 B5 LM CRAWFORD 3146.5 151.183 -0.057 0.925 17.125 4.153 2 0.172 8
802 B5 LM CRAWFORD 3147.0 123.264 0.067 0.285 14.215 4.404 2 0.155 8
803 B5 LM CRAWFORD 3147.5 108.569 0.234 0.705 12.225 4.499 2 0.138 8
804 B5 LM CRAWFORD 3148.0 101.072 0.427 1.150 10.760 4.392 2 0.121 8
805 B5 LM CRAWFORD 3148.5 91.748 0.625 1.135 9.605 4.254 2 0.103 6
806 B5 LM CRAWFORD 3149.0 83.794 0.749 2.075 7.845 4.023 2 0.086 6
807 B5 LM CRAWFORD 3149.5 83.794 0.749 2.075 7.845 4.023 2 0.086 6
808 B5 LM CRAWFORD 3150.0 79.722 0.771 2.890 6.640 4.040 2 0.069 6
809 B5 LM CRAWFORD 3150.5 76.334 0.800 2.960 6.290 3.997 2 0.052 8
810 B5 LM CRAWFORD 3151.0 73.631 0.800 2.680 6.690 3.828 2 0.034 8
811 B5 LM CRAWFORD 3151.5 76.865 0.772 2.420 8.600 3.535 2 0.017 8
812 C SH CRAWFORD 3152.0 79.924 0.752 2.620 11.510 3.148 1 1.000 3
813 C SH CRAWFORD 3152.5 82.199 0.728 3.725 14.555 2.964 1 0.972 3
814 C SH CRAWFORD 3153.0 79.953 0.700 5.610 16.930 2.793 1 0.944 3
815 C SH CRAWFORD 3153.5 75.881 0.673 6.300 17.570 2.969 1 0.917 3
816 C SH CRAWFORD 3154.0 67.470 0.652 4.775 15.795 3.282 1 0.889 2
817 C SH CRAWFORD 3154.5 58.832 0.640 4.315 13.575 3.642 1 0.861 2
818 C SH CRAWFORD 3155.0 57.946 0.631 3.595 11.305 3.893 1 0.833 2
819 C SH CRAWFORD 3155.5 65.755 0.625 3.465 10.355 3.911 1 0.806 2
820 C SH CRAWFORD 3156.0 69.445 0.617 3.390 11.540 3.820 1 0.778 2
821 C SH CRAWFORD 3156.5 73.389 0.608 3.625 12.775 3.620 1 0.750 2
822 C SH CRAWFORD 3157.0 77.115 0.605 4.140 13.420 3.467 1 0.722 2
823 C SH CRAWFORD 3157.5 79.840 0.596 4.875 13.825 3.360 1 0.694 2
824 C SH CRAWFORD 3158.0 82.616 0.577 5.235 14.845 3.207 1 0.667 2
825 C SH CRAWFORD 3158.5 86.078 0.554 5.040 16.150 3.161 1 0.639 2
826 C SH CRAWFORD 3159.0 88.855 0.539 5.560 16.750 3.118 1 0.611 2
827 C SH CRAWFORD 3159.5 90.490 0.530 6.360 16.780 3.168 1 0.583 2
828 C SH CRAWFORD 3160.0 90.975 0.522 7.035 16.995 3.154 1 0.556 2
829 C SH CRAWFORD 3160.5 90.108 0.513 7.505 17.595 3.125 1 0.528 2

830 rows × 11 columns
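As an optional sanity check (not part of the original notebook), the distribution of predicted facies over the two test wells can be inspected:

# Count of predicted samples per facies code (1-9)
print(test_data['Facies'].value_counts().sort_index())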


In [27]:
from mpl_toolkits.axes_grid1 import make_axes_locatable

def make_facies_log_plot(logs, facies_colors):
    #make sure logs are sorted by depth
    logs = logs.sort_values(by='Depth')
    cmap_facies = colors.ListedColormap(
            facies_colors[0:len(facies_colors)], 'indexed')
    
    ztop=logs.Depth.min(); zbot=logs.Depth.max()
    
    cluster=np.repeat(np.expand_dims(logs['Facies'].values,1), 100, 1)
    
    f, ax = plt.subplots(nrows=1, ncols=6, figsize=(8, 12))
    ax[0].plot(logs.GR, logs.Depth, '-g')
    ax[1].plot(logs.ILD_log10, logs.Depth, '-')
    ax[2].plot(logs.DeltaPHI, logs.Depth, '-', color='0.5')
    ax[3].plot(logs.PHIND, logs.Depth, '-', color='r')
    ax[4].plot(logs.PE, logs.Depth, '-', color='black')
    im=ax[5].imshow(cluster, interpolation='none', aspect='auto',
                    cmap=cmap_facies,vmin=1,vmax=9)
    
    divider = make_axes_locatable(ax[5])
    cax = divider.append_axes("right", size="20%", pad=0.05)
    cbar=plt.colorbar(im, cax=cax)
    cbar.set_label((17*' ').join([' SS ', 'CSiS', 'FSiS', 
                                'SiSh', ' MS ', ' WS ', ' D  ', 
                                ' PS ', ' BS ']))
    cbar.set_ticks(range(0,1)); cbar.set_ticklabels('')
    
    for i in range(len(ax)-1):
        ax[i].set_ylim(ztop,zbot)
        ax[i].invert_yaxis()
        ax[i].grid()
        ax[i].locator_params(axis='x', nbins=3)
    
    ax[0].set_xlabel("GR")
    ax[0].set_xlim(logs.GR.min(),logs.GR.max())
    ax[1].set_xlabel("ILD_log10")
    ax[1].set_xlim(logs.ILD_log10.min(),logs.ILD_log10.max())
    ax[2].set_xlabel("DeltaPHI")
    ax[2].set_xlim(logs.DeltaPHI.min(),logs.DeltaPHI.max())
    ax[3].set_xlabel("PHIND")
    ax[3].set_xlim(logs.PHIND.min(),logs.PHIND.max())
    ax[4].set_xlabel("PE")
    ax[4].set_xlim(logs.PE.min(),logs.PE.max())
    ax[5].set_xlabel('Facies')
    
    ax[1].set_yticklabels([]); ax[2].set_yticklabels([]); ax[3].set_yticklabels([])
    ax[4].set_yticklabels([]); ax[5].set_yticklabels([])
    ax[5].set_xticklabels([])
    f.suptitle('Well: %s'%logs.iloc[0]['Well Name'], fontsize=14,y=0.94)

In [29]:
# Plot predicted labels
make_facies_log_plot(
    test_data[test_data['Well Name'] == 'STUART'],
    facies_colors=facies_colors)

make_facies_log_plot(
    test_data[test_data['Well Name'] == 'CRAWFORD'],
    facies_colors=facies_colors)
mpl.rcParams.update(inline_rc)



In [ ]: