In [1]:
from __future__ import division
%matplotlib inline
import matplotlib as mpl
import matplotlib.pyplot as plt
mpl.rcParams['figure.figsize'] = (20.0, 10.0)
inline_rc = dict(mpl.rcParams)

import pandas as pd
import numpy as np
import seaborn as sns

from sklearn import preprocessing
from sklearn.model_selection import LeavePGroupsOut
from sklearn.metrics import f1_score
from sklearn.multiclass import OneVsOneClassifier
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor

from scipy.signal import medfilt

from pandas.plotting import scatter_matrix  # pandas.tools was removed in pandas 0.25

import matplotlib.colors as colors

import xgboost as xgb
from sklearn.metrics import confusion_matrix, accuracy_score
from classification_utilities import display_cm, display_adj_cm
from sklearn.model_selection import GridSearchCV


from sklearn.model_selection import validation_curve
from sklearn.datasets import load_svmlight_files

from xgboost.sklearn import XGBClassifier
from scipy.sparse import vstack

seed = 123
np.random.seed(seed)

In [2]:
# Parameters
feature_names = ['GR', 'ILD_log10', 'DeltaPHI', 'PHIND', 'PE', 'NM_M', 'RELPOS']
facies_names = ['SS', 'CSiS', 'FSiS', 'SiSh', 'MS', 'WS', 'D', 'PS', 'BS']
facies_colors = ['#F4D03F', '#F5B041','#DC7633','#6E2C00', '#1B4F72','#2E86C1', '#AED6F1', '#A569BD', '#196F3D']
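
The Facies column in the data files uses the integer codes 1-9 in the order listed above; a quick look at the code-to-name mapping (illustrative, not in the original notebook):

In [ ]:
# Facies codes run 1..9 in facies_vectors.csv
for code, name in enumerate(facies_names, start=1):
    print(code, name)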

In [3]:
# Load data from file
data = pd.read_csv('facies_vectors.csv')

In [4]:
X = data[feature_names].values
y = data['Facies'].values

In [5]:
well = data['Well Name'].values
depth = data['Depth'].values

In [6]:
# Define function for plotting feature statistics
def plot_feature_stats(X, y, feature_names, facies_colors, facies_names):
    
    # Remove NaN
    nan_idx = np.any(np.isnan(X), axis=1)
    X = X[np.logical_not(nan_idx), :]
    y = y[np.logical_not(nan_idx)]
    
    # Merge features and labels into a single DataFrame
    features = pd.DataFrame(X, columns=feature_names)
    labels = pd.DataFrame(y, columns=['Facies'])
    for f_idx, facies in enumerate(facies_names):
        labels[labels[:] == f_idx + 1] = facies  # facies codes run 1..9
    data = pd.concat((labels, features), axis=1)

    # Plot features statistics
    facies_color_map = {}
    for ind, label in enumerate(facies_names):
        facies_color_map[label] = facies_colors[ind]

    sns.pairplot(data, hue='Facies', palette=facies_color_map, hue_order=list(reversed(facies_names)))

In [7]:
# Feature distribution
plot_feature_stats(X, y, feature_names, facies_colors, facies_names)
mpl.rcParams.update(inline_rc)

[Figure: seaborn pairplot of the seven features, colored by facies]

In [8]:
# Facies per well
for w_idx, w in enumerate(np.unique(well)):
    ax = plt.subplot(3, 4, w_idx+1)
    hist = np.histogram(y[well == w], bins=np.arange(len(facies_names)+1)+.5)
    plt.bar(np.arange(len(hist[0])), hist[0], color=facies_colors, align='center')
    ax.set_xticks(np.arange(len(hist[0])))
    ax.set_xticklabels(facies_names)
    ax.set_title(w)

[Figure: facies histograms for each training well]

In [9]:
# Feature availability per well ('hit' = the log has no missing values in that well)
for w_idx, w in enumerate(np.unique(well)):
    ax = plt.subplot(3, 4, w_idx+1)
    avail = np.logical_not(np.any(np.isnan(X[well == w, :]), axis=0))
    plt.bar(np.arange(len(avail)), avail, color=facies_colors, align='center')
    ax.set_xticks(np.arange(len(avail)))
    ax.set_xticklabels(feature_names)
    ax.set_yticks([0, 1])
    ax.set_yticklabels(['miss', 'hit'])
    ax.set_title(w)

[Figure: per-well log availability ('hit'/'miss') for each feature]

In [10]:
# Impute missing PE values with a random forest regression on the other logs
reg = RandomForestRegressor(max_features='sqrt', n_estimators=50)
DataImpAll = data[feature_names].copy()
DataImp = DataImpAll.dropna(axis=0, inplace=False)
Ximp = DataImp.loc[:, DataImp.columns != 'PE']
Yimp = DataImp.loc[:, 'PE']
reg.fit(Ximp, Yimp)
# Column 4 of X is PE
X[np.array(DataImpAll.PE.isnull()), 4] = reg.predict(
    DataImpAll.loc[DataImpAll.PE.isnull(), :].drop('PE', axis=1, inplace=False))
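
As a sanity check (not part of the original notebook), the imputation regressor can be cross-validated on the complete cases to gauge how well PE is recovered from the other logs:

In [ ]:
from sklearn.model_selection import cross_val_score

# R^2 of the PE regressor on the rows where PE is observed (5-fold CV)
cv_r2 = cross_val_score(
    RandomForestRegressor(max_features='sqrt', n_estimators=50, random_state=seed),
    Ximp, Yimp, cv=5, scoring='r2')
print('PE imputation R^2: %.3f +/- %.3f' % (cv_r2.mean(), cv_r2.std()))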

In [11]:
# Feature windows concatenation function
def augment_features_window(X, N_neig):
    
    # Parameters
    N_row = X.shape[0]
    N_feat = X.shape[1]

    # Zero padding
    X = np.vstack((np.zeros((N_neig, N_feat)), X, np.zeros((N_neig, N_feat))))

    # Loop over windows
    X_aug = np.zeros((N_row, N_feat*(2*N_neig+1)))
    for r in np.arange(N_row)+N_neig:
        this_row = []
        for c in np.arange(-N_neig,N_neig+1):
            this_row = np.hstack((this_row, X[r+c]))
        X_aug[r-N_neig] = this_row

    return X_aug
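
A minimal shape check of the windowing (illustrative, not in the original run): with N_neig=1 each output row stacks the previous, current, and next samples, with zero padding at the well boundaries.

In [ ]:
X_toy = np.arange(6).reshape(3, 2)   # 3 samples, 2 features
print(augment_features_window(X_toy, 1))
# [[0. 0. 0. 1. 2. 3.]
#  [0. 1. 2. 3. 4. 5.]
#  [2. 3. 4. 5. 0. 0.]]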

In [12]:
# Feature gradient computation function
def augment_features_gradient(X, depth):
    
    # Compute feature gradients with respect to depth
    d_diff = np.diff(depth).reshape((-1, 1))
    d_diff[d_diff == 0] = 0.001  # avoid division by zero at repeated depths
    X_diff = np.diff(X, axis=0)
    X_grad = X_diff / d_diff
        
    # Compensate for last missing value
    X_grad = np.concatenate((X_grad, np.zeros((1, X_grad.shape[1]))))
    
    return X_grad
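
And a similar check for the gradient augmentation (illustrative): forward differences divided by the depth step, with a zero appended for the last sample.

In [ ]:
X_toy = np.array([[1.0], [2.0], [4.0]])
d_toy = np.array([0.0, 0.5, 1.0])
print(augment_features_gradient(X_toy, d_toy).ravel())  # [2. 4. 0.]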

In [13]:
# Feature augmentation function
def augment_features(X, well, depth, N_neig=1):
    
    # Augment features
    X_aug = np.zeros((X.shape[0], X.shape[1]*(N_neig*2+2)))
    for w in np.unique(well):
        w_idx = np.where(well == w)[0]
        X_aug_win = augment_features_window(X[w_idx, :], N_neig)
        X_aug_grad = augment_features_gradient(X[w_idx, :], depth[w_idx])
        X_aug[w_idx, :] = np.concatenate((X_aug_win, X_aug_grad), axis=1)
    
    # Find rows affected by the zero padding (a row is flagged when any of its
    # first 7 window values is exactly zero; genuine log values are assumed nonzero)
    padded_rows = np.unique(np.where(X_aug[:, 0:7] == np.zeros((1, 7)))[0])
    
    return X_aug, padded_rows

In [14]:
X_aug, padded_rows = augment_features(X, well, depth)
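
With the seven input features and the default N_neig=1, the augmented matrix has 7*(2*1+2) = 28 columns: 21 windowed log values plus 7 depth gradients.

In [ ]:
print(X_aug.shape)  # (n_samples, 28)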

In [15]:
# Initialize model selection methods
lpgo = LeavePGroupsOut(2)

# Generate splits, keeping only those in which every facies class
# appears in both the training and the validation wells
split_list = []
for train, val in lpgo.split(X, y, groups=data['Well Name']):
    hist_tr = np.histogram(y[train], bins=np.arange(len(facies_names)+1)+.5)
    hist_val = np.histogram(y[val], bins=np.arange(len(facies_names)+1)+.5)
    if np.all(hist_tr[0] != 0) and np.all(hist_val[0] != 0):
        split_list.append({'train':train, 'val':val})
            
# Print splits
for s, split in enumerate(split_list):
    print('Split %d' % s)
    print('    training:   %s' % (data['Well Name'][split['train']].unique()))
    print('    validation: %s' % (data['Well Name'][split['val']].unique()))


Split 0
    training:   ['SHRIMPLIN' 'SHANKLE' 'LUKE G U' 'KIMZEY A' 'CROSS H CATTLE' 'NOLAN'
 'Recruit F9' 'NEWBY']
    validation: ['ALEXANDER D' 'CHURCHMAN BIBLE']
Split 1
    training:   ['SHRIMPLIN' 'SHANKLE' 'LUKE G U' 'KIMZEY A' 'NOLAN' 'Recruit F9' 'NEWBY'
 'CHURCHMAN BIBLE']
    validation: ['ALEXANDER D' 'CROSS H CATTLE']
Split 2
    training:   ['SHRIMPLIN' 'SHANKLE' 'LUKE G U' 'CROSS H CATTLE' 'NOLAN' 'Recruit F9'
 'NEWBY' 'CHURCHMAN BIBLE']
    validation: ['ALEXANDER D' 'KIMZEY A']
Split 3
    training:   ['SHRIMPLIN' 'SHANKLE' 'LUKE G U' 'KIMZEY A' 'CROSS H CATTLE' 'Recruit F9'
 'NEWBY' 'CHURCHMAN BIBLE']
    validation: ['ALEXANDER D' 'NOLAN']
Split 4
    training:   ['SHRIMPLIN' 'LUKE G U' 'KIMZEY A' 'CROSS H CATTLE' 'NOLAN' 'Recruit F9'
 'NEWBY' 'CHURCHMAN BIBLE']
    validation: ['ALEXANDER D' 'SHANKLE']
Split 5
    training:   ['SHRIMPLIN' 'ALEXANDER D' 'SHANKLE' 'LUKE G U' 'KIMZEY A' 'NOLAN'
 'Recruit F9' 'NEWBY']
    validation: ['CROSS H CATTLE' 'CHURCHMAN BIBLE']
Split 6
    training:   ['SHRIMPLIN' 'ALEXANDER D' 'SHANKLE' 'LUKE G U' 'CROSS H CATTLE' 'NOLAN'
 'Recruit F9' 'NEWBY']
    validation: ['KIMZEY A' 'CHURCHMAN BIBLE']
Split 7
    training:   ['SHRIMPLIN' 'ALEXANDER D' 'SHANKLE' 'KIMZEY A' 'CROSS H CATTLE' 'NOLAN'
 'Recruit F9' 'NEWBY']
    validation: ['LUKE G U' 'CHURCHMAN BIBLE']
Split 8
    training:   ['SHRIMPLIN' 'ALEXANDER D' 'SHANKLE' 'LUKE G U' 'KIMZEY A' 'CROSS H CATTLE'
 'NOLAN' 'Recruit F9']
    validation: ['NEWBY' 'CHURCHMAN BIBLE']
Split 9
    training:   ['SHRIMPLIN' 'ALEXANDER D' 'SHANKLE' 'LUKE G U' 'KIMZEY A' 'CROSS H CATTLE'
 'Recruit F9' 'NEWBY']
    validation: ['NOLAN' 'CHURCHMAN BIBLE']
Split 10
    training:   ['SHRIMPLIN' 'ALEXANDER D' 'SHANKLE' 'LUKE G U' 'KIMZEY A' 'CROSS H CATTLE'
 'NOLAN' 'NEWBY']
    validation: ['Recruit F9' 'CHURCHMAN BIBLE']
Split 11
    training:   ['SHRIMPLIN' 'ALEXANDER D' 'LUKE G U' 'KIMZEY A' 'CROSS H CATTLE' 'NOLAN'
 'Recruit F9' 'NEWBY']
    validation: ['SHANKLE' 'CHURCHMAN BIBLE']
Split 12
    training:   ['ALEXANDER D' 'SHANKLE' 'LUKE G U' 'KIMZEY A' 'CROSS H CATTLE' 'NOLAN'
 'Recruit F9' 'NEWBY']
    validation: ['SHRIMPLIN' 'CHURCHMAN BIBLE']
Split 13
    training:   ['SHRIMPLIN' 'ALEXANDER D' 'SHANKLE' 'LUKE G U' 'NOLAN' 'Recruit F9'
 'NEWBY' 'CHURCHMAN BIBLE']
    validation: ['KIMZEY A' 'CROSS H CATTLE']
Split 14
    training:   ['SHRIMPLIN' 'ALEXANDER D' 'SHANKLE' 'LUKE G U' 'KIMZEY A' 'NOLAN'
 'Recruit F9' 'CHURCHMAN BIBLE']
    validation: ['CROSS H CATTLE' 'NEWBY']
Split 15
    training:   ['SHRIMPLIN' 'ALEXANDER D' 'SHANKLE' 'LUKE G U' 'KIMZEY A' 'NOLAN' 'NEWBY'
 'CHURCHMAN BIBLE']
    validation: ['CROSS H CATTLE' 'Recruit F9']
Split 16
    training:   ['ALEXANDER D' 'SHANKLE' 'LUKE G U' 'KIMZEY A' 'NOLAN' 'Recruit F9' 'NEWBY'
 'CHURCHMAN BIBLE']
    validation: ['SHRIMPLIN' 'CROSS H CATTLE']
Split 17
    training:   ['SHRIMPLIN' 'ALEXANDER D' 'SHANKLE' 'CROSS H CATTLE' 'NOLAN' 'Recruit F9'
 'NEWBY' 'CHURCHMAN BIBLE']
    validation: ['LUKE G U' 'KIMZEY A']
Split 18
    training:   ['SHRIMPLIN' 'ALEXANDER D' 'SHANKLE' 'LUKE G U' 'CROSS H CATTLE' 'NOLAN'
 'Recruit F9' 'CHURCHMAN BIBLE']
    validation: ['KIMZEY A' 'NEWBY']
Split 19
    training:   ['SHRIMPLIN' 'ALEXANDER D' 'SHANKLE' 'LUKE G U' 'CROSS H CATTLE'
 'Recruit F9' 'NEWBY' 'CHURCHMAN BIBLE']
    validation: ['KIMZEY A' 'NOLAN']
Split 20
    training:   ['SHRIMPLIN' 'ALEXANDER D' 'SHANKLE' 'LUKE G U' 'CROSS H CATTLE' 'NOLAN'
 'NEWBY' 'CHURCHMAN BIBLE']
    validation: ['KIMZEY A' 'Recruit F9']
Split 21
    training:   ['SHRIMPLIN' 'ALEXANDER D' 'LUKE G U' 'CROSS H CATTLE' 'NOLAN' 'Recruit F9'
 'NEWBY' 'CHURCHMAN BIBLE']
    validation: ['SHANKLE' 'KIMZEY A']
Split 22
    training:   ['ALEXANDER D' 'SHANKLE' 'LUKE G U' 'CROSS H CATTLE' 'NOLAN' 'Recruit F9'
 'NEWBY' 'CHURCHMAN BIBLE']
    validation: ['SHRIMPLIN' 'KIMZEY A']
Split 23
    training:   ['SHRIMPLIN' 'ALEXANDER D' 'SHANKLE' 'LUKE G U' 'KIMZEY A' 'CROSS H CATTLE'
 'Recruit F9' 'CHURCHMAN BIBLE']
    validation: ['NOLAN' 'NEWBY']
Split 24
    training:   ['SHRIMPLIN' 'ALEXANDER D' 'LUKE G U' 'KIMZEY A' 'CROSS H CATTLE' 'NOLAN'
 'Recruit F9' 'CHURCHMAN BIBLE']
    validation: ['SHANKLE' 'NEWBY']
Split 25
    training:   ['SHRIMPLIN' 'ALEXANDER D' 'SHANKLE' 'LUKE G U' 'KIMZEY A' 'CROSS H CATTLE'
 'NEWBY' 'CHURCHMAN BIBLE']
    validation: ['NOLAN' 'Recruit F9']
Split 26
    training:   ['ALEXANDER D' 'SHANKLE' 'LUKE G U' 'KIMZEY A' 'CROSS H CATTLE'
 'Recruit F9' 'NEWBY' 'CHURCHMAN BIBLE']
    validation: ['SHRIMPLIN' 'NOLAN']
Split 27
    training:   ['SHRIMPLIN' 'ALEXANDER D' 'LUKE G U' 'KIMZEY A' 'CROSS H CATTLE' 'NOLAN'
 'NEWBY' 'CHURCHMAN BIBLE']
    validation: ['SHANKLE' 'Recruit F9']
Split 28
    training:   ['ALEXANDER D' 'LUKE G U' 'KIMZEY A' 'CROSS H CATTLE' 'NOLAN' 'Recruit F9'
 'NEWBY' 'CHURCHMAN BIBLE']
    validation: ['SHRIMPLIN' 'SHANKLE']
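
For reference, LeavePGroupsOut(2) over the ten training wells yields C(10,2) = 45 candidate two-well splits; the facies-coverage filter keeps the 29 listed above. A quick check (not in the original run):

In [ ]:
print(len(split_list))  # 29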

In [25]:
# Parameter search grid (add more values to these lists for a full
# grid search... may take a long time)
from itertools import product

md_grid = [3]       # max_depth
mcw_grid = [1]      # min_child_weight
gamma_grid = [0.3]
ss_grid = [0.7]     # subsample
csb_grid = [0.8]    # colsample_bytree
alpha_grid = [0.2]  # reg_alpha
lr_grid = [0.05]    # learning_rate
ne_grid = [200]     # n_estimators

param_grid = [{'maxdepth': N, 'minchildweight': M, 'gamma': S, 'subsample': L,
               'colsamplebytree': K, 'alpha': P, 'learningrate': R, 'n_estimators': E}
              for N, M, S, L, K, P, R, E in product(md_grid, mcw_grid, gamma_grid, ss_grid,
                                                    csb_grid, alpha_grid, lr_grid, ne_grid)]

In [18]:
# Train and test a classifier
def train_and_test(X_tr, y_tr, X_v, well_v, clf):

    # Feature normalization (robust to outliers in the logs)
    scaler = preprocessing.RobustScaler(quantile_range=(25.0, 75.0)).fit(X_tr)
    X_tr = scaler.transform(X_tr)
    X_v = scaler.transform(X_v)

    # Train classifier
    clf.fit(X_tr, y_tr)

    # Test classifier
    y_v_hat = clf.predict(X_v)

    # Clean isolated facies predictions within each well with a median filter
    for w in np.unique(well_v):
        y_v_hat[well_v == w] = medfilt(y_v_hat[well_v == w], kernel_size=5)

    return y_v_hat
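
The kernel_size=5 median filter removes isolated one- or two-sample facies spikes within a well; a minimal illustration (not in the original run):

In [ ]:
# An isolated label is replaced by the surrounding majority
print(medfilt(np.array([3, 3, 3, 7, 3, 3, 3]), kernel_size=5))  # -> [3 3 3 3 3 3 3]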

In [21]:
# For each set of parameters
score_param = []
for param in param_grid:
    
    clf = OneVsOneClassifier(XGBClassifier(
            learning_rate=param['learningrate'],
            n_estimators=param['n_estimators'],
            max_depth=param['maxdepth'],
            min_child_weight=param['minchildweight'],
            gamma=param['gamma'],
            subsample=param['subsample'],
            colsample_bytree=param['colsamplebytree'],
            reg_alpha=param['alpha'],
            nthread=4,
            seed=seed,
        ), n_jobs=-1)
    # For each data split
    score_split = []
    for split in split_list:
    
        # Remove padded rows
        split_train_no_pad = np.setdiff1d(split['train'], padded_rows)
        
        # Select training and validation data from current split
        X_tr = X_aug[split_train_no_pad, :]
        X_v = X_aug[split['val'], :]
        y_tr = y[split_train_no_pad]
        y_v = y[split['val']]
        
        # Select well labels for validation data
        well_v = well[split['val']]

        # Train and test
        y_v_hat = train_and_test(X_tr, y_tr, X_v, well_v, clf)
        
        # Score
        score = f1_score(y_v, y_v_hat, average='micro')
        score_split.append(score)
        
    # Average score for this param
    score_param.append(np.mean(score_split))
    print('F1 score = %.3f %s' % (score_param[-1], param))
          
# Best set of parameters
best_idx = np.argmax(score_param)
param_best = param_grid[best_idx]
score_best = score_param[best_idx]
print('\nBest F1 score = %.3f %s' % (score_best, param_best))


F1 score = 0.563 {'minchildweight': 1, 'maxdepth': 3, 'subsample': 0.7, 'n_estimators': 200, 'colsamplebytree': 0.8, 'alpha': 0.2, 'gamma': 0.3, 'learningrate': 0.05}

Best F1 score = 0.563 {'minchildweight': 1, 'maxdepth': 3, 'subsample': 0.7, 'n_estimators': 200, 'colsamplebytree': 0.8, 'alpha': 0.2, 'gamma': 0.3, 'learningrate': 0.05}

In [26]:
# Instantiate the classifier with the best parameters found above
param = param_best
clf = OneVsOneClassifier(XGBClassifier(
        learning_rate=param['learningrate'],
        n_estimators=param['n_estimators'],
        max_depth=param['maxdepth'],
        min_child_weight=param['minchildweight'],
        gamma=param['gamma'],
        subsample=param['subsample'],
        colsample_bytree=param['colsamplebytree'],
        reg_alpha=param['alpha'],
        nthread=4,
        seed=seed,
    ), n_jobs=-1)

In [27]:
clf


Out[27]:
OneVsOneClassifier(estimator=XGBClassifier(base_score=0.5, colsample_bylevel=1, colsample_bytree=0.8,
       gamma=0.3, learning_rate=0.05, max_delta_step=0, max_depth=3,
       min_child_weight=1, missing=None, n_estimators=200, nthread=4,
       objective='binary:logistic', reg_alpha=0.2, reg_lambda=1,
       scale_pos_weight=1, seed=123, silent=True, subsample=0.7),
          n_jobs=-1)
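
Note that OneVsOneClassifier fits one binary XGBClassifier per pair of facies, so each call to fit trains 9*8/2 = 36 boosted models:

In [ ]:
# 9 facies -> 36 pairwise binary classifiers per fit
print(len(facies_names) * (len(facies_names) - 1) // 2)  # 36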

In [28]:
# Load data from file
test_data = pd.read_csv('validation_data_nofacies.csv')

In [29]:
# Prepare training data
X_tr = X
y_tr = y

# Augment features
X_tr, padded_rows = augment_features(X_tr, well, depth)

# Remove padded rows
X_tr = np.delete(X_tr, padded_rows, axis=0)
y_tr = np.delete(y_tr, padded_rows, axis=0)

In [30]:
# Prepare test data
well_ts = test_data['Well Name'].values
depth_ts = test_data['Depth'].values
X_ts = test_data[feature_names].values

# Augment features (padded rows are kept: every test depth needs a prediction)
X_ts, padded_rows_ts = augment_features(X_ts, well_ts, depth_ts)

In [31]:
# Predict test labels
y_ts_hat = train_and_test(X_tr, y_tr, X_ts, well_ts, clf)

In [32]:
# Save predicted labels
test_data['Facies'] = y_ts_hat
test_data.to_csv('Prediction6.csv')

In [38]:
test_data


Out[38]:
Formation Well Name Depth GR ILD_log10 DeltaPHI PHIND PE NM_M RELPOS Facies
0 A1 SH STUART 2808.0 66.276 0.630 3.300 10.650 3.591 1 1.000 3
1 A1 SH STUART 2808.5 77.252 0.585 6.500 11.950 3.341 1 0.978 3
2 A1 SH STUART 2809.0 82.899 0.566 9.400 13.600 3.064 1 0.956 3
3 A1 SH STUART 2809.5 80.671 0.593 9.500 13.250 2.977 1 0.933 3
4 A1 SH STUART 2810.0 75.971 0.638 8.700 12.350 3.020 1 0.911 3
5 A1 SH STUART 2810.5 73.955 0.667 6.900 12.250 3.086 1 0.889 3
6 A1 SH STUART 2811.0 77.962 0.674 6.500 12.450 3.092 1 0.867 3
7 A1 SH STUART 2811.5 83.894 0.667 6.300 12.650 3.123 1 0.844 3
8 A1 SH STUART 2812.0 84.424 0.653 6.700 13.050 3.121 1 0.822 3
9 A1 SH STUART 2812.5 83.160 0.642 7.300 12.950 3.127 1 0.800 3
10 A1 SH STUART 2813.0 79.063 0.651 7.300 12.050 3.147 1 0.778 2
11 A1 SH STUART 2813.5 69.002 0.677 6.200 10.800 3.096 1 0.756 2
12 A1 SH STUART 2814.0 63.983 0.690 4.400 9.700 3.103 1 0.733 2
13 A1 SH STUART 2814.5 61.797 0.675 3.500 9.150 3.101 1 0.711 2
14 A1 SH STUART 2815.0 61.372 0.646 2.800 9.300 3.065 1 0.689 2
15 A1 SH STUART 2815.5 63.535 0.621 2.800 9.800 2.982 1 0.667 2
16 A1 SH STUART 2816.0 65.126 0.600 3.300 10.550 2.914 1 0.644 2
17 A1 SH STUART 2816.5 75.930 0.576 3.400 11.900 2.845 1 0.600 2
18 A1 SH STUART 2817.0 85.077 0.584 4.400 12.900 2.854 1 0.578 2
19 A1 SH STUART 2817.5 89.459 0.598 6.600 13.500 2.986 1 0.556 2
20 A1 SH STUART 2818.0 88.619 0.610 7.200 14.800 2.988 1 0.533 2
21 A1 SH STUART 2818.5 81.593 0.636 6.400 13.900 2.998 1 0.511 2
22 A1 SH STUART 2819.0 66.595 0.702 2.800 11.400 2.988 1 0.489 2
23 A1 SH STUART 2819.5 55.081 0.789 2.700 8.150 3.028 1 0.467 2
24 A1 SH STUART 2820.0 48.112 0.840 1.000 7.500 3.073 1 0.444 1
25 A1 SH STUART 2820.5 43.730 0.846 0.400 7.100 3.146 1 0.422 1
26 A1 SH STUART 2821.0 44.097 0.840 0.700 6.650 3.205 1 0.400 1
27 A1 SH STUART 2821.5 46.839 0.842 0.800 6.600 3.254 1 0.378 1
28 A1 SH STUART 2822.0 50.348 0.843 1.100 6.750 3.230 1 0.356 1
29 A1 SH STUART 2822.5 57.129 0.822 2.200 7.300 3.237 1 0.333 1
... ... ... ... ... ... ... ... ... ... ... ...
800 B5 LM CRAWFORD 3146.0 167.803 -0.219 4.270 23.370 3.810 2 0.190 7
801 B5 LM CRAWFORD 3146.5 151.183 -0.057 0.925 17.125 4.153 2 0.172 8
802 B5 LM CRAWFORD 3147.0 123.264 0.067 0.285 14.215 4.404 2 0.155 8
803 B5 LM CRAWFORD 3147.5 108.569 0.234 0.705 12.225 4.499 2 0.138 8
804 B5 LM CRAWFORD 3148.0 101.072 0.427 1.150 10.760 4.392 2 0.121 8
805 B5 LM CRAWFORD 3148.5 91.748 0.625 1.135 9.605 4.254 2 0.103 6
806 B5 LM CRAWFORD 3149.0 83.794 0.749 2.075 7.845 4.023 2 0.086 6
807 B5 LM CRAWFORD 3149.5 83.794 0.749 2.075 7.845 4.023 2 0.086 6
808 B5 LM CRAWFORD 3150.0 79.722 0.771 2.890 6.640 4.040 2 0.069 8
809 B5 LM CRAWFORD 3150.5 76.334 0.800 2.960 6.290 3.997 2 0.052 8
810 B5 LM CRAWFORD 3151.0 73.631 0.800 2.680 6.690 3.828 2 0.034 8
811 B5 LM CRAWFORD 3151.5 76.865 0.772 2.420 8.600 3.535 2 0.017 8
812 C SH CRAWFORD 3152.0 79.924 0.752 2.620 11.510 3.148 1 1.000 3
813 C SH CRAWFORD 3152.5 82.199 0.728 3.725 14.555 2.964 1 0.972 3
814 C SH CRAWFORD 3153.0 79.953 0.700 5.610 16.930 2.793 1 0.944 3
815 C SH CRAWFORD 3153.5 75.881 0.673 6.300 17.570 2.969 1 0.917 3
816 C SH CRAWFORD 3154.0 67.470 0.652 4.775 15.795 3.282 1 0.889 3
817 C SH CRAWFORD 3154.5 58.832 0.640 4.315 13.575 3.642 1 0.861 2
818 C SH CRAWFORD 3155.0 57.946 0.631 3.595 11.305 3.893 1 0.833 2
819 C SH CRAWFORD 3155.5 65.755 0.625 3.465 10.355 3.911 1 0.806 2
820 C SH CRAWFORD 3156.0 69.445 0.617 3.390 11.540 3.820 1 0.778 2
821 C SH CRAWFORD 3156.5 73.389 0.608 3.625 12.775 3.620 1 0.750 2
822 C SH CRAWFORD 3157.0 77.115 0.605 4.140 13.420 3.467 1 0.722 2
823 C SH CRAWFORD 3157.5 79.840 0.596 4.875 13.825 3.360 1 0.694 2
824 C SH CRAWFORD 3158.0 82.616 0.577 5.235 14.845 3.207 1 0.667 2
825 C SH CRAWFORD 3158.5 86.078 0.554 5.040 16.150 3.161 1 0.639 2
826 C SH CRAWFORD 3159.0 88.855 0.539 5.560 16.750 3.118 1 0.611 2
827 C SH CRAWFORD 3159.5 90.490 0.530 6.360 16.780 3.168 1 0.583 2
828 C SH CRAWFORD 3160.0 90.975 0.522 7.035 16.995 3.154 1 0.556 2
829 C SH CRAWFORD 3160.5 90.108 0.513 7.505 17.595 3.125 1 0.528 2

830 rows × 11 columns


In [33]:
def make_facies_log_plot(logs, facies_colors):
    #make sure logs are sorted by depth
    logs = logs.sort_values(by='Depth')
    cmap_facies = colors.ListedColormap(
            facies_colors[0:len(facies_colors)], 'indexed')
    
    ztop=logs.Depth.min(); zbot=logs.Depth.max()
    
    cluster=np.repeat(np.expand_dims(logs['Facies'].values,1), 100, 1)
    
    f, ax = plt.subplots(nrows=1, ncols=6, figsize=(8, 12))
    ax[0].plot(logs.GR, logs.Depth, '-g')
    ax[1].plot(logs.ILD_log10, logs.Depth, '-')
    ax[2].plot(logs.DeltaPHI, logs.Depth, '-', color='0.5')
    ax[3].plot(logs.PHIND, logs.Depth, '-', color='r')
    ax[4].plot(logs.PE, logs.Depth, '-', color='black')
    im=ax[5].imshow(cluster, interpolation='none', aspect='auto',
                    cmap=cmap_facies,vmin=1,vmax=9)
    
    divider = make_axes_locatable(ax[5])
    cax = divider.append_axes("right", size="20%", pad=0.05)
    cbar=plt.colorbar(im, cax=cax)
    cbar.set_label((17*' ').join([' SS ', 'CSiS', 'FSiS', 
                                'SiSh', ' MS ', ' WS ', ' D  ', 
                                ' PS ', ' BS ']))
    cbar.set_ticks(range(0,1)); cbar.set_ticklabels('')
    
    for i in range(len(ax)-1):
        ax[i].set_ylim(ztop,zbot)
        ax[i].invert_yaxis()
        ax[i].grid()
        ax[i].locator_params(axis='x', nbins=3)
    
    ax[0].set_xlabel("GR")
    ax[0].set_xlim(logs.GR.min(),logs.GR.max())
    ax[1].set_xlabel("ILD_log10")
    ax[1].set_xlim(logs.ILD_log10.min(),logs.ILD_log10.max())
    ax[2].set_xlabel("DeltaPHI")
    ax[2].set_xlim(logs.DeltaPHI.min(),logs.DeltaPHI.max())
    ax[3].set_xlabel("PHIND")
    ax[3].set_xlim(logs.PHIND.min(),logs.PHIND.max())
    ax[4].set_xlabel("PE")
    ax[4].set_xlim(logs.PE.min(),logs.PE.max())
    ax[5].set_xlabel('Facies')
    
    ax[1].set_yticklabels([]); ax[2].set_yticklabels([]); ax[3].set_yticklabels([])
    ax[4].set_yticklabels([]); ax[5].set_yticklabels([])
    ax[5].set_xticklabels([])
    f.suptitle('Well: %s'%logs.iloc[0]['Well Name'], fontsize=14,y=0.94)

In [34]:
from mpl_toolkits.axes_grid1 import make_axes_locatable

In [35]:
# Plot predicted labels
make_facies_log_plot(
    test_data[test_data['Well Name'] == 'STUART'],
    facies_colors=facies_colors)

make_facies_log_plot(
    test_data[test_data['Well Name'] == 'CRAWFORD'],
    facies_colors=facies_colors)
mpl.rcParams.update(inline_rc)

[Figure: predicted facies log plots for wells STUART and CRAWFORD]