Facies classification using Machine Learning

aaML Submission

By:

Alexsandro G. Cerqueira,
Alã de C. Damasceno

There are tow main notebooks:

Data Analysis and edition
Submission



In [1]:

    
from libtools import *

Loading the data training data without Shankle well



In [2]:

    
training = pd.read_csv('data-test.csv')



In [3]:

    
training.head()









    Out[3]:






  
    
      
      Facies
      GR
      ILD_log10
      DeltaPHI
      PHIND
      PE
      RELPOS
      Label_Form_SH_LM
    
  
  
    
      0
      3
      77.45
      0.664
      9.9
      11.915
      4.6
      1.000
      1
    
    
      1
      3
      78.26
      0.661
      14.2
      12.565
      4.1
      0.979
      1
    
    
      2
      3
      79.05
      0.658
      14.8
      13.050
      3.6
      0.957
      1
    
    
      3
      3
      86.10
      0.655
      13.9
      13.115
      3.5
      0.936
      1
    
    
      4
      3
      74.58
      0.647
      13.5
      13.300
      3.4
      0.915
      1



In [4]:

    
training.describe()









    Out[4]:






  
    
      
      Facies
      GR
      ILD_log10
      DeltaPHI
      PHIND
      PE
      RELPOS
      Label_Form_SH_LM
    
  
  
    
      count
      3700.000000
      3700.000000
      3700.000000
      3700.000000
      3700.000000
      2783.000000
      3700.000000
      3700.000000
    
    
      mean
      4.615676
      64.873649
      0.663053
      4.651677
      12.892826
      3.805693
      0.524125
      1.542973
    
    
      std
      2.475808
      30.817166
      0.253863
      5.109006
      6.796219
      0.894118
      0.287147
      0.498217
    
    
      min
      1.000000
      10.149000
      -0.025949
      -21.832000
      0.550000
      0.200000
      0.000000
      1.000000
    
    
      25%
      2.000000
      43.778250
      0.502000
      1.800000
      8.350000
      3.200000
      0.278000
      1.000000
    
    
      50%
      4.000000
      64.817000
      0.645613
      4.400000
      11.857500
      3.600000
      0.531000
      2.000000
    
    
      75%
      7.000000
      80.322500
      0.823000
      7.600000
      15.750000
      4.400000
      0.772000
      2.000000
    
    
      max
      9.000000
      361.150000
      1.800000
      19.312000
      84.400000
      8.094000
      1.000000
      2.000000



In [5]:

    
training = training.fillna(-99999)

Loading the SHANKLE well



In [6]:

    
blind = pd.read_csv('blind.csv')



In [7]:

    
blind.head()









    Out[7]:






  
    
      
      Facies
      GR
      ILD_log10
      DeltaPHI
      PHIND
      PE
      RELPOS
      Label_Form_SH_LM
    
  
  
    
      0
      2
      98.36
      0.642
      -0.1
      18.685
      2.9
      1.000
      1
    
    
      1
      2
      97.57
      0.631
      7.9
      16.745
      3.2
      0.984
      1
    
    
      2
      2
      98.41
      0.615
      12.8
      14.105
      3.2
      0.968
      1
    
    
      3
      2
      85.92
      0.597
      13.0
      13.385
      3.4
      0.952
      1
    
    
      4
      2
      83.16
      0.592
      12.3
      13.345
      3.4
      0.935
      1



In [8]:

    
blind.describe()









    Out[8]:






  
    
      
      Facies
      GR
      ILD_log10
      DeltaPHI
      PHIND
      PE
      RELPOS
      Label_Form_SH_LM
    
  
  
    
      count
      449.000000
      449.000000
      449.000000
      449.000000
      449.000000
      449.000000
      449.000000
      449.000000
    
    
      mean
      3.576837
      65.431180
      0.630831
      2.348998
      15.741125
      3.224944
      0.503118
      1.342984
    
    
      std
      2.260688
      25.696418
      0.241293
      6.113543
      9.080467
      0.732414
      0.282082
      0.475236
    
    
      min
      1.000000
      18.400000
      0.093000
      -19.900000
      2.890000
      1.500000
      0.010000
      1.000000
    
    
      25%
      2.000000
      54.960000
      0.425000
      0.100000
      9.150000
      2.700000
      0.258000
      1.000000
    
    
      50%
      3.000000
      66.600000
      0.620000
      2.600000
      13.935000
      3.100000
      0.500000
      1.000000
    
    
      75%
      6.000000
      75.150000
      0.817000
      6.200000
      18.575000
      3.600000
      0.744000
      2.000000
    
    
      max
      8.000000
      242.750000
      1.311000
      18.600000
      55.915000
      5.400000
      1.000000
      2.000000



In [9]:

    
training_SH = divisao_sh(training)
training_LM = divisao_lm(training)

blind_SH = divisao_sh(blind)
blind_LM = divisao_lm(blind)



In [10]:

    
training_SH.head()



In [11]:

    
training_LM.head()



In [12]:

    
blind_SH.head()



In [13]:

    
blind_LM.head()



In [14]:

    
X_SH = training_SH.drop(['Facies'],axis=1)
y_SH = training_SH['Facies']

X_LM = training_LM.drop(['Facies'],axis=1)
y_LM = training_LM['Facies']

X_SH_blind = blind_SH.drop(['Facies'],axis=1)
y_SH_blind = blind_SH['Facies']

X_LM_blind = blind_LM.drop(['Facies'],axis=1)
y_LM_blind = blind_LM['Facies']



In [15]:

    
from sklearn.model_selection import train_test_split

X_train_SH, X_test_SH, y_train_SH, y_test_SH = train_test_split(X_SH, y_SH, test_size=0.1)

X_train_LM, X_test_LM, y_train_LM, y_test_LM = train_test_split(X_LM, y_LM, test_size=0.1)



In [16]:

    
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.metrics import classification_report,confusion_matrix



In [17]:

    
ETC_SH = ExtraTreesClassifier(n_estimators=500, bootstrap=True)
ETC_LM = ExtraTreesClassifier(n_estimators=500)

ETC_SH.fit(X_train_SH, y_train_SH)
ETC_LM.fit(X_train_LM, y_train_LM)









    Out[17]:





ExtraTreesClassifier(bootstrap=False, class_weight=None, criterion='gini',
           max_depth=None, max_features='auto', max_leaf_nodes=None,
           min_impurity_split=1e-07, min_samples_leaf=1,
           min_samples_split=2, min_weight_fraction_leaf=0.0,
           n_estimators=500, n_jobs=1, oob_score=False, random_state=None,
           verbose=0, warm_start=False)



In [18]:

    
pred_SH = ETC_SH.predict(X_test_SH)
print(confusion_matrix(y_test_SH,pred_SH))
print(classification_report(y_test_SH,pred_SH))









    



[[ 9  1  1  0]
 [ 0 79  9  0]
 [ 0 11 59  1]
 [ 0  0  0  0]]
             precision    recall  f1-score   support

          1       1.00      0.82      0.90        11
          2       0.87      0.90      0.88        88
          3       0.86      0.83      0.84        71
          5       0.00      0.00      0.00         0

avg / total       0.87      0.86      0.87       170



In [19]:

    
pred_LM = ETC_LM.predict(X_test_LM)
print(confusion_matrix(y_test_LM,pred_LM))
print(classification_report(y_test_LM,pred_LM))









    



[[ 0  0  0  1  0  0  0]
 [ 0 20  1  2  0  2  0]
 [ 0  0 18  5  1  5  0]
 [ 0  1  1 40  0  9  0]
 [ 0  0  1  0 13  2  0]
 [ 0  0  4  9  1 47  0]
 [ 0  0  0  0  0  2 16]]
             precision    recall  f1-score   support

          3       0.00      0.00      0.00         1
          4       0.95      0.80      0.87        25
          5       0.72      0.62      0.67        29
          6       0.70      0.78      0.74        51
          7       0.87      0.81      0.84        16
          8       0.70      0.77      0.73        61
          9       1.00      0.89      0.94        18

avg / total       0.77      0.77      0.77       201



In [20]:

    
blind_pred_SH = ETC_SH.predict(X_SH_blind)
print(confusion_matrix(y_SH_blind, blind_pred_SH))
print(classification_report(y_SH_blind, blind_pred_SH))









    



[[ 7 81  1]
 [ 5 75  9]
 [ 0 43 74]]
             precision    recall  f1-score   support

          1       0.58      0.08      0.14        89
          2       0.38      0.84      0.52        89
          3       0.88      0.63      0.74       117

avg / total       0.64      0.53      0.49       295



In [21]:

    
blind_pred_LM = ETC_LM.predict(X_LM_blind)
print(confusion_matrix(y_LM_blind, blind_pred_LM))
print(classification_report(y_LM_blind, blind_pred_LM))









    



[[ 0  0  0  0  0  0  0]
 [ 0  1  0  5  0  1  0]
 [ 0  5  3  6  0  5  0]
 [ 1  1  1 52  0 16  0]
 [ 0  0  0  1  2 13  1]
 [ 0  0  0 15  1 23  1]
 [ 0  0  0  0  0  0  0]]
             precision    recall  f1-score   support

          3       0.00      0.00      0.00         0
          4       0.14      0.14      0.14         7
          5       0.75      0.16      0.26        19
          6       0.66      0.73      0.69        71
          7       0.67      0.12      0.20        17
          8       0.40      0.57      0.47        40
          9       0.00      0.00      0.00         0

avg / total       0.58      0.53      0.50       154



In [22]:

    
blind_pred_SH = pd.DataFrame(blind_pred_SH, index=X_SH_blind.index)
blind_pred_LM = pd.DataFrame(blind_pred_LM, index=X_LM_blind.index)
pred_blind = pd.concat([blind_pred_SH,blind_pred_LM])
pred_blind = pred_blind.sort_index()



In [23]:

    
y_blind = blind['Facies']



In [24]:

    
print(confusion_matrix(y_blind, pred_blind))
print(classification_report(y_blind, pred_blind))









    



[[ 7 81  1  0  0  0  0  0  0]
 [ 5 75  9  0  0  0  0  0  0]
 [ 0 43 74  0  0  0  0  0  0]
 [ 0  0  0  1  0  5  0  1  0]
 [ 0  0  0  5  3  6  0  5  0]
 [ 0  0  1  1  1 52  0 16  0]
 [ 0  0  0  0  0  1  2 13  1]
 [ 0  0  0  0  0 15  1 23  1]
 [ 0  0  0  0  0  0  0  0  0]]
             precision    recall  f1-score   support

          1       0.58      0.08      0.14        89
          2       0.38      0.84      0.52        89
          3       0.87      0.63      0.73       117
          4       0.14      0.14      0.14         7
          5       0.75      0.16      0.26        19
          6       0.66      0.73      0.69        71
          7       0.67      0.12      0.20        17
          8       0.40      0.57      0.47        40
          9       0.00      0.00      0.00         0

avg / total       0.62      0.53      0.49       449

Using the complete training data



In [25]:

    
training_data = pd.read_csv('training.csv')



In [26]:

    
training_data.head()









    Out[26]:






  
    
      
      Facies
      GR
      ILD_log10
      DeltaPHI
      PHIND
      PE
      RELPOS
      Label_Form_SH_LM
    
  
  
    
      0
      3
      77.45
      0.664
      9.9
      11.915
      4.6
      1.000
      1
    
    
      1
      3
      78.26
      0.661
      14.2
      12.565
      4.1
      0.979
      1
    
    
      2
      3
      79.05
      0.658
      14.8
      13.050
      3.6
      0.957
      1
    
    
      3
      3
      86.10
      0.655
      13.9
      13.115
      3.5
      0.936
      1
    
    
      4
      3
      74.58
      0.647
      13.5
      13.300
      3.4
      0.915
      1



In [27]:

    
training_data.describe()









    Out[27]:






  
    
      
      Facies
      GR
      ILD_log10
      DeltaPHI
      PHIND
      PE
      RELPOS
      Label_Form_SH_LM
    
  
  
    
      count
      4149.000000
      4149.000000
      4149.000000
      4149.000000
      4149.000000
      4149.000000
      4149.000000
      4149.000000
    
    
      mean
      4.503254
      64.933985
      0.659566
      4.402484
      13.201066
      -22098.588517
      0.521852
      1.521330
    
    
      std
      2.474324
      30.302530
      0.252703
      5.274947
      7.132846
      41499.330187
      0.286644
      0.499605
    
    
      min
      1.000000
      10.149000
      -0.025949
      -21.832000
      0.550000
      -99999.000000
      0.000000
      1.000000
    
    
      25%
      2.000000
      44.730000
      0.498000
      1.600000
      8.500000
      2.416000
      0.277000
      1.000000
    
    
      50%
      4.000000
      64.990000
      0.639000
      4.300000
      12.020000
      3.300000
      0.528000
      2.000000
    
    
      75%
      6.000000
      79.438000
      0.822000
      7.500000
      16.050000
      4.000000
      0.769000
      2.000000
    
    
      max
      9.000000
      361.150000
      1.800000
      19.312000
      84.400000
      8.094000
      1.000000
      2.000000



In [28]:

    
training_data_SH = divisao_sh(training_data)
training_data_LM = divisao_lm(training_data)



In [29]:

    
training_data_SH.describe()









    Out[29]:






  
    
      
      Facies
      GR
      ILD_log10
      DeltaPHI
      PHIND
      PE
      RELPOS
    
  
  
    
      count
      1986.000000
      1986.000000
      1986.000000
      1986.000000
      1986.000000
      1986.000000
      1986.000000
    
    
      mean
      2.297583
      75.003921
      0.530210
      5.432646
      16.700658
      -18980.149676
      0.511780
    
    
      std
      0.819513
      15.637085
      0.160003
      6.092309
      7.712549
      39227.270999
      0.287772
    
    
      min
      1.000000
      26.230000
      -0.025949
      -19.900000
      4.397000
      -99999.000000
      0.000000
    
    
      25%
      2.000000
      64.378000
      0.441000
      2.792500
      11.920000
      2.459500
      0.261500
    
    
      50%
      2.000000
      73.680000
      0.541205
      5.800000
      14.793500
      3.057500
      0.504500
    
    
      75%
      3.000000
      83.797500
      0.633000
      9.368500
      19.143750
      3.365000
      0.761500
    
    
      max
      9.000000
      221.125000
      0.966000
      19.257000
      84.400000
      5.100000
      1.000000



In [30]:

    
training_data_LM.describe()









    Out[30]:






  
    
      
      Facies
      GR
      ILD_log10
      DeltaPHI
      PHIND
      PE
      RELPOS
    
  
  
    
      count
      2163.000000
      2163.000000
      2163.000000
      2163.000000
      2163.000000
      2163.000000
      2163.000000
    
    
      mean
      6.528433
      55.688079
      0.778336
      3.456620
      9.987847
      -24961.843041
      0.531098
    
    
      std
      1.599173
      36.858662
      0.263687
      4.175825
      4.628169
      43292.673480
      0.285358
    
    
      min
      2.000000
      10.149000
      -0.019000
      -21.832000
      0.550000
      -99999.000000
      0.009000
    
    
      25%
      5.000000
      32.657500
      0.628500
      1.100000
      6.650000
      1.952500
      0.290500
    
    
      50%
      6.000000
      46.923000
      0.799000
      3.100000
      8.992000
      3.900000
      0.544000
    
    
      75%
      8.000000
      68.357500
      0.941000
      5.800500
      12.400000
      4.600000
      0.776000
    
    
      max
      9.000000
      361.150000
      1.800000
      19.312000
      47.721000
      8.094000
      1.000000



In [31]:

    
X_SH = training_data_SH.drop(['Facies'],axis=1)
y_SH = training_data_SH['Facies']

X_LM = training_data_LM.drop(['Facies'],axis=1)
y_LM = training_data_LM['Facies']



In [32]:

    
X_SH.describe()









    Out[32]:






  
    
      
      GR
      ILD_log10
      DeltaPHI
      PHIND
      PE
      RELPOS
    
  
  
    
      count
      1986.000000
      1986.000000
      1986.000000
      1986.000000
      1986.000000
      1986.000000
    
    
      mean
      75.003921
      0.530210
      5.432646
      16.700658
      -18980.149676
      0.511780
    
    
      std
      15.637085
      0.160003
      6.092309
      7.712549
      39227.270999
      0.287772
    
    
      min
      26.230000
      -0.025949
      -19.900000
      4.397000
      -99999.000000
      0.000000
    
    
      25%
      64.378000
      0.441000
      2.792500
      11.920000
      2.459500
      0.261500
    
    
      50%
      73.680000
      0.541205
      5.800000
      14.793500
      3.057500
      0.504500
    
    
      75%
      83.797500
      0.633000
      9.368500
      19.143750
      3.365000
      0.761500
    
    
      max
      221.125000
      0.966000
      19.257000
      84.400000
      5.100000
      1.000000



In [33]:

    
X_LM.describe()









    Out[33]:






  
    
      
      GR
      ILD_log10
      DeltaPHI
      PHIND
      PE
      RELPOS
    
  
  
    
      count
      2163.000000
      2163.000000
      2163.000000
      2163.000000
      2163.000000
      2163.000000
    
    
      mean
      55.688079
      0.778336
      3.456620
      9.987847
      -24961.843041
      0.531098
    
    
      std
      36.858662
      0.263687
      4.175825
      4.628169
      43292.673480
      0.285358
    
    
      min
      10.149000
      -0.019000
      -21.832000
      0.550000
      -99999.000000
      0.009000
    
    
      25%
      32.657500
      0.628500
      1.100000
      6.650000
      1.952500
      0.290500
    
    
      50%
      46.923000
      0.799000
      3.100000
      8.992000
      3.900000
      0.544000
    
    
      75%
      68.357500
      0.941000
      5.800500
      12.400000
      4.600000
      0.776000
    
    
      max
      361.150000
      1.800000
      19.312000
      47.721000
      8.094000
      1.000000



In [34]:

    
from sklearn.model_selection import train_test_split

X_train_SH, X_test_SH, y_train_SH, y_test_SH = train_test_split(X_SH, y_SH, test_size=0.1)

X_train_LM, X_test_LM, y_train_LM, y_test_LM = train_test_split(X_LM, y_LM, test_size=0.1)

Applying ExtraTreeClassifier



In [35]:

    
ETC_SH = ExtraTreesClassifier(n_estimators=500, bootstrap=True)
ETC_LM = ExtraTreesClassifier(n_estimators=500)

ETC_SH.fit(X_train_SH, y_train_SH)
ETC_LM.fit(X_train_LM, y_train_LM)









    Out[35]:





ExtraTreesClassifier(bootstrap=False, class_weight=None, criterion='gini',
           max_depth=None, max_features='auto', max_leaf_nodes=None,
           min_impurity_split=1e-07, min_samples_leaf=1,
           min_samples_split=2, min_weight_fraction_leaf=0.0,
           n_estimators=500, n_jobs=1, oob_score=False, random_state=None,
           verbose=0, warm_start=False)



In [36]:

    
pred_SH = ETC_SH.predict(X_test_SH)
print(confusion_matrix(y_test_SH,pred_SH))
print(classification_report(y_test_SH,pred_SH))









    



[[15  8  1  0  0  0]
 [ 3 77  8  0  0  0]
 [ 0 18 65  0  0  0]
 [ 0  0  1  0  0  0]
 [ 0  2  0  0  0  0]
 [ 0  1  0  0  0  0]]
             precision    recall  f1-score   support

          1       0.83      0.62      0.71        24
          2       0.73      0.88      0.79        88
          3       0.87      0.78      0.82        83
          4       0.00      0.00      0.00         1
          5       0.00      0.00      0.00         2
          7       0.00      0.00      0.00         1

avg / total       0.78      0.79      0.78       199



In [37]:

    
pred_LM = ETC_LM.predict(X_test_LM)
print(confusion_matrix(y_test_LM,pred_LM))
print(classification_report(y_test_LM,pred_LM))









    



[[ 2  0  1  0  0  0  0  0]
 [ 1  0  0  0  0  0  0  0]
 [ 0  0 30  1  3  0  1  0]
 [ 0  0  1 17  6  0  2  0]
 [ 0  0  3  2 43  0 10  0]
 [ 0  0  1  0  0  9  4  1]
 [ 0  0  1  2  6  0 51  0]
 [ 0  0  0  0  0  1  2 16]]
             precision    recall  f1-score   support

          2       0.67      0.67      0.67         3
          3       0.00      0.00      0.00         1
          4       0.81      0.86      0.83        35
          5       0.77      0.65      0.71        26
          6       0.74      0.74      0.74        58
          7       0.90      0.60      0.72        15
          8       0.73      0.85      0.78        60
          9       0.94      0.84      0.89        19

avg / total       0.78      0.77      0.77       217



In [38]:

    
validation = pd.read_csv('validation_data_nofacies.csv')



In [39]:

    
validation.head()



In [40]:

    
validation.describe()









    Out[40]:






  
    
      
      Depth
      GR
      ILD_log10
      DeltaPHI
      PHIND
      PE
      NM_M
      RELPOS
    
  
  
    
      count
      830.000000
      830.00000
      830.000000
      830.000000
      830.000000
      830.000000
      830.000000
      830.000000
    
    
      mean
      2987.070482
      57.61173
      0.666312
      2.851964
      11.655277
      3.654178
      1.678313
      0.535807
    
    
      std
      94.391925
      27.52774
      0.288367
      3.442074
      5.190236
      0.649793
      0.467405
      0.283062
    
    
      min
      2808.000000
      12.03600
      -0.468000
      -8.900000
      1.855000
      2.113000
      1.000000
      0.013000
    
    
      25%
      2911.625000
      36.77325
      0.541000
      0.411250
      7.700000
      3.171500
      1.000000
      0.300000
    
    
      50%
      2993.750000
      58.34450
      0.675000
      2.397500
      10.950000
      3.515500
      2.000000
      0.547500
    
    
      75%
      3055.375000
      73.05150
      0.850750
      4.600000
      14.793750
      4.191500
      2.000000
      0.778000
    
    
      max
      3160.500000
      220.41300
      1.507000
      16.500000
      31.335000
      6.321000
      2.000000
      1.000000

Making the division between SH and LM



In [41]:

    
validation['Label_Form_SH_LM'] = validation.Formation.apply((label_two_groups_formation))



In [42]:

    
validation.head()









    Out[42]:






  
    
      
      Formation
      Well Name
      Depth
      GR
      ILD_log10
      DeltaPHI
      PHIND
      PE
      NM_M
      RELPOS
      Label_Form_SH_LM
    
  
  
    
      0
      A1 SH
      STUART
      2808.0
      66.276
      0.630
      3.3
      10.65
      3.591
      1
      1.000
      1
    
    
      1
      A1 SH
      STUART
      2808.5
      77.252
      0.585
      6.5
      11.95
      3.341
      1
      0.978
      1
    
    
      2
      A1 SH
      STUART
      2809.0
      82.899
      0.566
      9.4
      13.60
      3.064
      1
      0.956
      1
    
    
      3
      A1 SH
      STUART
      2809.5
      80.671
      0.593
      9.5
      13.25
      2.977
      1
      0.933
      1
    
    
      4
      A1 SH
      STUART
      2810.0
      75.971
      0.638
      8.7
      12.35
      3.020
      1
      0.911
      1



In [43]:

    
validation_SH = divisao_sh(validation)
validation_LM = divisao_lm(validation)



In [44]:

    
validation_SH.head()



In [45]:

    
validation_LM.head()

Removing the colums: Formation, Well Name, Depth



In [46]:

    
X_val_SH = validation_SH.drop(['Formation','Well Name','Depth','NM_M'], axis=1)
X_val_LM = validation_LM.drop(['Formation','Well Name','Depth','NM_M'], axis=1)



In [47]:

    
X_val_SH.head()



In [48]:

    
X_val_LM.head()



In [49]:

    
pred_val_SH = ETC_SH.predict(X_val_SH)



In [50]:

    
pred_val_LM =ETC_LM.predict(X_val_LM)



In [51]:

    
pred_val_SH = pd.DataFrame(pred_val_SH, index=X_val_SH.index)
pred_val_LM = pd.DataFrame(pred_val_LM, index=X_val_LM.index)
pred_val = pd.concat([pred_val_SH,pred_val_LM])
pred_val = pred_val.sort_index()



In [52]:

    
pred_val.describe()



In [53]:

    
validation['Facies Pred'] = pred_val



In [54]:

    
validation=validation.drop(['Label_Form_SH_LM'],axis=1)



In [55]:

    
validation.head()









    Out[55]:






  
    
      
      Formation
      Well Name
      Depth
      GR
      ILD_log10
      DeltaPHI
      PHIND
      PE
      NM_M
      RELPOS
      Facies Pred
    
  
  
    
      0
      A1 SH
      STUART
      2808.0
      66.276
      0.630
      3.3
      10.65
      3.591
      1
      1.000
      3
    
    
      1
      A1 SH
      STUART
      2808.5
      77.252
      0.585
      6.5
      11.95
      3.341
      1
      0.978
      3
    
    
      2
      A1 SH
      STUART
      2809.0
      82.899
      0.566
      9.4
      13.60
      3.064
      1
      0.956
      2
    
    
      3
      A1 SH
      STUART
      2809.5
      80.671
      0.593
      9.5
      13.25
      2.977
      1
      0.933
      3
    
    
      4
      A1 SH
      STUART
      2810.0
      75.971
      0.638
      8.7
      12.35
      3.020
      1
      0.911
      3



In [56]:

    
validation.to_csv('Prediction.csv')



In [ ]:

	Formation	Well Name	Depth	GR	ILD_log10	DeltaPHI	PHIND	PE	NM_M	RELPOS
43	A1 LM	STUART	2829.5	47.345	0.584	7.0	16.30	3.527	2	1.000
44	A1 LM	STUART	2830.0	35.733	0.730	6.4	10.20	3.928	2	0.987
45	A1 LM	STUART	2830.5	29.327	0.873	2.7	7.85	4.330	2	0.974
46	A1 LM	STUART	2831.0	28.242	0.963	1.4	6.30	4.413	2	0.961
47	A1 LM	STUART	2831.5	34.558	1.018	1.8	5.60	4.511	2	0.947

	GR	ILD_log10	DeltaPHI	PHIND	PE	RELPOS
43	47.345	0.584	7.0	16.30	3.527	1.000
44	35.733	0.730	6.4	10.20	3.928	0.987
45	29.327	0.873	2.7	7.85	4.330	0.974
46	28.242	0.963	1.4	6.30	4.413	0.961
47	34.558	1.018	1.8	5.60	4.511	0.947

	Facies	GR	ILD_log10	DeltaPHI	PHIND	PE	RELPOS	Label_Form_SH_LM
0	3	77.45	0.664	9.9	11.915	4.6	1.000	1
1	3	78.26	0.661	14.2	12.565	4.1	0.979	1
2	3	79.05	0.658	14.8	13.050	3.6	0.957	1
3	3	86.10	0.655	13.9	13.115	3.5	0.936	1
4	3	74.58	0.647	13.5	13.300	3.4	0.915	1

	Facies	GR	ILD_log10	DeltaPHI	PHIND	PE	RELPOS	Label_Form_SH_LM
count	3700.000000	3700.000000	3700.000000	3700.000000	3700.000000	2783.000000	3700.000000	3700.000000
mean	4.615676	64.873649	0.663053	4.651677	12.892826	3.805693	0.524125	1.542973
std	2.475808	30.817166	0.253863	5.109006	6.796219	0.894118	0.287147	0.498217
min	1.000000	10.149000	-0.025949	-21.832000	0.550000	0.200000	0.000000	1.000000
25%	2.000000	43.778250	0.502000	1.800000	8.350000	3.200000	0.278000	1.000000
50%	4.000000	64.817000	0.645613	4.400000	11.857500	3.600000	0.531000	2.000000
75%	7.000000	80.322500	0.823000	7.600000	15.750000	4.400000	0.772000	2.000000
max	9.000000	361.150000	1.800000	19.312000	84.400000	8.094000	1.000000	2.000000

	Facies	GR	ILD_log10	DeltaPHI	PHIND	PE	RELPOS	Label_Form_SH_LM
0	2	98.36	0.642	-0.1	18.685	2.9	1.000	1
1	2	97.57	0.631	7.9	16.745	3.2	0.984	1
2	2	98.41	0.615	12.8	14.105	3.2	0.968	1
3	2	85.92	0.597	13.0	13.385	3.4	0.952	1
4	2	83.16	0.592	12.3	13.345	3.4	0.935	1

	Facies	GR	ILD_log10	DeltaPHI	PHIND	PE	RELPOS	Label_Form_SH_LM
count	449.000000	449.000000	449.000000	449.000000	449.000000	449.000000	449.000000	449.000000
mean	3.576837	65.431180	0.630831	2.348998	15.741125	3.224944	0.503118	1.342984
std	2.260688	25.696418	0.241293	6.113543	9.080467	0.732414	0.282082	0.475236
min	1.000000	18.400000	0.093000	-19.900000	2.890000	1.500000	0.010000	1.000000
25%	2.000000	54.960000	0.425000	0.100000	9.150000	2.700000	0.258000	1.000000
50%	3.000000	66.600000	0.620000	2.600000	13.935000	3.100000	0.500000	1.000000
75%	6.000000	75.150000	0.817000	6.200000	18.575000	3.600000	0.744000	2.000000
max	8.000000	242.750000	1.311000	18.600000	55.915000	5.400000	1.000000	2.000000

	Facies	GR	ILD_log10	DeltaPHI	PHIND	PE	RELPOS
43	8	57.02	0.628	11.6	12.090	3.5	1.000
44	8	43.29	0.702	7.3	6.705	4.0	0.981
45	8	36.06	0.762	5.7	4.595	5.0	0.963
46	8	46.00	0.823	5.1	4.620	5.5	0.944
47	6	60.30	0.915	4.6	5.010	5.6	0.926

	Facies	GR	ILD_log10	DeltaPHI	PHIND	PE	RELPOS
63	8	46.04	0.645	-17.8	28.075	2.3	1.000
64	8	26.73	0.748	-9.0	9.105	5.0	0.953
65	8	36.50	0.794	0.1	5.945	4.5	0.930
66	8	52.24	0.867	4.2	7.395	4.1	0.907
67	6	61.32	0.925	9.4	8.825	3.6	0.884

	Facies	GR	ILD_log10	DeltaPHI	PHIND	PE	RELPOS	Label_Form_SH_LM
count	4149.000000	4149.000000	4149.000000	4149.000000	4149.000000	4149.000000	4149.000000	4149.000000
mean	4.503254	64.933985	0.659566	4.402484	13.201066	-22098.588517	0.521852	1.521330
std	2.474324	30.302530	0.252703	5.274947	7.132846	41499.330187	0.286644	0.499605
min	1.000000	10.149000	-0.025949	-21.832000	0.550000	-99999.000000	0.000000	1.000000
25%	2.000000	44.730000	0.498000	1.600000	8.500000	2.416000	0.277000	1.000000
50%	4.000000	64.990000	0.639000	4.300000	12.020000	3.300000	0.528000	2.000000
75%	6.000000	79.438000	0.822000	7.500000	16.050000	4.000000	0.769000	2.000000
max	9.000000	361.150000	1.800000	19.312000	84.400000	8.094000	1.000000	2.000000

	Facies	GR	ILD_log10	DeltaPHI	PHIND	PE	RELPOS
count	1986.000000	1986.000000	1986.000000	1986.000000	1986.000000	1986.000000	1986.000000
mean	2.297583	75.003921	0.530210	5.432646	16.700658	-18980.149676	0.511780
std	0.819513	15.637085	0.160003	6.092309	7.712549	39227.270999	0.287772
min	1.000000	26.230000	-0.025949	-19.900000	4.397000	-99999.000000	0.000000
25%	2.000000	64.378000	0.441000	2.792500	11.920000	2.459500	0.261500
50%	2.000000	73.680000	0.541205	5.800000	14.793500	3.057500	0.504500
75%	3.000000	83.797500	0.633000	9.368500	19.143750	3.365000	0.761500
max	9.000000	221.125000	0.966000	19.257000	84.400000	5.100000	1.000000

	Facies	GR	ILD_log10	DeltaPHI	PHIND	PE	RELPOS
count	2163.000000	2163.000000	2163.000000	2163.000000	2163.000000	2163.000000	2163.000000
mean	6.528433	55.688079	0.778336	3.456620	9.987847	-24961.843041	0.531098
std	1.599173	36.858662	0.263687	4.175825	4.628169	43292.673480	0.285358
min	2.000000	10.149000	-0.019000	-21.832000	0.550000	-99999.000000	0.009000
25%	5.000000	32.657500	0.628500	1.100000	6.650000	1.952500	0.290500
50%	6.000000	46.923000	0.799000	3.100000	8.992000	3.900000	0.544000
75%	8.000000	68.357500	0.941000	5.800500	12.400000	4.600000	0.776000
max	9.000000	361.150000	1.800000	19.312000	47.721000	8.094000	1.000000

	Formation	Well Name	Depth	GR	ILD_log10	DeltaPHI	PHIND	PE	NM_M	RELPOS
0	A1 SH	STUART	2808.0	66.276	0.630	3.3	10.65	3.591	1	1.000
1	A1 SH	STUART	2808.5	77.252	0.585	6.5	11.95	3.341	1	0.978
2	A1 SH	STUART	2809.0	82.899	0.566	9.4	13.60	3.064	1	0.956
3	A1 SH	STUART	2809.5	80.671	0.593	9.5	13.25	2.977	1	0.933
4	A1 SH	STUART	2810.0	75.971	0.638	8.7	12.35	3.020	1	0.911

	Depth	GR	ILD_log10	DeltaPHI	PHIND	PE	NM_M	RELPOS
count	830.000000	830.00000	830.000000	830.000000	830.000000	830.000000	830.000000	830.000000
mean	2987.070482	57.61173	0.666312	2.851964	11.655277	3.654178	1.678313	0.535807
std	94.391925	27.52774	0.288367	3.442074	5.190236	0.649793	0.467405	0.283062
min	2808.000000	12.03600	-0.468000	-8.900000	1.855000	2.113000	1.000000	0.013000
25%	2911.625000	36.77325	0.541000	0.411250	7.700000	3.171500	1.000000	0.300000
50%	2993.750000	58.34450	0.675000	2.397500	10.950000	3.515500	2.000000	0.547500
75%	3055.375000	73.05150	0.850750	4.600000	14.793750	4.191500	2.000000	0.778000
max	3160.500000	220.41300	1.507000	16.500000	31.335000	6.321000	2.000000	1.000000