Facies classification using Machine Learning

aaML Submission

By:

Alexsandro G. Cerqueira,
Alã de C. Damasceno

There are tow main notebooks:

  • Data Analysis and edition
  • Submission

In [1]:
from libtools import *

Loading the data training data without Shankle well


In [2]:
training = pd.read_csv('data-test.csv')

In [3]:
training.head()


Out[3]:
Facies GR ILD_log10 DeltaPHI PHIND PE RELPOS Label_Form_SH_LM
0 3 77.45 0.664 9.9 11.915 4.6 1.000 1
1 3 78.26 0.661 14.2 12.565 4.1 0.979 1
2 3 79.05 0.658 14.8 13.050 3.6 0.957 1
3 3 86.10 0.655 13.9 13.115 3.5 0.936 1
4 3 74.58 0.647 13.5 13.300 3.4 0.915 1

In [4]:
training.describe()


Out[4]:
Facies GR ILD_log10 DeltaPHI PHIND PE RELPOS Label_Form_SH_LM
count 3700.000000 3700.000000 3700.000000 3700.000000 3700.000000 2783.000000 3700.000000 3700.000000
mean 4.615676 64.873649 0.663053 4.651677 12.892826 3.805693 0.524125 1.542973
std 2.475808 30.817166 0.253863 5.109006 6.796219 0.894118 0.287147 0.498217
min 1.000000 10.149000 -0.025949 -21.832000 0.550000 0.200000 0.000000 1.000000
25% 2.000000 43.778250 0.502000 1.800000 8.350000 3.200000 0.278000 1.000000
50% 4.000000 64.817000 0.645613 4.400000 11.857500 3.600000 0.531000 2.000000
75% 7.000000 80.322500 0.823000 7.600000 15.750000 4.400000 0.772000 2.000000
max 9.000000 361.150000 1.800000 19.312000 84.400000 8.094000 1.000000 2.000000

In [5]:
training = training.fillna(-99999)

Loading the SHANKLE well


In [6]:
blind = pd.read_csv('blind.csv')

In [7]:
blind.head()


Out[7]:
Facies GR ILD_log10 DeltaPHI PHIND PE RELPOS Label_Form_SH_LM
0 2 98.36 0.642 -0.1 18.685 2.9 1.000 1
1 2 97.57 0.631 7.9 16.745 3.2 0.984 1
2 2 98.41 0.615 12.8 14.105 3.2 0.968 1
3 2 85.92 0.597 13.0 13.385 3.4 0.952 1
4 2 83.16 0.592 12.3 13.345 3.4 0.935 1

In [8]:
blind.describe()


Out[8]:
Facies GR ILD_log10 DeltaPHI PHIND PE RELPOS Label_Form_SH_LM
count 449.000000 449.000000 449.000000 449.000000 449.000000 449.000000 449.000000 449.000000
mean 3.576837 65.431180 0.630831 2.348998 15.741125 3.224944 0.503118 1.342984
std 2.260688 25.696418 0.241293 6.113543 9.080467 0.732414 0.282082 0.475236
min 1.000000 18.400000 0.093000 -19.900000 2.890000 1.500000 0.010000 1.000000
25% 2.000000 54.960000 0.425000 0.100000 9.150000 2.700000 0.258000 1.000000
50% 3.000000 66.600000 0.620000 2.600000 13.935000 3.100000 0.500000 1.000000
75% 6.000000 75.150000 0.817000 6.200000 18.575000 3.600000 0.744000 2.000000
max 8.000000 242.750000 1.311000 18.600000 55.915000 5.400000 1.000000 2.000000

In [9]:
training_SH = divisao_sh(training)
training_LM = divisao_lm(training)

blind_SH = divisao_sh(blind)
blind_LM = divisao_lm(blind)

In [10]:
training_SH.head()


Out[10]:
Facies GR ILD_log10 DeltaPHI PHIND PE RELPOS
0 3 77.45 0.664 9.9 11.915 4.6 1.000
1 3 78.26 0.661 14.2 12.565 4.1 0.979
2 3 79.05 0.658 14.8 13.050 3.6 0.957
3 3 86.10 0.655 13.9 13.115 3.5 0.936
4 3 74.58 0.647 13.5 13.300 3.4 0.915

In [11]:
training_LM.head()


Out[11]:
Facies GR ILD_log10 DeltaPHI PHIND PE RELPOS
43 8 57.02 0.628 11.6 12.090 3.5 1.000
44 8 43.29 0.702 7.3 6.705 4.0 0.981
45 8 36.06 0.762 5.7 4.595 5.0 0.963
46 8 46.00 0.823 5.1 4.620 5.5 0.944
47 6 60.30 0.915 4.6 5.010 5.6 0.926

In [12]:
blind_SH.head()


Out[12]:
Facies GR ILD_log10 DeltaPHI PHIND PE RELPOS
0 2 98.36 0.642 -0.1 18.685 2.9 1.000
1 2 97.57 0.631 7.9 16.745 3.2 0.984
2 2 98.41 0.615 12.8 14.105 3.2 0.968
3 2 85.92 0.597 13.0 13.385 3.4 0.952
4 2 83.16 0.592 12.3 13.345 3.4 0.935

In [13]:
blind_LM.head()


Out[13]:
Facies GR ILD_log10 DeltaPHI PHIND PE RELPOS
63 8 46.04 0.645 -17.8 28.075 2.3 1.000
64 8 26.73 0.748 -9.0 9.105 5.0 0.953
65 8 36.50 0.794 0.1 5.945 4.5 0.930
66 8 52.24 0.867 4.2 7.395 4.1 0.907
67 6 61.32 0.925 9.4 8.825 3.6 0.884

In [14]:
X_SH = training_SH.drop(['Facies'],axis=1)
y_SH = training_SH['Facies']

X_LM = training_LM.drop(['Facies'],axis=1)
y_LM = training_LM['Facies']

X_SH_blind = blind_SH.drop(['Facies'],axis=1)
y_SH_blind = blind_SH['Facies']

X_LM_blind = blind_LM.drop(['Facies'],axis=1)
y_LM_blind = blind_LM['Facies']

In [15]:
from sklearn.model_selection import train_test_split

X_train_SH, X_test_SH, y_train_SH, y_test_SH = train_test_split(X_SH, y_SH, test_size=0.1)

X_train_LM, X_test_LM, y_train_LM, y_test_LM = train_test_split(X_LM, y_LM, test_size=0.1)

In [16]:
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.metrics import classification_report,confusion_matrix

In [17]:
ETC_SH = ExtraTreesClassifier(n_estimators=500, bootstrap=True)
ETC_LM = ExtraTreesClassifier(n_estimators=500)

ETC_SH.fit(X_train_SH, y_train_SH)
ETC_LM.fit(X_train_LM, y_train_LM)


Out[17]:
ExtraTreesClassifier(bootstrap=False, class_weight=None, criterion='gini',
           max_depth=None, max_features='auto', max_leaf_nodes=None,
           min_impurity_split=1e-07, min_samples_leaf=1,
           min_samples_split=2, min_weight_fraction_leaf=0.0,
           n_estimators=500, n_jobs=1, oob_score=False, random_state=None,
           verbose=0, warm_start=False)

In [18]:
pred_SH = ETC_SH.predict(X_test_SH)
print(confusion_matrix(y_test_SH,pred_SH))
print(classification_report(y_test_SH,pred_SH))


[[ 9  1  1  0]
 [ 0 79  9  0]
 [ 0 11 59  1]
 [ 0  0  0  0]]
             precision    recall  f1-score   support

          1       1.00      0.82      0.90        11
          2       0.87      0.90      0.88        88
          3       0.86      0.83      0.84        71
          5       0.00      0.00      0.00         0

avg / total       0.87      0.86      0.87       170


In [19]:
pred_LM = ETC_LM.predict(X_test_LM)
print(confusion_matrix(y_test_LM,pred_LM))
print(classification_report(y_test_LM,pred_LM))


[[ 0  0  0  1  0  0  0]
 [ 0 20  1  2  0  2  0]
 [ 0  0 18  5  1  5  0]
 [ 0  1  1 40  0  9  0]
 [ 0  0  1  0 13  2  0]
 [ 0  0  4  9  1 47  0]
 [ 0  0  0  0  0  2 16]]
             precision    recall  f1-score   support

          3       0.00      0.00      0.00         1
          4       0.95      0.80      0.87        25
          5       0.72      0.62      0.67        29
          6       0.70      0.78      0.74        51
          7       0.87      0.81      0.84        16
          8       0.70      0.77      0.73        61
          9       1.00      0.89      0.94        18

avg / total       0.77      0.77      0.77       201


In [20]:
blind_pred_SH = ETC_SH.predict(X_SH_blind)
print(confusion_matrix(y_SH_blind, blind_pred_SH))
print(classification_report(y_SH_blind, blind_pred_SH))


[[ 7 81  1]
 [ 5 75  9]
 [ 0 43 74]]
             precision    recall  f1-score   support

          1       0.58      0.08      0.14        89
          2       0.38      0.84      0.52        89
          3       0.88      0.63      0.74       117

avg / total       0.64      0.53      0.49       295


In [21]:
blind_pred_LM = ETC_LM.predict(X_LM_blind)
print(confusion_matrix(y_LM_blind, blind_pred_LM))
print(classification_report(y_LM_blind, blind_pred_LM))


[[ 0  0  0  0  0  0  0]
 [ 0  1  0  5  0  1  0]
 [ 0  5  3  6  0  5  0]
 [ 1  1  1 52  0 16  0]
 [ 0  0  0  1  2 13  1]
 [ 0  0  0 15  1 23  1]
 [ 0  0  0  0  0  0  0]]
             precision    recall  f1-score   support

          3       0.00      0.00      0.00         0
          4       0.14      0.14      0.14         7
          5       0.75      0.16      0.26        19
          6       0.66      0.73      0.69        71
          7       0.67      0.12      0.20        17
          8       0.40      0.57      0.47        40
          9       0.00      0.00      0.00         0

avg / total       0.58      0.53      0.50       154


In [22]:
blind_pred_SH = pd.DataFrame(blind_pred_SH, index=X_SH_blind.index)
blind_pred_LM = pd.DataFrame(blind_pred_LM, index=X_LM_blind.index)
pred_blind = pd.concat([blind_pred_SH,blind_pred_LM])
pred_blind = pred_blind.sort_index()

In [23]:
y_blind = blind['Facies']

In [24]:
print(confusion_matrix(y_blind, pred_blind))
print(classification_report(y_blind, pred_blind))


[[ 7 81  1  0  0  0  0  0  0]
 [ 5 75  9  0  0  0  0  0  0]
 [ 0 43 74  0  0  0  0  0  0]
 [ 0  0  0  1  0  5  0  1  0]
 [ 0  0  0  5  3  6  0  5  0]
 [ 0  0  1  1  1 52  0 16  0]
 [ 0  0  0  0  0  1  2 13  1]
 [ 0  0  0  0  0 15  1 23  1]
 [ 0  0  0  0  0  0  0  0  0]]
             precision    recall  f1-score   support

          1       0.58      0.08      0.14        89
          2       0.38      0.84      0.52        89
          3       0.87      0.63      0.73       117
          4       0.14      0.14      0.14         7
          5       0.75      0.16      0.26        19
          6       0.66      0.73      0.69        71
          7       0.67      0.12      0.20        17
          8       0.40      0.57      0.47        40
          9       0.00      0.00      0.00         0

avg / total       0.62      0.53      0.49       449

Using the complete training data


In [25]:
training_data = pd.read_csv('training.csv')

In [26]:
training_data.head()


Out[26]:
Facies GR ILD_log10 DeltaPHI PHIND PE RELPOS Label_Form_SH_LM
0 3 77.45 0.664 9.9 11.915 4.6 1.000 1
1 3 78.26 0.661 14.2 12.565 4.1 0.979 1
2 3 79.05 0.658 14.8 13.050 3.6 0.957 1
3 3 86.10 0.655 13.9 13.115 3.5 0.936 1
4 3 74.58 0.647 13.5 13.300 3.4 0.915 1

In [27]:
training_data.describe()


Out[27]:
Facies GR ILD_log10 DeltaPHI PHIND PE RELPOS Label_Form_SH_LM
count 4149.000000 4149.000000 4149.000000 4149.000000 4149.000000 4149.000000 4149.000000 4149.000000
mean 4.503254 64.933985 0.659566 4.402484 13.201066 -22098.588517 0.521852 1.521330
std 2.474324 30.302530 0.252703 5.274947 7.132846 41499.330187 0.286644 0.499605
min 1.000000 10.149000 -0.025949 -21.832000 0.550000 -99999.000000 0.000000 1.000000
25% 2.000000 44.730000 0.498000 1.600000 8.500000 2.416000 0.277000 1.000000
50% 4.000000 64.990000 0.639000 4.300000 12.020000 3.300000 0.528000 2.000000
75% 6.000000 79.438000 0.822000 7.500000 16.050000 4.000000 0.769000 2.000000
max 9.000000 361.150000 1.800000 19.312000 84.400000 8.094000 1.000000 2.000000

In [28]:
training_data_SH = divisao_sh(training_data)
training_data_LM = divisao_lm(training_data)

In [29]:
training_data_SH.describe()


Out[29]:
Facies GR ILD_log10 DeltaPHI PHIND PE RELPOS
count 1986.000000 1986.000000 1986.000000 1986.000000 1986.000000 1986.000000 1986.000000
mean 2.297583 75.003921 0.530210 5.432646 16.700658 -18980.149676 0.511780
std 0.819513 15.637085 0.160003 6.092309 7.712549 39227.270999 0.287772
min 1.000000 26.230000 -0.025949 -19.900000 4.397000 -99999.000000 0.000000
25% 2.000000 64.378000 0.441000 2.792500 11.920000 2.459500 0.261500
50% 2.000000 73.680000 0.541205 5.800000 14.793500 3.057500 0.504500
75% 3.000000 83.797500 0.633000 9.368500 19.143750 3.365000 0.761500
max 9.000000 221.125000 0.966000 19.257000 84.400000 5.100000 1.000000

In [30]:
training_data_LM.describe()


Out[30]:
Facies GR ILD_log10 DeltaPHI PHIND PE RELPOS
count 2163.000000 2163.000000 2163.000000 2163.000000 2163.000000 2163.000000 2163.000000
mean 6.528433 55.688079 0.778336 3.456620 9.987847 -24961.843041 0.531098
std 1.599173 36.858662 0.263687 4.175825 4.628169 43292.673480 0.285358
min 2.000000 10.149000 -0.019000 -21.832000 0.550000 -99999.000000 0.009000
25% 5.000000 32.657500 0.628500 1.100000 6.650000 1.952500 0.290500
50% 6.000000 46.923000 0.799000 3.100000 8.992000 3.900000 0.544000
75% 8.000000 68.357500 0.941000 5.800500 12.400000 4.600000 0.776000
max 9.000000 361.150000 1.800000 19.312000 47.721000 8.094000 1.000000

In [31]:
X_SH = training_data_SH.drop(['Facies'],axis=1)
y_SH = training_data_SH['Facies']

X_LM = training_data_LM.drop(['Facies'],axis=1)
y_LM = training_data_LM['Facies']

In [32]:
X_SH.describe()


Out[32]:
GR ILD_log10 DeltaPHI PHIND PE RELPOS
count 1986.000000 1986.000000 1986.000000 1986.000000 1986.000000 1986.000000
mean 75.003921 0.530210 5.432646 16.700658 -18980.149676 0.511780
std 15.637085 0.160003 6.092309 7.712549 39227.270999 0.287772
min 26.230000 -0.025949 -19.900000 4.397000 -99999.000000 0.000000
25% 64.378000 0.441000 2.792500 11.920000 2.459500 0.261500
50% 73.680000 0.541205 5.800000 14.793500 3.057500 0.504500
75% 83.797500 0.633000 9.368500 19.143750 3.365000 0.761500
max 221.125000 0.966000 19.257000 84.400000 5.100000 1.000000

In [33]:
X_LM.describe()


Out[33]:
GR ILD_log10 DeltaPHI PHIND PE RELPOS
count 2163.000000 2163.000000 2163.000000 2163.000000 2163.000000 2163.000000
mean 55.688079 0.778336 3.456620 9.987847 -24961.843041 0.531098
std 36.858662 0.263687 4.175825 4.628169 43292.673480 0.285358
min 10.149000 -0.019000 -21.832000 0.550000 -99999.000000 0.009000
25% 32.657500 0.628500 1.100000 6.650000 1.952500 0.290500
50% 46.923000 0.799000 3.100000 8.992000 3.900000 0.544000
75% 68.357500 0.941000 5.800500 12.400000 4.600000 0.776000
max 361.150000 1.800000 19.312000 47.721000 8.094000 1.000000

In [34]:
from sklearn.model_selection import train_test_split

X_train_SH, X_test_SH, y_train_SH, y_test_SH = train_test_split(X_SH, y_SH, test_size=0.1)

X_train_LM, X_test_LM, y_train_LM, y_test_LM = train_test_split(X_LM, y_LM, test_size=0.1)

Applying ExtraTreeClassifier


In [35]:
ETC_SH = ExtraTreesClassifier(n_estimators=500, bootstrap=True)
ETC_LM = ExtraTreesClassifier(n_estimators=500)

ETC_SH.fit(X_train_SH, y_train_SH)
ETC_LM.fit(X_train_LM, y_train_LM)


Out[35]:
ExtraTreesClassifier(bootstrap=False, class_weight=None, criterion='gini',
           max_depth=None, max_features='auto', max_leaf_nodes=None,
           min_impurity_split=1e-07, min_samples_leaf=1,
           min_samples_split=2, min_weight_fraction_leaf=0.0,
           n_estimators=500, n_jobs=1, oob_score=False, random_state=None,
           verbose=0, warm_start=False)

In [36]:
pred_SH = ETC_SH.predict(X_test_SH)
print(confusion_matrix(y_test_SH,pred_SH))
print(classification_report(y_test_SH,pred_SH))


[[15  8  1  0  0  0]
 [ 3 77  8  0  0  0]
 [ 0 18 65  0  0  0]
 [ 0  0  1  0  0  0]
 [ 0  2  0  0  0  0]
 [ 0  1  0  0  0  0]]
             precision    recall  f1-score   support

          1       0.83      0.62      0.71        24
          2       0.73      0.88      0.79        88
          3       0.87      0.78      0.82        83
          4       0.00      0.00      0.00         1
          5       0.00      0.00      0.00         2
          7       0.00      0.00      0.00         1

avg / total       0.78      0.79      0.78       199


In [37]:
pred_LM = ETC_LM.predict(X_test_LM)
print(confusion_matrix(y_test_LM,pred_LM))
print(classification_report(y_test_LM,pred_LM))


[[ 2  0  1  0  0  0  0  0]
 [ 1  0  0  0  0  0  0  0]
 [ 0  0 30  1  3  0  1  0]
 [ 0  0  1 17  6  0  2  0]
 [ 0  0  3  2 43  0 10  0]
 [ 0  0  1  0  0  9  4  1]
 [ 0  0  1  2  6  0 51  0]
 [ 0  0  0  0  0  1  2 16]]
             precision    recall  f1-score   support

          2       0.67      0.67      0.67         3
          3       0.00      0.00      0.00         1
          4       0.81      0.86      0.83        35
          5       0.77      0.65      0.71        26
          6       0.74      0.74      0.74        58
          7       0.90      0.60      0.72        15
          8       0.73      0.85      0.78        60
          9       0.94      0.84      0.89        19

avg / total       0.78      0.77      0.77       217


In [38]:
validation = pd.read_csv('validation_data_nofacies.csv')

In [39]:
validation.head()


Out[39]:
Formation Well Name Depth GR ILD_log10 DeltaPHI PHIND PE NM_M RELPOS
0 A1 SH STUART 2808.0 66.276 0.630 3.3 10.65 3.591 1 1.000
1 A1 SH STUART 2808.5 77.252 0.585 6.5 11.95 3.341 1 0.978
2 A1 SH STUART 2809.0 82.899 0.566 9.4 13.60 3.064 1 0.956
3 A1 SH STUART 2809.5 80.671 0.593 9.5 13.25 2.977 1 0.933
4 A1 SH STUART 2810.0 75.971 0.638 8.7 12.35 3.020 1 0.911

In [40]:
validation.describe()


Out[40]:
Depth GR ILD_log10 DeltaPHI PHIND PE NM_M RELPOS
count 830.000000 830.00000 830.000000 830.000000 830.000000 830.000000 830.000000 830.000000
mean 2987.070482 57.61173 0.666312 2.851964 11.655277 3.654178 1.678313 0.535807
std 94.391925 27.52774 0.288367 3.442074 5.190236 0.649793 0.467405 0.283062
min 2808.000000 12.03600 -0.468000 -8.900000 1.855000 2.113000 1.000000 0.013000
25% 2911.625000 36.77325 0.541000 0.411250 7.700000 3.171500 1.000000 0.300000
50% 2993.750000 58.34450 0.675000 2.397500 10.950000 3.515500 2.000000 0.547500
75% 3055.375000 73.05150 0.850750 4.600000 14.793750 4.191500 2.000000 0.778000
max 3160.500000 220.41300 1.507000 16.500000 31.335000 6.321000 2.000000 1.000000

Making the division between SH and LM


In [41]:
validation['Label_Form_SH_LM'] = validation.Formation.apply((label_two_groups_formation))

In [42]:
validation.head()


Out[42]:
Formation Well Name Depth GR ILD_log10 DeltaPHI PHIND PE NM_M RELPOS Label_Form_SH_LM
0 A1 SH STUART 2808.0 66.276 0.630 3.3 10.65 3.591 1 1.000 1
1 A1 SH STUART 2808.5 77.252 0.585 6.5 11.95 3.341 1 0.978 1
2 A1 SH STUART 2809.0 82.899 0.566 9.4 13.60 3.064 1 0.956 1
3 A1 SH STUART 2809.5 80.671 0.593 9.5 13.25 2.977 1 0.933 1
4 A1 SH STUART 2810.0 75.971 0.638 8.7 12.35 3.020 1 0.911 1

In [43]:
validation_SH = divisao_sh(validation)
validation_LM = divisao_lm(validation)

In [44]:
validation_SH.head()


Out[44]:
Formation Well Name Depth GR ILD_log10 DeltaPHI PHIND PE NM_M RELPOS
0 A1 SH STUART 2808.0 66.276 0.630 3.3 10.65 3.591 1 1.000
1 A1 SH STUART 2808.5 77.252 0.585 6.5 11.95 3.341 1 0.978
2 A1 SH STUART 2809.0 82.899 0.566 9.4 13.60 3.064 1 0.956
3 A1 SH STUART 2809.5 80.671 0.593 9.5 13.25 2.977 1 0.933
4 A1 SH STUART 2810.0 75.971 0.638 8.7 12.35 3.020 1 0.911

In [45]:
validation_LM.head()


Out[45]:
Formation Well Name Depth GR ILD_log10 DeltaPHI PHIND PE NM_M RELPOS
43 A1 LM STUART 2829.5 47.345 0.584 7.0 16.30 3.527 2 1.000
44 A1 LM STUART 2830.0 35.733 0.730 6.4 10.20 3.928 2 0.987
45 A1 LM STUART 2830.5 29.327 0.873 2.7 7.85 4.330 2 0.974
46 A1 LM STUART 2831.0 28.242 0.963 1.4 6.30 4.413 2 0.961
47 A1 LM STUART 2831.5 34.558 1.018 1.8 5.60 4.511 2 0.947

Removing the colums: Formation, Well Name, Depth


In [46]:
X_val_SH = validation_SH.drop(['Formation','Well Name','Depth','NM_M'], axis=1)
X_val_LM = validation_LM.drop(['Formation','Well Name','Depth','NM_M'], axis=1)

In [47]:
X_val_SH.head()


Out[47]:
GR ILD_log10 DeltaPHI PHIND PE RELPOS
0 66.276 0.630 3.3 10.65 3.591 1.000
1 77.252 0.585 6.5 11.95 3.341 0.978
2 82.899 0.566 9.4 13.60 3.064 0.956
3 80.671 0.593 9.5 13.25 2.977 0.933
4 75.971 0.638 8.7 12.35 3.020 0.911

In [48]:
X_val_LM.head()


Out[48]:
GR ILD_log10 DeltaPHI PHIND PE RELPOS
43 47.345 0.584 7.0 16.30 3.527 1.000
44 35.733 0.730 6.4 10.20 3.928 0.987
45 29.327 0.873 2.7 7.85 4.330 0.974
46 28.242 0.963 1.4 6.30 4.413 0.961
47 34.558 1.018 1.8 5.60 4.511 0.947

In [49]:
pred_val_SH = ETC_SH.predict(X_val_SH)

In [50]:
pred_val_LM =ETC_LM.predict(X_val_LM)

In [51]:
pred_val_SH = pd.DataFrame(pred_val_SH, index=X_val_SH.index)
pred_val_LM = pd.DataFrame(pred_val_LM, index=X_val_LM.index)
pred_val = pd.concat([pred_val_SH,pred_val_LM])
pred_val = pred_val.sort_index()

In [52]:
pred_val.describe()


Out[52]:
0
count 830.000000
mean 5.356627
std 2.386239
min 1.000000
25% 3.000000
50% 6.000000
75% 8.000000
max 9.000000

In [53]:
validation['Facies Pred'] = pred_val

In [54]:
validation=validation.drop(['Label_Form_SH_LM'],axis=1)

In [55]:
validation.head()


Out[55]:
Formation Well Name Depth GR ILD_log10 DeltaPHI PHIND PE NM_M RELPOS Facies Pred
0 A1 SH STUART 2808.0 66.276 0.630 3.3 10.65 3.591 1 1.000 3
1 A1 SH STUART 2808.5 77.252 0.585 6.5 11.95 3.341 1 0.978 3
2 A1 SH STUART 2809.0 82.899 0.566 9.4 13.60 3.064 1 0.956 2
3 A1 SH STUART 2809.5 80.671 0.593 9.5 13.25 2.977 1 0.933 3
4 A1 SH STUART 2810.0 75.971 0.638 8.7 12.35 3.020 1 0.911 3

In [56]:
validation.to_csv('Prediction.csv')

In [ ]: