Alexsandro G. Cerqueira,
Alã de C. Damasceno
There are two main notebooks:
In [1]:
from libtools import *
In [2]:
# Load the labelled table used to fit and hold-out-test the first pair of models.
# NOTE(review): the variable is called `training` but the file is
# 'data-test.csv' — confirm this is the intended file.
training = pd.read_csv('data-test.csv')
In [3]:
training.head()
Out[3]:
In [4]:
training.describe()
Out[4]:
In [5]:
# Replace every missing value with the sentinel -99999 so that no NaN is left
# in the table that is later split and passed to the classifiers.
# NOTE(review): the other tables loaded in this notebook are not given the
# same fill — confirm they contain no missing values.
training = training.fillna(-99999)
In [6]:
# Load the blind table; in this notebook it is only predicted on and scored,
# never used to fit a model.
blind = pd.read_csv('blind.csv')
In [7]:
blind.head()
Out[7]:
In [8]:
blind.describe()
Out[8]:
In [9]:
# Split each table into its SH and LM subsets with the libtools helpers.
# Presumably these are the two formation groups — verify against libtools.
training_SH, training_LM = divisao_sh(training), divisao_lm(training)
blind_SH, blind_LM = divisao_sh(blind), divisao_lm(blind)
In [10]:
training_SH.head()
Out[10]:
In [11]:
training_LM.head()
Out[11]:
In [12]:
blind_SH.head()
Out[12]:
In [13]:
blind_LM.head()
Out[13]:
In [14]:
# Features are every column except the 'Facies' label; the label is the target.

# Training subsets.
X_SH = training_SH.drop(columns=['Facies'])
y_SH = training_SH['Facies']
X_LM = training_LM.drop(columns=['Facies'])
y_LM = training_LM['Facies']

# Blind subsets, prepared the same way so they can be scored later.
X_SH_blind = blind_SH.drop(columns=['Facies'])
y_SH_blind = blind_SH['Facies']
X_LM_blind = blind_LM.drop(columns=['Facies'])
y_LM_blind = blind_LM['Facies']
In [15]:
from sklearn.model_selection import train_test_split

# Hold out 10% of each subset's rows for a quick accuracy check.
# A fixed random_state makes the split, and every metric printed from it,
# repeatable from one run of the notebook to the next.
X_train_SH, X_test_SH, y_train_SH, y_test_SH = train_test_split(
    X_SH, y_SH, test_size=0.1, random_state=42)
X_train_LM, X_test_LM, y_train_LM, y_test_LM = train_test_split(
    X_LM, y_LM, test_size=0.1, random_state=42)
In [16]:
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.metrics import classification_report,confusion_matrix
In [17]:
# One 500-tree extra-trees classifier per subset.
# random_state is fixed so the fitted models and their predictions are
# reproducible across runs.
# NOTE(review): only the SH model enables bootstrap sampling; the LM model
# leaves `bootstrap` at its default — confirm the asymmetry is intentional.
ETC_SH = ExtraTreesClassifier(n_estimators=500, bootstrap=True, random_state=42)
ETC_LM = ExtraTreesClassifier(n_estimators=500, random_state=42)
ETC_SH.fit(X_train_SH, y_train_SH)
ETC_LM.fit(X_train_LM, y_train_LM)
Out[17]:
In [18]:
# Score the SH model on its 10% hold-out: confusion matrix, then per-class report.
pred_SH = ETC_SH.predict(X_test_SH)
print(
    confusion_matrix(y_test_SH, pred_SH),
    classification_report(y_test_SH, pred_SH),
    sep="\n",
)
In [19]:
# Score the LM model on its 10% hold-out: confusion matrix, then per-class report.
pred_LM = ETC_LM.predict(X_test_LM)
print(
    confusion_matrix(y_test_LM, pred_LM),
    classification_report(y_test_LM, pred_LM),
    sep="\n",
)
In [20]:
# Score the SH model on the SH rows of the blind table.
blind_pred_SH = ETC_SH.predict(X_SH_blind)
print(
    confusion_matrix(y_SH_blind, blind_pred_SH),
    classification_report(y_SH_blind, blind_pred_SH),
    sep="\n",
)
In [21]:
# Score the LM model on the LM rows of the blind table.
blind_pred_LM = ETC_LM.predict(X_LM_blind)
print(
    confusion_matrix(y_LM_blind, blind_pred_LM),
    classification_report(y_LM_blind, blind_pred_LM),
    sep="\n",
)
In [22]:
# Give each subset's predictions the row index of the features they were
# predicted from, so the two subsets can be merged back together.
blind_pred_SH = pd.DataFrame(blind_pred_SH, index=X_SH_blind.index)
blind_pred_LM = pd.DataFrame(blind_pred_LM, index=X_LM_blind.index)

# Stack both subsets and restore index order in one step.
pred_blind = pd.concat([blind_pred_SH, blind_pred_LM]).sort_index()
In [23]:
# True labels for the whole blind table, in the table's own row order.
y_blind = blind['Facies']
In [24]:
# Score the merged SH+LM predictions against the full blind labels.
# NOTE(review): the comparison is positional, so this assumes every blind row
# falls in exactly one subset and that blind's index is already sorted.
print(
    confusion_matrix(y_blind, pred_blind),
    classification_report(y_blind, pred_blind),
    sep="\n",
)
In [25]:
# Load the table used to fit the second pair of models, the ones that are
# later applied to the validation file.
training_data = pd.read_csv('training.csv')
In [26]:
training_data.head()
Out[26]:
In [27]:
training_data.describe()
Out[27]:
In [28]:
# Split the table into its SH and LM subsets with the libtools helpers.
training_data_SH, training_data_LM = (
    divisao_sh(training_data),
    divisao_lm(training_data),
)
In [29]:
training_data_SH.describe()
Out[29]:
In [30]:
training_data_LM.describe()
Out[30]:
In [31]:
# Rebuild features (everything except 'Facies') and targets from the new table.
# This rebinds X_SH / y_SH / X_LM / y_LM that earlier cells had defined.
X_SH = training_data_SH.drop(columns=['Facies'])
y_SH = training_data_SH['Facies']
X_LM = training_data_LM.drop(columns=['Facies'])
y_LM = training_data_LM['Facies']
In [32]:
X_SH.describe()
Out[32]:
In [33]:
X_LM.describe()
Out[33]:
In [34]:
from sklearn.model_selection import train_test_split

# Hold out 10% of each subset's rows for a quick accuracy check.
# A fixed random_state makes the split, and every metric printed from it,
# repeatable from one run of the notebook to the next.
X_train_SH, X_test_SH, y_train_SH, y_test_SH = train_test_split(
    X_SH, y_SH, test_size=0.1, random_state=42)
X_train_LM, X_test_LM, y_train_LM, y_test_LM = train_test_split(
    X_LM, y_LM, test_size=0.1, random_state=42)
In [35]:
# Refit one 500-tree extra-trees classifier per subset; these are the models
# whose predictions end up in the exported file, so random_state is fixed to
# make that output reproducible.
# NOTE(review): only the SH model enables bootstrap sampling; the LM model
# leaves `bootstrap` at its default — confirm the asymmetry is intentional.
ETC_SH = ExtraTreesClassifier(n_estimators=500, bootstrap=True, random_state=42)
ETC_LM = ExtraTreesClassifier(n_estimators=500, random_state=42)
ETC_SH.fit(X_train_SH, y_train_SH)
ETC_LM.fit(X_train_LM, y_train_LM)
Out[35]:
In [36]:
# Score the refitted SH model on its 10% hold-out.
pred_SH = ETC_SH.predict(X_test_SH)
print(
    confusion_matrix(y_test_SH, pred_SH),
    classification_report(y_test_SH, pred_SH),
    sep="\n",
)
In [37]:
# Score the refitted LM model on its 10% hold-out.
pred_LM = ETC_LM.predict(X_test_LM)
print(
    confusion_matrix(y_test_LM, pred_LM),
    classification_report(y_test_LM, pred_LM),
    sep="\n",
)
In [38]:
# Load the table the fitted models will label.
# Presumably it has no 'Facies' column, going by the file name — verify.
validation = pd.read_csv('validation_data_nofacies.csv')
In [39]:
validation.head()
Out[39]:
In [40]:
validation.describe()
Out[40]:
In [41]:
# Derive the SH/LM group label for every row from its 'Formation' value,
# using the libtools mapping function.
validation['Label_Form_SH_LM'] = validation['Formation'].apply(label_two_groups_formation)
In [42]:
validation.head()
Out[42]:
In [43]:
# Split the validation table into its SH and LM subsets.
validation_SH, validation_LM = divisao_sh(validation), divisao_lm(validation)
In [44]:
validation_SH.head()
Out[44]:
In [45]:
validation_LM.head()
Out[45]:
In [46]:
# Strip the columns that are not model inputs from each validation subset.
# NOTE(review): the training features were built by dropping only 'Facies';
# confirm the columns left here match the ones the models were fitted on.
X_val_SH = validation_SH.drop(columns=['Formation', 'Well Name', 'Depth', 'NM_M'])
X_val_LM = validation_LM.drop(columns=['Formation', 'Well Name', 'Depth', 'NM_M'])
In [47]:
X_val_SH.head()
Out[47]:
In [48]:
X_val_LM.head()
Out[48]:
In [49]:
# Label the SH validation rows with whichever SH model was fitted last
# (the one trained from 'training.csv' when cells are run in order).
pred_val_SH = ETC_SH.predict(X_val_SH)
In [50]:
# Label the LM validation rows with whichever LM model was fitted last.
pred_val_LM = ETC_LM.predict(X_val_LM)
In [51]:
# Give each subset's predictions the row index of the features they were
# predicted from, so the two subsets can be merged back together.
pred_val_SH = pd.DataFrame(pred_val_SH, index=X_val_SH.index)
pred_val_LM = pd.DataFrame(pred_val_LM, index=X_val_LM.index)

# Stack both subsets and restore index order in one step.
pred_val = pd.concat([pred_val_SH, pred_val_LM]).sort_index()
In [52]:
pred_val.describe()
Out[52]:
In [53]:
# Attach the merged predictions to the validation table as 'Facies Pred'.
# pred_val is a one-column DataFrame at this point, not a Series.
validation['Facies Pred'] = pred_val
In [54]:
# Remove the helper group-label column so it does not appear in the export.
validation = validation.drop(columns=['Label_Form_SH_LM'])
In [55]:
validation.head()
Out[55]:
In [56]:
# Write the validation table, including the 'Facies Pred' column, to disk.
# NOTE(review): to_csv is called without index=False, so the row index is
# written as an extra leading column — confirm that is wanted.
validation.to_csv('Prediction.csv')
In [ ]: