In [1]:
import pandas as pd
# Read the training table from a CSV located one directory above the
# notebook's working directory (the path is relative, so the result depends
# on where the kernel was started).
training_data = pd.read_csv('../training_data.csv')
Isolate the feature matrix `X` and the target labels `y` (the `Facies` column):
In [2]:
# Feature matrix: every column except the identifier/label columns listed
# below, converted to a plain NumPy array.
X = training_data.drop(
    labels=['Formation', 'Well Name', 'Depth', 'Facies'],
    axis='columns',
).values

# Target vector: the 'Facies' column as a NumPy array, row-aligned with X.
y = training_data.loc[:, 'Facies'].values
We want the well names to use as groups in the k-fold analysis, so we'll get those too:
In [3]:
# One well label per row, row-aligned with X and y; used as the `groups`
# argument when splitting so that each fold holds out a whole well.
wells = training_data.loc[:, "Well Name"].values
Now we train as normal, but `LeaveOneGroupOut` gives us the appropriate train and test indices into `X` and `y`, so that each split holds out one well at a time:
In [4]:
from sklearn.model_selection import LeaveOneGroupOut
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC

# Leave-one-well-out cross-validation: each split holds out every row that
# belongs to a single well and trains on the rows from all the other wells.
logo = LeaveOneGroupOut()

for train, test in logo.split(X, y, groups=wells):
    # Every test row in a split shares one group label, so the first test
    # index is enough to name the held-out well.
    well_name = wells[test[0]]

    # SVC is not scale invariant, so standardise the features before fitting.
    # Keeping the scaler inside the pipeline means it is fitted on the
    # training wells only and never sees the held-out well (no leakage).
    # A fresh pipeline per fold keeps the folds independent of each other.
    model = make_pipeline(StandardScaler(), SVC())
    score = model.fit(X[train], y[train]).score(X[test], y[test])

    # str() guards the 's' format spec against non-string well labels.
    print("{:>20s} {:.3f}".format(str(well_name), score))
In [ ]: