In [194]:
import turicreate as tc
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
In [84]:
sf = tc.SFrame.read_csv('electrodes_clean.csv')
sf.explore() # in GUI
# optional: load from a previously saved SFrame instead of the CSV
# sf = tc.SFrame('electrodes_clean.sframe')
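If the cleaned data will be reused, the SFrame can be saved once in turicreate's native binary format and reloaded later, which avoids re-parsing the CSV. A minimal sketch (the file name 'electrodes_clean.sframe' is an assumption taken from the commented-out load above):
In [ ]:
# save the parsed SFrame to disk (assumed path) ...
sf.save('electrodes_clean.sframe')
# ... and reload it directly in later sessions
sf = tc.SFrame('electrodes_clean.sframe')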
We cannot have both TPLE and 'TPLE category' in the same data set, or the results will be biased: each column is a near-perfect predictor of the other. So we create two data sets: one for regression (with 'TPLE category' removed from the features) and one for classification (with 'TPLE' removed).
Regarding the final (automatically selected) model:
<model>.summary() summarizes the model parameters.
<model>.features shows which features have been included (= all columns selected for model building).
In [111]:
sf_reg = sf.remove_column('TPLE category')   # regression set: continuous TPLE stays as the target
sf_class = sf.remove_column('TPLE')          # classification set: TPLE category stays as the target
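As a quick sanity check (not part of the original run), the column lists of the two data sets can be compared to confirm that each one excludes exactly the column that would leak the other target:
In [ ]:
# verify that the leaking column was dropped from each data set
print(sf_reg.column_names())    # should no longer contain 'TPLE category'
print(sf_class.column_names())  # should no longer contain 'TPLE'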
In [112]:
sf_reg_train, sf_reg_test = sf_reg.random_split(0.8)
reg_model = tc.regression.create(sf_reg_train, target = 'TPLE')
In [113]:
reg_model.evaluate(sf_reg_test)
Out[113]:
In [115]:
reg_model.summary()
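To see which columns the automatically selected model was actually trained on (the <model>.features attribute mentioned above), it can be inspected directly; a minimal example for the regression model:
In [ ]:
# list the feature columns used by the selected regression model
reg_model.features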
In [122]:
sf_class_train, sf_class_test = sf_class.random_split(0.8)
In [123]:
class_model = tc.classifier.create(sf_class_train, target = 'TPLE category')
In [175]:
metrics = class_model.evaluate(sf_class_test)
metrics
# metrics['confusion_matrix']
Out[175]:
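The confusion matrix (commented out above) is typically returned as an SFrame with target_label, predicted_label and count columns; a small sketch for printing it in full:
In [ ]:
# print the full confusion matrix for the classification model
metrics['confusion_matrix'].print_rows(num_rows=20)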
In [126]:
class_model.summary()
In [192]:
# create new dataset - easier when experimenting with different cutoff values
# remove column 'TPLE category' - otherwise we severely bias results
sf_dev = sf_class.remove_column('TPLE category')
def evaluate_classification_for_cutoff(value):
    '''Builds a classification data set with the given cutoff value.
    Useful to experiment with different cutoffs; value is the deviation in mm.
    Returns the evaluation metrics of the resulting model.'''
    sf_dev['Deviated'] = sf['TPLE'].apply(lambda tple: 'yes' if tple > value else 'no')
    sf_dev_train, sf_dev_test = sf_dev.random_split(0.8)
    model = tc.classifier.create(sf_dev_train, target = 'Deviated', verbose = False)
    metrics = model.evaluate(sf_dev_test)
    return metrics

cutoff_values = [1.5, 2.0, 2.5, 3.0, 3.5, 4.0]
results = {}
for cv in cutoff_values:
    results[cv] = evaluate_classification_for_cutoff(cv)
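Note that each call to the helper above draws a fresh random 80/20 split, so part of the variation between cutoff values may come from the split itself rather than the cutoff. A minimal, hedged variant that fixes the split via random_split's seed argument (the seed value is arbitrary):
In [ ]:
def evaluate_classification_for_cutoff_seeded(value, seed=42):
    '''Same as above, but with a fixed train/test split for comparability.'''
    sf_dev['Deviated'] = sf['TPLE'].apply(lambda tple: 'yes' if tple > value else 'no')
    sf_dev_train, sf_dev_test = sf_dev.random_split(0.8, seed=seed)
    model = tc.classifier.create(sf_dev_train, target='Deviated', verbose=False)
    return model.evaluate(sf_dev_test)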
In [221]:
plt.figure()
for cutoff, metric in results.items():
    acc = metric['accuracy']
    auc = metric['auc']
    print(f"Cutoff {cutoff} - Accuracy: {acc:.2f} | AUC: {auc:.2f}")
    plt.plot(cutoff, acc, 'bo', label = 'Accuracy')  # accuracy in BLUE
    plt.plot(cutoff, auc, 'ro', label = 'AUC')       # AUC in RED
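The loop above adds one 'Accuracy' and one 'AUC' legend label per cutoff, so a legend would show duplicate entries. An alternative sketch that plots both metrics as lines over the sorted cutoffs, with axis labels and a single legend entry each:
In [ ]:
cutoffs = sorted(results)
accs = [results[c]['accuracy'] for c in cutoffs]
aucs = [results[c]['auc'] for c in cutoffs]

plt.figure()
plt.plot(cutoffs, accs, 'bo-', label='Accuracy')  # accuracy in blue
plt.plot(cutoffs, aucs, 'ro-', label='AUC')       # AUC in red
plt.xlabel('Cutoff (mm)')
plt.ylabel('Score')
plt.legend()
plt.show()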
In [ ]: