In [1]:
%matplotlib inline
In [50]:
import pandas as pd
import matplotlib.pyplot as plt
In [64]:
# Load the per-track metadata and target variables; the CSV's 'phi' column
# is exposed under the name 'theta' for the rest of the notebook.
labels = (
    pd.read_csv('../TRAIN/track_parms.csv')
    .rename(columns={'phi': 'theta'})
)
# Show the last few rows as a quick sanity check
labels[['filename', 'theta', 'z']].tail()
Out[64]:
In [65]:
# Inspect the targets: one histogram per variable (left, middle) and their
# joint distribution as a hexbin plot (right).
fig, (ax0, ax1, ax2) = plt.subplots(figsize=(12, 4), nrows=1, ncols=3)
labels[['theta', 'z']].hist(ax=[ax0, ax1])
labels.plot(kind='hexbin', x='z', y='theta',
            cmap="Blues", sharex=False, ax=ax2)
Out[65]:
In [66]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras import Sequential
from tensorflow.keras.layers import (
Conv2D, Activation, MaxPooling2D,
Flatten, Dense, Dropout
)
# Image geometry shared by the notebook: passed as target_size=(height, width)
# to the data generators and as input_shape=(height, width, channels) to the models.
height = 100
width = 36
channels = 3
In [67]:
def multiclass_classifier():
    """Build and compile a small CNN with a 10-way softmax output.

    The network is one 3x3 convolution block (32 filters, ReLU, 2x2 max
    pooling) followed by a dense head (64 units, ReLU, dropout 0.5) and a
    10-unit softmax layer. The input shape is taken from the module-level
    ``height``, ``width`` and ``channels``.

    Returns
    -------
    A ``Sequential`` model compiled with categorical cross-entropy loss,
    the Adam optimizer and accuracy as the tracked metric.
    """
    image_shape = (height, width, channels)
    stack = [
        # Convolutional feature extractor
        Conv2D(filters=32, kernel_size=(3, 3), input_shape=image_shape),
        Activation('relu'),
        MaxPooling2D(pool_size=(2, 2)),
        # Dense classification head
        Flatten(),
        Dense(64),
        Activation('relu'),
        Dropout(0.5),
        Dense(10),
        Activation('softmax'),
    ]
    net = Sequential()
    for layer in stack:
        net.add(layer)
    net.compile(optimizer='adam',
                loss='categorical_crossentropy',
                metrics=['accuracy'])
    return net
In [68]:
# Bin the theta values to get multi-class labels
# range(-10, 12, 2) supplies the edges -10, -8, ..., 10, i.e. ten bins of width 2;
# retbins=True also returns those edges as theta_bins.
# NOTE(review): pd.cut yields NaN for values that fall outside the edges, and the
# astype(str) below would turn those into a literal 'nan' label — confirm that
# theta always lies inside the binned range.
labels['theta_binned'], theta_bins = pd.cut(labels['theta'],
bins=range(-10, 12, 2),
retbins=True)
# Store the interval labels as plain strings
labels['theta_binned'] = labels['theta_binned'].astype(str)
# Show the last few rows with the raw and binned theta side by side
labels[['filename', 'theta', 'theta_binned']].tail()
Out[68]:
In [56]:
# Scale pixel values by 1/255 and reserve 25% of the rows for validation
datagen = ImageDataGenerator(validation_split=0.25, rescale=1./255.)
n_subsample = 10000
batch_size = 32

# The training and validation generators read the same first `n_subsample`
# rows with identical settings; only the `subset` argument differs.
clf_flow_args = dict(
    dataframe=labels.head(n_subsample),
    directory="../TRAIN",
    x_col="filename",
    y_col="theta_binned",
    target_size=(height, width),
    batch_size=batch_size,
    seed=314,
    shuffle=False,
    class_mode="categorical",
)
train_gen = datagen.flow_from_dataframe(subset="training", **clf_flow_args)
val_gen = datagen.flow_from_dataframe(subset="validation", **clf_flow_args)
In [57]:
# Number of whole batches in each generator; the floor division leaves any
# final partial batch out of the step count.
STEP_SIZE_TRAIN = train_gen.n//train_gen.batch_size
STEP_SIZE_VAL = val_gen.n//val_gen.batch_size
In [30]:
# Build a fresh classifier and train it on the binned-theta generators.
classifier = multiclass_classifier()
# `Model.fit` accepts generators directly; `fit_generator` is deprecated in
# TensorFlow 2.x and no longer available in recent releases.
clf_history = classifier.fit(
    train_gen,
    steps_per_epoch=STEP_SIZE_TRAIN,
    validation_data=val_gen,
    validation_steps=STEP_SIZE_VAL,
    epochs=10,
)
In [31]:
# Training curves for the classifier: accuracy on the left, loss on the right
fig, (ax0, ax1) = plt.subplots(figsize=(12, 4), nrows=1, ncols=2)

# Left panel: accuracy per epoch
ax0.plot(clf_history.history['accuracy'], label="Train Accuracy")
ax0.plot(clf_history.history['val_accuracy'], label="Validation Accuracy")
ax0.legend()

# Right panel: loss per epoch
ax1.plot(clf_history.history['loss'], label="Train Loss")
ax1.plot(clf_history.history['val_loss'], label="Validation Loss")
ax1.legend()

plt.show()
In [60]:
import numpy as np
from sklearn.metrics import confusion_matrix
def plot_confusion_matrix(cm,
                          target_names,
                          title='Confusion matrix',
                          cmap=None,
                          normalize=True):
    """
    Given a sklearn confusion matrix (cm), make a nice plot.

    Arguments
    ---------
    cm:           confusion matrix from sklearn.metrics.confusion_matrix
                  (rows are true labels, columns are predicted labels)

    target_names: names of the classes, in matrix order, used as tick
                  labels, for example: ['high', 'medium', 'low'];
                  pass None to keep the default numeric ticks

    title:        the text to display at the top of the matrix

    cmap:         the matplotlib colormap used for the cells, e.g.
                  plt.get_cmap('jet') or plt.cm.Blues; defaults to 'Blues'
                  see http://matplotlib.org/examples/color/colormaps_reference.html

    normalize:    If False, plot the raw numbers
                  If True, plot the per-row proportions (each row is
                  divided by its sum; rows without samples are shown as 0)

    Returns
    -------
    None. The figure is drawn and shown with plt.show().

    Usage
    -----
    plot_confusion_matrix(cm           = cm,                  # confusion matrix created by
                                                              # sklearn.metrics.confusion_matrix
                          normalize    = True,                # show proportions
                          target_names = y_labels_vals,       # list of names of the classes
                          title        = best_estimator_name) # title of graph

    Citation
    --------
    http://scikit-learn.org/stable/auto_examples/model_selection/plot_confusion_matrix.html
    """
    import matplotlib.pyplot as plt
    import numpy as np

    cm = np.asarray(cm)

    # Overall accuracy always comes from the raw counts, before any normalisation.
    total = float(np.sum(cm))
    accuracy = np.trace(cm) / total if total else float('nan')
    misclass = 1 - accuracy

    if cmap is None:
        cmap = plt.get_cmap('Blues')

    # Decide what is displayed *before* drawing, so that the cell colours, the
    # colour bar, the annotations and the text-colour threshold all refer to
    # the same values.
    if normalize:
        row_sums = cm.sum(axis=1, keepdims=True).astype('float')
        # Rows with no samples stay at 0 instead of becoming NaN (0 / 0).
        shown = np.divide(cm, row_sums,
                          out=np.zeros(cm.shape, dtype='float'),
                          where=row_sums != 0)
        cell_format = "{:0.4f}"
        divisor = 1.5
    else:
        shown = cm
        cell_format = "{:,}"
        divisor = 2
    # Cells above the threshold get white text so it stays readable on dark cells.
    thresh = shown.max() / divisor if shown.size else 0

    plt.figure(figsize=(10, 8))
    plt.imshow(shown, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()

    if target_names is not None:
        tick_marks = np.arange(len(target_names))
        plt.xticks(tick_marks, target_names, rotation=45)
        plt.yticks(tick_marks, target_names)

    # Annotate every cell with its value.
    for i, j in np.ndindex(shown.shape[0], shown.shape[1]):
        plt.text(j, i, cell_format.format(shown[i, j]),
                 horizontalalignment="center",
                 color="white" if shown[i, j] > thresh else "black")

    plt.ylabel('True label')
    plt.xlabel('Predicted label\naccuracy={:0.4f}; misclass={:0.4f}'.format(accuracy, misclass))
    # Run the layout pass after the axis labels exist so they are accounted for.
    plt.tight_layout()
    plt.show()
In [63]:
# Predicted class index = arg-max over the softmax outputs.
# (`Sequential.predict_classes` is no longer available in recent TensorFlow.)
y_pred = np.argmax(classifier.predict(val_gen), axis=1)
y_true = val_gen.labels

# Take the label -> index mapping from the generator itself instead of
# hard-coding it: the indices are assigned from the label strings, and a
# hand-written numeric ordering is not guaranteed to match them (as strings,
# '(-2.0, 0.0]' sorts before '(-8.0, -6.0]').
class_indices = val_gen.class_indices
# Display the bins in ascending numeric order of their lower edge; this
# assumes labels of the form '(lo, hi]' as produced by the binning cell.
label_list = sorted(class_indices,
                    key=lambda name: float(name.strip('(').split(',')[0]))
label_ids = [class_indices[name] for name in label_list]

# Passing `labels=` fixes both the size and the row/column order of the
# matrix so that it always lines up with the tick labels.
plot_confusion_matrix(confusion_matrix(y_true, y_pred, labels=label_ids),
                      target_names=label_list,
                      normalize=False)
In [73]:
def double_regression_model():
    """Build and compile a small CNN that regresses two values per image.

    The network is one 3x3 convolution block (32 filters, ReLU, 2x2 max
    pooling) followed by a dense head (64 units, ReLU, dropout 0.5) and a
    2-unit output layer with no activation. The input shape is taken from
    the module-level ``height``, ``width`` and ``channels``.

    Returns
    -------
    A ``Sequential`` model compiled with mean-squared-error loss, the Adam
    optimizer and MSE as the tracked metric.
    """
    image_shape = (height, width, channels)
    stack = [
        # Convolutional feature extractor
        Conv2D(filters=32, kernel_size=(3, 3), input_shape=image_shape),
        Activation('relu'),
        MaxPooling2D(pool_size=(2, 2)),
        # Dense regression head
        Flatten(),
        Dense(64),
        Activation('relu'),
        Dropout(0.5),
        Dense(2),
    ]
    net = Sequential()
    for layer in stack:
        net.add(layer)
    net.compile(optimizer='adam',
                loss='mean_squared_error',
                metrics=['mse'])
    return net
In [70]:
n_subsample = 100000
batch_size = 32

# Regression generators: the targets are the two columns "z" and "theta".
# Note that this rebinds `train_gen` / `val_gen`. Both generators share every
# setting except `subset`.
reg_flow_args = dict(
    dataframe=labels.head(n_subsample),
    directory="../TRAIN",
    x_col="filename",
    y_col=["z", "theta"],
    target_size=(height, width),
    batch_size=batch_size,
    seed=314,
    shuffle=False,
    class_mode="other",
)
train_gen = datagen.flow_from_dataframe(subset="training", **reg_flow_args)
val_gen = datagen.flow_from_dataframe(subset="validation", **reg_flow_args)
In [71]:
# Recompute the step counts for the current generators; the floor division
# leaves any final partial batch out of the step count.
STEP_SIZE_TRAIN = train_gen.n//train_gen.batch_size
STEP_SIZE_VAL = val_gen.n//val_gen.batch_size
In [ ]:
In [80]:
# Build the two-output regression model and print its layer summary
regressor = double_regression_model()
regressor.summary()
In [74]:
# Train the regressor. `Model.fit` accepts generators directly; `fit_generator`
# is deprecated in TensorFlow 2.x and no longer available in recent releases.
reg_history = regressor.fit(
    train_gen,
    steps_per_epoch=STEP_SIZE_TRAIN,
    validation_data=val_gen,
    validation_steps=STEP_SIZE_VAL,
    epochs=10,
)
In [79]:
# Training vs. validation MSE per epoch for the regressor
for history_key, curve_label in (('mse', "Train MSE"),
                                 ('val_mse', "Validation MSE")):
    plt.plot(reg_history.history[history_key], label=curve_label)
# Fix the y-range of the plot
plt.ylim([0, 5.5])
plt.legend()
plt.show()
In [73]:
def deep_double_regression_model():
    """Build and compile a three-block CNN that regresses two values per image.

    Three convolution blocks (32, 32 and 64 filters, each 3x3 with ReLU and
    2x2 max pooling) feed a dense head (64 units, ReLU, dropout 0.5) and a
    2-unit output layer with no activation. The input shape is taken from
    the module-level ``height``, ``width`` and ``channels``.

    Returns
    -------
    A ``Sequential`` model compiled with mean-squared-error loss, the Adam
    optimizer and MSE as the tracked metric.
    """
    image_shape = (height, width, channels)
    stack = [
        # Convolution block 1
        Conv2D(filters=32, kernel_size=(3, 3), input_shape=image_shape),
        Activation('relu'),
        MaxPooling2D(pool_size=(2, 2)),
        # Convolution block 2
        Conv2D(32, (3, 3)),
        Activation('relu'),
        MaxPooling2D(pool_size=(2, 2)),
        # Convolution block 3
        Conv2D(64, (3, 3)),
        Activation('relu'),
        MaxPooling2D(pool_size=(2, 2)),
        # Dense regression head
        Flatten(),
        Dense(64),
        Activation('relu'),
        Dropout(0.5),
        Dense(2),
    ]
    net = Sequential()
    for layer in stack:
        net.add(layer)
    net.compile(optimizer='adam',
                loss='mean_squared_error',
                metrics=['mse'])
    return net
In [ ]: