In [1]:
%matplotlib inline
In [2]:
import pandas as pd
import matplotlib.pyplot as plt
import os
import sys
import numpy as np
import math
In [3]:
# Load the track-parameter table for the training images
# (one row per image file under ../TRAIN).
track_params = pd.read_csv('../TRAIN/track_parms.csv')
In [4]:
# Peek at the last few rows to sanity-check the load.
track_params.tail()
Out[4]:
In [21]:
# Bin the phi values to get multi-class labels
# Discretize phi into 2-degree-wide bins over [-10, 10] so it can serve
# as a multi-class label. Keep the bin edges (phi_bins) so the holdout
# set can be binned with exactly the same boundaries later.
binned, phi_bins = pd.cut(track_params.phi,
                          bins=range(-10, 12, 2),
                          retbins=True)
# Stringify the Interval categories so Keras treats them as class names.
track_params['phi_binned'] = binned.astype(str)
In [22]:
# Confirm the new phi_binned column looks right.
track_params.head()
Out[22]:
In [23]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator
In [24]:
# Shared image generator: rescale pixel values to [0, 1] and reserve
# 25% of the rows for the validation subset.
DATAGEN = ImageDataGenerator(rescale=1./255.,
validation_split=0.25)
In [25]:
# Target image size fed to the network (rows x cols).
height = 100
width = 36


def create_generator(target, subset, class_mode,
                     idg=DATAGEN, df=track_params, N=1000):
    """Build a Keras data generator over the first N rows of ``df``.

    Parameters
    ----------
    target : str
        Column of ``df`` to use as the label (y_col).
    subset : str or None
        "training" or "validation" (per ``idg``'s validation_split).
    class_mode : str
        Keras class mode, e.g. "categorical".
    idg : ImageDataGenerator
        The generator to draw from (defaults to the shared DATAGEN).
    df : pandas.DataFrame
        Frame with a "filename" column plus the label column.
    N : int
        Number of rows (from the top of ``df``) to use.
    """
    # BUG FIX: the original referenced the module-level `track_params`
    # here, silently ignoring the `df` argument.
    return idg.flow_from_dataframe(
        dataframe=df.head(N),
        directory="../TRAIN",
        x_col="filename",
        y_col=target,
        subset=subset,
        target_size=(height, width),
        batch_size=32,
        seed=314,
        shuffle=True,
        class_mode=class_mode,
    )
In [26]:
from tensorflow.keras import Sequential, Model
from tensorflow.keras.layers import (
Conv2D, Activation, MaxPooling2D,
Flatten, Dense, Dropout, Input
)
In [27]:
# Training and validation generators for the binned-phi multi-class
# task; both draw from the same 10k-row slice, split by DATAGEN's
# validation_split.
mc_train_generator, mc_val_generator = [
    create_generator(
        target="phi_binned",
        subset=subset,
        class_mode="categorical",
        N=10000,
    )
    for subset in ("training", "validation")
]
In [28]:
# Input image dimensions (must match the generators' target_size).
width = 36
height = 100
channels = 3


def multiclass_classifier(n_classes=10):
    """Build and compile a small CNN for multi-class classification.

    Parameters
    ----------
    n_classes : int
        Number of output classes (default 10, matching the phi bins).

    Returns
    -------
    A compiled ``Sequential`` model with softmax output.
    """
    model = Sequential()
    # Convolutional feature extractor
    model.add(Conv2D(32, (3, 3), input_shape=(height, width, channels)))
    model.add(Activation('relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    # Dense classification head
    model.add(Flatten())
    model.add(Dense(32))
    model.add(Activation('relu'))
    model.add(Dropout(0.5))  # regularization against overfitting
    model.add(Dense(n_classes))
    model.add(Activation('softmax'))
    model.compile(loss='categorical_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy'])
    return model
In [29]:
# Batches per epoch = floor(number of samples / batch size).
STEP_SIZE_TRAIN = mc_train_generator.n//mc_train_generator.batch_size
STEP_SIZE_VAL = mc_val_generator.n//mc_val_generator.batch_size
In [30]:
mc_model = multiclass_classifier()
# Model.fit accepts generators directly since TF 2.1; fit_generator is
# deprecated and removed in newer TensorFlow releases.
mc_history = mc_model.fit(
    mc_train_generator,
    steps_per_epoch=STEP_SIZE_TRAIN,
    validation_data=mc_val_generator,
    validation_steps=STEP_SIZE_VAL,
    epochs=10
)
In [31]:
# Learning curves: training vs. validation accuracy per epoch.
for key, label in (("accuracy", "Train Accuracy"),
                   ("val_accuracy", "Validation Accuracy")):
    plt.plot(mc_history.history[key], label=label)
plt.legend()
plt.show()
In [32]:
# Holdout set: bin phi with the SAME edges (phi_bins) used for the
# training labels so class names match, then stringify as before.
holdout_track_params = pd.read_csv('../VALIDATION/track_parms.csv')
holdout_track_params['phi_binned'] = (
    pd.cut(holdout_track_params['phi'], bins=phi_bins).astype(str)
)
In [33]:
# Generator over the holdout (VALIDATION) directory. shuffle=False keeps
# prediction order aligned with mc_holdout_generator.classes/labels.
mc_holdout_generator = DATAGEN.flow_from_dataframe(
dataframe=holdout_track_params,
directory="../VALIDATION",
x_col="filename",
y_col="phi_binned",
subset=None,
target_size=(height, width),
batch_size=32,
seed=314,
shuffle=False,
class_mode="categorical",
)
In [34]:
# Sequential.predict_classes() was removed from Keras (TF >= 2.6);
# the equivalent is the argmax over the predicted class probabilities.
holdout_track_params['y_pred'] = np.argmax(
    mc_model.predict(mc_holdout_generator), axis=-1
)
In [35]:
# Integer class index per sample, as assigned by the generator
# (valid to align row-wise because shuffle=False above).
holdout_track_params['y_true'] = mc_holdout_generator.classes
In [36]:
import numpy as np
from sklearn.metrics import confusion_matrix


def plot_confusion_matrix(cm,
                          target_names,
                          title='Confusion matrix',
                          cmap=None,
                          normalize=True):
    """Render a sklearn confusion matrix as an annotated heatmap.

    Arguments
    ---------
    cm:            confusion matrix from sklearn.metrics.confusion_matrix
    target_names:  class names used as tick labels, e.g.
                   ['high', 'medium', 'low']; may be None to skip ticks
    title:         text displayed at the top of the matrix
    cmap:          matplotlib colormap (defaults to 'Blues'), see
                   http://matplotlib.org/examples/color/colormaps_reference.html
    normalize:     If False, annotate the raw counts.
                   If True, annotate row-normalized proportions.

    Usage
    -----
    plot_confusion_matrix(cm=cm,
                          normalize=True,
                          target_names=y_labels_vals,
                          title=best_estimator_name)

    Citation
    --------
    http://scikit-learn.org/stable/auto_examples/model_selection/plot_confusion_matrix.html
    """
    import matplotlib.pyplot as plt
    import numpy as np
    import itertools

    # Overall accuracy from the raw counts (before any normalization).
    accuracy = np.trace(cm) / float(np.sum(cm))
    misclass = 1 - accuracy

    if cmap is None:
        cmap = plt.get_cmap('Blues')

    # NOTE: the heatmap is drawn from the raw counts; only the cell
    # annotations are normalized below (preserves original behavior).
    plt.figure(figsize=(10, 8))
    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()

    if target_names is not None:
        tick_marks = np.arange(len(target_names))
        plt.xticks(tick_marks, target_names, rotation=45)
        plt.yticks(tick_marks, target_names)

    if normalize:
        # Row-normalize. Guard against all-zero rows (a class with no
        # true samples) which would otherwise produce NaN annotations.
        row_sums = cm.sum(axis=1)[:, np.newaxis]
        cm = cm.astype('float') / np.where(row_sums == 0, 1, row_sums)

    thresh = cm.max() / 1.5 if normalize else cm.max() / 2
    # One format string for both modes replaces the duplicated branches.
    fmt = "{:0.4f}" if normalize else "{:,}"
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        plt.text(j, i, fmt.format(cm[i, j]),
                 horizontalalignment="center",
                 color="white" if cm[i, j] > thresh else "black")

    plt.tight_layout()
    plt.ylabel('True label')
    plt.xlabel('Predicted label\naccuracy={:0.4f}; misclass={:0.4f}'.format(accuracy, misclass))
    plt.show()
In [37]:
# predict_classes() was removed from Keras; take the argmax of the
# softmax probabilities instead.
y_pred = np.argmax(mc_model.predict(mc_holdout_generator), axis=-1)
y_true = mc_holdout_generator.labels
# Derive the tick labels from the generator so their order matches the
# integer class indices. flow_from_dataframe assigns indices by sorting
# the class STRINGS lexicographically, which is not numeric bin order —
# a hard-coded numerically-ordered list would mislabel the axes.
label_list = [name for name, _ in
              sorted(mc_holdout_generator.class_indices.items(),
                     key=lambda kv: kv[1])]
plot_confusion_matrix(confusion_matrix(y_true, y_pred),
                      target_names=label_list,
                      normalize=False)
In [ ]: