In [1]:
    
%matplotlib inline
    
In [50]:
    
import pandas as pd
import matplotlib.pyplot as plt
    
In [64]:
    
# Load the per-image metadata and targets; the file's 'phi' column is
# called 'theta' throughout the rest of the notebook.
labels = (
    pd.read_csv('../TRAIN/track_parms.csv')
    .rename(columns={'phi': 'theta'})
)
labels.loc[:, ['filename', 'theta', 'z']].tail()
    
    Out[64]:
In [65]:
    
# Marginal histograms of both targets (left, middle) and their joint
# density as a hexbin plot (right).
fig, (ax0, ax1, ax2) = plt.subplots(1, 3, figsize=(12, 4))
labels[['theta', 'z']].hist(ax=[ax0, ax1])
labels.plot.hexbin(x='z', y='theta',
                   sharex=False, cmap="Blues", ax=ax2)
    
    Out[65]:
    
In [66]:
    
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras import Sequential
from tensorflow.keras.layers import (
    Conv2D, Activation, MaxPooling2D,
    Flatten, Dense, Dropout
)
# Image geometry used as `target_size` by the data generators and as
# `input_shape` by every model below.
height, width, channels = 100, 36, 3
    
In [67]:
    
def multiclass_classifier(n_classes=10, input_shape=None):
    """
    Build and compile a small CNN for multi-class image classification.

    The network is one 3x3 convolution (32 filters) + ReLU + 2x2 max-pool,
    flattened into a 64-unit ReLU layer with 50% dropout, followed by a
    softmax output layer.

    Arguments
    ---------
    n_classes:   number of softmax output units. Defaults to 10, which
                 matches the ten theta bins created in this notebook.
    input_shape: (rows, cols, channels) of the input images. When None,
                 the notebook-level (height, width, channels) is used.

    Returns
    -------
    A compiled Sequential model (categorical cross-entropy loss, Adam
    optimizer, accuracy metric).
    """
    if input_shape is None:
        # Resolved at call time so later edits to the globals are honoured.
        input_shape = (height, width, channels)

    model = Sequential()
    # Convolution Layer
    model.add(Conv2D(filters=32,
                     kernel_size=(3, 3),
                     input_shape=input_shape))
    model.add(Activation('relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    # Dense, Classification Layer
    model.add(Flatten())
    model.add(Dense(64))
    model.add(Activation('relu'))
    model.add(Dropout(0.5))
    # One output unit per class; softmax turns them into probabilities.
    model.add(Dense(n_classes))
    model.add(Activation('softmax'))
    model.compile(loss='categorical_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy'])

    return model
    
In [68]:
    
# Discretise theta into ten 2-unit-wide bins so it can be used as a
# multi-class label; the labels are stored as strings.
# NOTE(review): pd.cut leaves values outside (-10, 10] as NaN, which
# astype(str) presumably turns into the label 'nan' - confirm theta never
# falls outside that range.
theta_cut, theta_bins = pd.cut(labels['theta'],
                               bins=range(-10, 12, 2),
                               retbins=True)
labels['theta_binned'] = theta_cut.astype(str)
labels[['filename', 'theta', 'theta_binned']].tail()
    
    Out[68]:
In [56]:
    
# Pixel values are rescaled to [0, 1]; 25% of the rows are held out for
# validation by the generator itself.
datagen = ImageDataGenerator(rescale=1./255.,
                             validation_split=0.25)
n_subsample = 10000
batch_size = 32

# Arguments common to the training and validation iterators, so the two
# can only differ in the settings spelled out at the call sites.
clf_flow_kwargs = dict(
    dataframe=labels.head(n_subsample),
    directory="../TRAIN",
    x_col="filename",
    y_col="theta_binned",
    target_size=(height, width),
    batch_size=batch_size,
    seed=314,
    class_mode="categorical",
)

# Training batches are shuffled (reproducibly, via `seed`) so the optimizer
# does not see the same batches in the same order every epoch.
train_gen = datagen.flow_from_dataframe(
    subset="training",
    shuffle=True,
    **clf_flow_kwargs
)
# Validation order stays fixed: later cells compare `val_gen.labels`
# positionally against the model's predictions.
val_gen = datagen.flow_from_dataframe(
    subset="validation",
    shuffle=False,
    **clf_flow_kwargs
)
    
    
In [57]:
    
# Number of whole batches each iterator yields per epoch (floor division,
# so a trailing partial batch is not counted).
STEP_SIZE_TRAIN, STEP_SIZE_VAL = (
    gen.n // gen.batch_size for gen in (train_gen, val_gen)
)
    
In [30]:
    
# Build a fresh classifier and train it for 10 epochs.
# `Model.fit` accepts the data iterator directly; `fit_generator` is a
# deprecated alias that has been removed from recent Keras releases.
classifier = multiclass_classifier()
clf_history = classifier.fit(
    train_gen,
    steps_per_epoch=STEP_SIZE_TRAIN,
    validation_data=val_gen,
    validation_steps=STEP_SIZE_VAL,
    epochs=10
)
    
    
In [31]:
    
# Learning curves for the classifier: accuracy on the left panel, loss on
# the right, each with its training and validation series.
fig, (ax0, ax1) = plt.subplots(1, 2, figsize=(12, 4))
panel_series = (
    (ax0, (('accuracy', "Train Accuracy"),
           ('val_accuracy', "Validation Accuracy"))),
    (ax1, (('loss', "Train Loss"),
           ('val_loss', "Validation Loss"))),
)
for panel, series in panel_series:
    for history_key, legend_text in series:
        panel.plot(clf_history.history[history_key], label=legend_text)
    panel.legend()
plt.show()
    
    
In [60]:
    
import numpy as np
from sklearn.metrics import confusion_matrix
def plot_confusion_matrix(cm,
                          target_names,
                          title='Confusion matrix',
                          cmap=None,
                          normalize=True):
    """
    given a sklearn confusion matrix (cm), make a nice plot

    The matrix is drawn as a heat map with the value of every cell printed
    on top of it. Overall accuracy and misclassification rate (computed
    from the raw counts) are shown under the x axis.

    Arguments
    ---------
    cm:           confusion matrix from sklearn.metrics.confusion_matrix
                  (any 2-D array-like of counts is accepted)
    target_names: the class names in matrix order, for example:
                  ['high', 'medium', 'low']; pass None to keep the default
                  numeric ticks
    title:        the text to display at the top of the matrix
    cmap:         the gradient of the values displayed from matplotlib.pyplot.cm
                  see http://matplotlib.org/examples/color/colormaps_reference.html
                  plt.get_cmap('jet') or plt.cm.Blues
                  (defaults to 'Blues' when None)
    normalize:    If False, plot the raw numbers
                  If True, plot the proportions (each row divided by its
                  sum; rows with no samples are shown as all zeros)

    Returns
    -------
    None. The figure is created and shown as a side effect.

    Usage
    -----
    plot_confusion_matrix(cm           = cm,                  # confusion matrix created by
                                                              # sklearn.metrics.confusion_matrix
                          normalize    = True,                # show proportions
                          target_names = y_labels_vals,       # list of names of the classes
                          title        = best_estimator_name) # title of graph

    Citation
    --------
    http://scikit-learn.org/stable/auto_examples/model_selection/plot_confusion_matrix.html
    """
    import matplotlib.pyplot as plt
    import numpy as np
    import itertools

    # Accept plain nested lists as well as ndarrays.
    cm = np.asarray(cm)

    # Accuracy must be derived from the raw counts, before any row scaling.
    accuracy = np.trace(cm) / float(np.sum(cm))
    misclass = 1 - accuracy

    if cmap is None:
        cmap = plt.get_cmap('Blues')

    if normalize:
        # Normalize BEFORE drawing so the colours, the printed numbers and
        # the text-contrast threshold all refer to the same values.
        row_totals = cm.sum(axis=1, keepdims=True)
        # Rows without any true samples stay at zero instead of becoming NaN.
        cm = np.divide(cm.astype('float'), row_totals,
                       out=np.zeros(cm.shape, dtype='float'),
                       where=row_totals != 0)

    plt.figure(figsize=(10, 8))
    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()

    if target_names is not None:
        tick_marks = np.arange(len(target_names))
        plt.xticks(tick_marks, target_names, rotation=45)
        plt.yticks(tick_marks, target_names)

    # Cells darker than this threshold get white text for legibility.
    thresh = cm.max() / 1.5 if normalize else cm.max() / 2
    cell_format = "{:0.4f}" if normalize else "{:,}"
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        plt.text(j, i, cell_format.format(cm[i, j]),
                 horizontalalignment="center",
                 color="white" if cm[i, j] > thresh else "black")

    plt.ylabel('True label')
    plt.xlabel('Predicted label\naccuracy={:0.4f}; misclass={:0.4f}'.format(accuracy, misclass))
    # Lay out after the axis labels exist so they are not clipped.
    plt.tight_layout()
    plt.show()
    
In [63]:
    
# Predicted class index = arg-max over the softmax outputs.
# (`Sequential.predict_classes` has been removed from recent tf.keras.)
y_pred = np.argmax(classifier.predict(val_gen), axis=-1)
y_true = val_gen.labels

# Take the tick labels from the generator's own label -> index mapping.
# Keras assigns class indices after sorting the label strings, which is
# not numeric order for the negative bins, so a hand-written list in
# numeric order would mislabel the matrix rows and columns.
label_list = sorted(val_gen.class_indices, key=val_gen.class_indices.get)

# `labels=` pins the matrix to one row/column per known class even if a
# class never occurs in the validation subset.
plot_confusion_matrix(confusion_matrix(y_true, y_pred,
                                       labels=list(range(len(label_list)))),
                      target_names=label_list,
                      normalize=False)
    
    
In [73]:
    
def double_regression_model():
    """
    Build and compile a single-convolution CNN with two regression outputs.

    Layers: 3x3 convolution (32 filters) -> ReLU -> 2x2 max-pool -> flatten
    -> Dense(64) -> ReLU -> Dropout(0.5) -> Dense(2) with no activation.

    Returns
    -------
    A compiled Sequential model trained on mean squared error with the
    Adam optimizer; 'mse' is also reported as a metric.
    """
    feature_extractor = [
        Conv2D(filters=32,
               kernel_size=(3, 3),
               input_shape=(height, width, channels)),
        Activation('relu'),
        MaxPooling2D(pool_size=(2, 2)),
    ]
    # Dense regression head: two linear outputs, one per target.
    regression_head = [
        Flatten(),
        Dense(64),
        Activation('relu'),
        Dropout(0.5),
        Dense(2),
    ]

    model = Sequential(feature_extractor + regression_head)
    model.compile(loss='mean_squared_error',
                  optimizer='adam',
                  metrics=['mse'])
    return model
    
In [70]:
    
n_subsample = 100000
batch_size = 32

# Arguments common to the training and validation iterators. The targets
# are the raw numeric 'z' and 'theta' columns, in that order.
# NOTE(review): newer Keras releases warn that class_mode "other" is
# deprecated in favour of "raw" - switch once the installed version is
# confirmed to support it.
reg_flow_kwargs = dict(
    dataframe=labels.head(n_subsample),
    directory="../TRAIN",
    x_col="filename",
    y_col=["z", "theta"],
    target_size=(height, width),
    batch_size=batch_size,
    seed=314,
    class_mode="other",
)

# Training batches are shuffled (reproducibly, via `seed`) so the optimizer
# does not see the same batches in the same order every epoch.
train_gen = datagen.flow_from_dataframe(
    subset="training",
    shuffle=True,
    **reg_flow_kwargs
)
# Validation order is kept fixed so predictions can be matched to rows.
val_gen = datagen.flow_from_dataframe(
    subset="validation",
    shuffle=False,
    **reg_flow_kwargs
)
    
    
In [71]:
    
# Recompute the whole-batch counts for the regression iterators (floor
# division, so a trailing partial batch is not counted).
STEP_SIZE_TRAIN, STEP_SIZE_VAL = [
    flow.n // flow.batch_size for flow in (train_gen, val_gen)
]
    
In [ ]:
    
    
In [80]:
    
# Instantiate the single-convolution two-output regressor and print its
# layer-by-layer summary.
regressor = double_regression_model()
regressor.summary()
    
    
In [74]:
    
# Train the regressor for 10 epochs.
# `Model.fit` accepts the data iterator directly; `fit_generator` is a
# deprecated alias that has been removed from recent Keras releases.
reg_history = regressor.fit(
    train_gen,
    steps_per_epoch=STEP_SIZE_TRAIN,
    validation_data=val_gen,
    validation_steps=STEP_SIZE_VAL,
    epochs=10
)
    
    
In [79]:
    
# Training vs. validation MSE per epoch for the regressor; the y axis is
# fixed to [0, 5.5].
mse_series = (
    ('mse', "Train MSE"),
    ('val_mse', "Validation MSE"),
)
for history_key, legend_text in mse_series:
    plt.plot(reg_history.history[history_key], label=legend_text)
plt.ylim([0, 5.5])
plt.legend()
plt.show()
    
    
In [73]:
    
def deep_double_regression_model():
    """
    Build and compile a three-convolution CNN with two regression outputs.

    Layers: three [3x3 convolution -> ReLU -> 2x2 max-pool] stages with
    32, 32 and 64 filters, then flatten -> Dense(64) -> ReLU ->
    Dropout(0.5) -> Dense(2) with no activation.

    Returns
    -------
    A compiled Sequential model trained on mean squared error with the
    Adam optimizer; 'mse' is also reported as a metric.
    """
    conv_stack = [
        # Stage 1 also declares the input image shape.
        Conv2D(filters=32,
               kernel_size=(3, 3),
               input_shape=(height, width, channels)),
        Activation('relu'),
        MaxPooling2D(pool_size=(2, 2)),
        # Stage 2
        Conv2D(32, (3, 3)),
        Activation('relu'),
        MaxPooling2D(pool_size=(2, 2)),
        # Stage 3
        Conv2D(64, (3, 3)),
        Activation('relu'),
        MaxPooling2D(pool_size=(2, 2)),
    ]
    # Dense regression head: two linear outputs, one per target.
    regression_head = [
        Flatten(),
        Dense(64),
        Activation('relu'),
        Dropout(0.5),
        Dense(2),
    ]

    model = Sequential(conv_stack + regression_head)
    model.compile(loss='mean_squared_error',
                  optimizer='adam',
                  metrics=['mse'])
    return model
    
In [ ]: