In [1]:
%matplotlib inline
In [2]:
import pandas as pd
import matplotlib.pyplot as plt
import os
import sys
import numpy as np
import math
In [3]:
# Load the training-set track parameters (one row per simulated track image).
track_params = pd.read_csv('../TRAIN/track_parms.csv')
In [4]:
# Quick sanity check on the last few rows.
track_params.tail()
Out[4]:
In [5]:
# Binary target: the sign of phi ("+" strictly above zero, "-" otherwise).
track_params['phi_bool'] = ["+" if v > 0 else "-" for v in track_params['phi']]
# Multi-class target: phi discretized into 11 equal-width bins.
# Keep the bin edges (phi_bins) so the holdout set can be binned identically.
phi_cats, phi_bins = pd.cut(track_params['phi'], bins=11, retbins=True)
track_params['phi_binned'] = phi_cats.astype(str)
# Multi-class target: z discretized into 11 equal-width bins (edges kept too).
z_cats, z_bins = pd.cut(track_params['z'], bins=11, retbins=True)
track_params['z_binned'] = z_cats.astype(str)
In [10]:
# Marginal distributions of the two targets.
track_params[['phi', 'z']].hist()
Out[10]:
In [5]:
# Joint density of (z, phi) over the training set.
track_params.plot(x='z', y='phi', kind='hexbin', sharex=False, cmap="Blues")
Out[7]:
In [11]:
# Inspect the engineered label columns.
track_params.head()
Out[11]:
In [12]:
# Holdout (VALIDATION) tracks, labeled with the SAME transformations and the
# SAME bin edges (phi_bins / z_bins) that were derived from the training set.
holdout_track_params = pd.read_csv('../VALIDATION/track_parms.csv')
holdout_track_params['phi_bool'] = [
    "+" if v > 0 else "-" for v in holdout_track_params['phi']
]
holdout_track_params['phi_binned'] = pd.cut(
    holdout_track_params['phi'], bins=phi_bins
).astype(str)
holdout_track_params['z_binned'] = pd.cut(
    holdout_track_params['z'], bins=z_bins
).astype(str)
In [13]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator
In [41]:
# Shared image generator: rescales pixel values to [0, 1] and reserves 25% of
# the TRAIN images as an in-training validation split (chosen via `subset=`).
DATAGEN = ImageDataGenerator(rescale=1./255.,
validation_split=0.25)
In [43]:
# Input image geometry expected by all models below.
height = 100
width = 36

def create_generator(target, subset, class_mode,
                     idg=DATAGEN, df=track_params, N=1000):
    """Build a flow_from_dataframe generator over the first N rows of `df`.

    Parameters
    ----------
    target : str or list of str
        Column(s) of `df` to use as labels (passed to y_col).
    subset : str or None
        "training" or "validation" — which side of the generator's
        validation_split to draw from.
    class_mode : str
        Keras class mode ("binary", "categorical", "raw", ...).
    idg : ImageDataGenerator
        Source generator; defaults to the shared DATAGEN.
    df : pandas.DataFrame
        Frame with a "filename" column plus the label column(s).
        NOTE: the default is bound at definition time to `track_params`.
    N : int
        Number of rows (images) taken from the head of `df`.
    """
    # BUG FIX: this previously read the module-level `track_params` directly,
    # silently ignoring the `df` argument.
    return idg.flow_from_dataframe(
        dataframe=df.head(N),
        directory="../TRAIN",
        x_col="filename",
        y_col=target,
        subset=subset,
        target_size=(height, width),
        batch_size=32,
        seed=314,
        shuffle=True,
        class_mode=class_mode,
    )
In [44]:
# Train/validation generators for the binary sign-of-phi task
# (default N=1000 images, split 75/25 by DATAGEN's validation_split).
binary_train_generator = create_generator(
target="phi_bool",
subset="training",
class_mode="binary"
)
binary_val_generator = create_generator(
target="phi_bool",
subset="validation",
class_mode="binary"
)
In [165]:
from tensorflow.keras import Sequential, Model
from tensorflow.keras.layers import (
Conv2D, Activation, MaxPooling2D,
Flatten, Dense, Dropout, Input
)
In [30]:
# Fixed input geometry (repeated here so the cell is self-contained).
width = 36
height = 100
channels = 3

def binary_classifier():
    """CNN for the binary sign-of-phi task.

    Three conv/ReLU/max-pool stages followed by a dropout-regularized
    dense head ending in a single sigmoid unit. Compiled with binary
    cross-entropy and the Adam optimizer.
    """
    model = Sequential([
        # Stage 1
        Conv2D(32, (3, 3), input_shape=(height, width, channels)),
        Activation('relu'),
        MaxPooling2D(pool_size=(2, 2)),
        # Stage 2
        Conv2D(32, (3, 3)),
        Activation('relu'),
        MaxPooling2D(pool_size=(2, 2)),
        # Stage 3
        Conv2D(64, (3, 3)),
        Activation('relu'),
        MaxPooling2D(pool_size=(2, 2)),
        # Classification head: 3D feature maps -> 1D vector -> sigmoid.
        Flatten(),
        Dense(64),
        Activation('relu'),
        Dropout(0.5),
        Dense(1),
        Activation('sigmoid'),
    ])
    model.compile(loss='binary_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy'])
    return model
In [61]:
# Whole batches per epoch (floor division drops any final partial batch).
STEP_SIZE_TRAIN = binary_train_generator.n//binary_train_generator.batch_size
STEP_SIZE_VAL = binary_val_generator.n//binary_val_generator.batch_size
In [62]:
binary_model = binary_classifier()
# Model.fit accepts generators directly since TF 2.1; fit_generator is
# deprecated (and removed in later releases).
binary_history = binary_model.fit(
    binary_train_generator,
    steps_per_epoch=STEP_SIZE_TRAIN,
    validation_data=binary_val_generator,
    validation_steps=STEP_SIZE_VAL,
    epochs=5
)
In [63]:
# BUG FIX: the training history is named `binary_history`; `history` was
# undefined here and raised NameError on a fresh kernel.
plt.plot(binary_history.history['accuracy'], label="Train Accuracy")
plt.plot(binary_history.history['val_accuracy'], label="Validation Accuracy")
plt.legend()
plt.show()
In [48]:
# Train/validation generators for the 11-class binned-phi task.
# The harder multi-class problem gets 10k images instead of the default 1k.
mc_train_generator = create_generator(
target="phi_binned",
subset="training",
class_mode="categorical",
N=10000
)
mc_val_generator = create_generator(
target="phi_binned",
subset="validation",
class_mode="categorical",
N=10000
)
In [290]:
width = 36
height = 100
channels = 3

def multiclass_classifier(num_classes=11):
    """CNN for the binned-phi classification task.

    Two conv/ReLU/max-pool stages (a third stage was tried and removed),
    then a dropout-regularized dense head with a softmax output.

    Parameters
    ----------
    num_classes : int
        Size of the softmax output layer. Defaults to 11 to match the
        11 phi bins created earlier; previously this was hard-coded.
    """
    model = Sequential()
    # Stage 1
    model.add(Conv2D(32, (3, 3), input_shape=(height, width, channels)))
    model.add(Activation('relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    # Stage 2
    model.add(Conv2D(32, (3, 3)))
    model.add(Activation('relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    # Classification head
    model.add(Flatten())
    model.add(Dense(64))
    model.add(Activation('relu'))
    model.add(Dropout(0.5))
    model.add(Dense(num_classes))
    model.add(Activation('softmax'))
    model.compile(loss='categorical_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy'])
    return model
In [287]:
# Whole batches per epoch for the multi-class generators.
STEP_SIZE_TRAIN = mc_train_generator.n//mc_train_generator.batch_size
STEP_SIZE_VAL = mc_val_generator.n//mc_val_generator.batch_size
In [288]:
mc_model = multiclass_classifier()
# fit() handles generators directly; fit_generator is deprecated.
mc_history = mc_model.fit(
    mc_train_generator,
    steps_per_epoch=STEP_SIZE_TRAIN,
    validation_data=mc_val_generator,
    validation_steps=STEP_SIZE_VAL,
    epochs=15
)
In [289]:
# Learning curves for the multi-class model.
fig, ax = plt.subplots()
ax.plot(mc_history.history['accuracy'], label="Train Accuracy")
ax.plot(mc_history.history['val_accuracy'], label="Validation Accuracy")
ax.legend()
plt.show()
In [291]:
# Re-run of the training cell above: fresh weights, identical hyperparameters.
mc_model = multiclass_classifier()
# fit() handles generators directly; fit_generator is deprecated.
mc_history = mc_model.fit(
    mc_train_generator,
    steps_per_epoch=STEP_SIZE_TRAIN,
    validation_data=mc_val_generator,
    validation_steps=STEP_SIZE_VAL,
    epochs=15
)
In [292]:
# Learning curves for the re-trained multi-class model.
plt.plot(mc_history.history['accuracy'], label="Train Accuracy")
plt.plot(mc_history.history['val_accuracy'], label="Validation Accuracy")
plt.legend()
plt.show()
In [70]:
# The holdout frame now carries the engineered label columns.
holdout_track_params.head()
Out[70]:
In [124]:
# BUG FIX: `datagen` was undefined (NameError on a fresh kernel); the shared
# DATAGEN instance is the intended source. shuffle=False keeps predictions
# aligned with holdout_track_params row order; subset=None uses every row.
mc_holdout_generator = DATAGEN.flow_from_dataframe(
    dataframe=holdout_track_params,
    directory="../VALIDATION",
    x_col="filename",
    y_col="phi_binned",
    subset=None,
    target_size=(height, width),
    batch_size=32,
    seed=314,
    shuffle=False,
    class_mode="categorical",
)
In [125]:
holdout_track_params['y_pred'] = mc_model.predict_classes(mc_holdout_generator)
In [126]:
holdout_track_params['y_true'] = mc_holdout_generator.classes
In [127]:
from sklearn.metrics import confusion_matrix
from sklearn.utils.multiclass import unique_labels
def plot_confusion_matrix(y_true, y_pred, classes,
normalize=False,
title=None,
cmap=plt.cm.Blues):
    """Print and plot a confusion matrix for integer class labels.

    Parameters
    ----------
    y_true, y_pred : array-like of int
        True and predicted class indices.
    classes : sequence of str
        Tick labels for both axes, in class-index order.
    normalize : bool
        If True, each row is divided by its sum (per-true-class rates).
    title : str or None
        Plot title; a default is chosen based on `normalize`.
    cmap : matplotlib colormap
        Colormap for the image.

    Returns
    -------
    matplotlib.axes.Axes
    """
    if not title:
        if normalize:
            title = 'Normalized confusion matrix'
        else:
            title = 'Confusion matrix, without normalization'
# Compute confusion matrix
    cm = confusion_matrix(y_true, y_pred)
# Only use the labels that appear in the data
#classes = classes[unique_labels(y_true, y_pred)]
    if normalize:
        # Row-normalize: each row sums to 1 (rates per true class).
        cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
        print("Normalized confusion matrix")
    else:
        print('Confusion matrix, without normalization')
    print(cm)
    fig, ax = plt.subplots(figsize=(10, 10))
    im = ax.imshow(cm, interpolation='nearest', cmap=cmap)
    ax.figure.colorbar(im, ax=ax)
# We want to show all ticks...
    ax.set(xticks=np.arange(cm.shape[1]),
yticks=np.arange(cm.shape[0]),
# ... and label them with the respective list entries
xticklabels=classes, yticklabels=classes,
title=title,
ylabel='True label',
xlabel='Predicted label')
# Rotate the tick labels and set their alignment.
    plt.setp(ax.get_xticklabels(), rotation=45, ha="right",
rotation_mode="anchor")
# Loop over data dimensions and create text annotations.
    fmt = '.2f' if normalize else 'd'
    # Cells darker than half the max get white text for contrast.
    thresh = cm.max() / 2.
    for i in range(cm.shape[0]):
        for j in range(cm.shape[1]):
            ax.text(j, i, format(cm[i, j], fmt),
ha="center", va="center",
color="white" if cm[i, j] > thresh else "black")
    fig.tight_layout()
    return ax
In [128]:
# Human-readable class labels, in the same index order the generator used.
class_names = list(mc_holdout_generator.class_indices)
In [129]:
# Holdout confusion matrix (row = true phi bin, column = predicted phi bin).
plot_confusion_matrix(holdout_track_params['y_true'],
holdout_track_params['y_pred'],
classes=class_names)
Out[129]:
In [52]:
# Generators for single-target regression on raw (unbinned) phi values.
reg_train_generator = create_generator(
target="phi",
subset="training",
class_mode="raw",
N=10000
)
reg_val_generator = create_generator(
target="phi",
subset="validation",
class_mode="raw",
N=10000
)
In [159]:
width = 36
height = 100
channels = 3

def regression_model():
    """CNN regressor for a single continuous target (phi).

    Three conv/ReLU/max-pool stages followed by a dropout-regularized
    dense head ending in one linear output unit. Compiled with MSE loss
    and the Adam optimizer.
    """
    model = Sequential([
        # Stage 1
        Conv2D(32, (3, 3), input_shape=(height, width, channels)),
        Activation('relu'),
        MaxPooling2D(pool_size=(2, 2)),
        # Stage 2
        Conv2D(32, (3, 3)),
        Activation('relu'),
        MaxPooling2D(pool_size=(2, 2)),
        # Stage 3
        Conv2D(64, (3, 3)),
        Activation('relu'),
        MaxPooling2D(pool_size=(2, 2)),
        # Regression head: single linear output (no final activation).
        Flatten(),
        Dense(64),
        Activation('relu'),
        Dropout(0.5),
        Dense(1),
    ])
    model.compile(loss='mean_squared_error',
                  optimizer='adam',
                  metrics=['mse'])
    return model
In [67]:
# Whole batches per epoch for the regression generators.
STEP_SIZE_TRAIN = reg_train_generator.n//reg_train_generator.batch_size
STEP_SIZE_VAL = reg_val_generator.n//reg_val_generator.batch_size
In [68]:
reg_model = regression_model()
# fit() handles generators directly; fit_generator is deprecated.
reg_history = reg_model.fit(
    reg_train_generator,
    steps_per_epoch=STEP_SIZE_TRAIN,
    validation_data=reg_val_generator,
    validation_steps=STEP_SIZE_VAL,
    epochs=15
)
In [69]:
# Training vs. validation MSE for the phi regressor.
fig, ax = plt.subplots()
ax.plot(reg_history.history['mse'], label="Train MSE")
ax.plot(reg_history.history['val_mse'], label="Validation MSE")
ax.legend()
plt.show()
In [225]:
# Generators for joint regression on both targets (y is the [z, phi] pair).
# N=100000 requests up to 100k rows; head() caps at the frame's length.
double_reg_train_generator = create_generator(
target=["z", "phi"],
subset="training",
class_mode="raw",
N=100000
)
double_reg_val_generator = create_generator(
target=["z", "phi"],
subset="validation",
class_mode="raw",
N=100000
)
In [226]:
width = 36
height = 100
channels = 3

def double_regression_model():
    """CNN regressor predicting both targets (z and phi) at once.

    Same backbone as the single-target regressor, but the head ends in
    two linear output units. Compiled with MSE loss and Adam.
    """
    model = Sequential([
        # Stage 1
        Conv2D(32, (3, 3), input_shape=(height, width, channels)),
        Activation('relu'),
        MaxPooling2D(pool_size=(2, 2)),
        # Stage 2
        Conv2D(32, (3, 3)),
        Activation('relu'),
        MaxPooling2D(pool_size=(2, 2)),
        # Stage 3
        Conv2D(64, (3, 3)),
        Activation('relu'),
        MaxPooling2D(pool_size=(2, 2)),
        # Regression head: two linear outputs, one per target.
        Flatten(),
        Dense(64),
        Activation('relu'),
        Dropout(0.5),
        Dense(2),
    ])
    model.compile(optimizer="adam",
                  loss="mean_squared_error",
                  metrics=["mse"])
    return model
In [227]:
# Whole batches per epoch for the two-target regression generators.
STEP_SIZE_TRAIN = double_reg_train_generator.n//double_reg_train_generator.batch_size
STEP_SIZE_VAL = double_reg_val_generator.n//double_reg_val_generator.batch_size
In [228]:
double_reg_model = double_regression_model()
# fit() handles generators directly; fit_generator is deprecated.
double_reg_history = double_reg_model.fit(
    double_reg_train_generator,
    steps_per_epoch=STEP_SIZE_TRAIN,
    validation_data=double_reg_val_generator,
    validation_steps=STEP_SIZE_VAL,
    epochs=15
)
In [233]:
# Training vs. validation MSE for the joint (z, phi) regressor.
fig, ax = plt.subplots()
ax.plot(double_reg_history.history['mse'], label="Train MSE")
ax.plot(double_reg_history.history['val_mse'], label="Validation MSE")
ax.legend()
plt.show()
In [234]:
# BUG FIX: `datagen` was undefined (NameError on a fresh kernel); use the
# shared DATAGEN. shuffle=False keeps prediction order aligned with the
# holdout frame's rows so columns can be assigned positionally below.
double_reg_holdout_generator = DATAGEN.flow_from_dataframe(
    dataframe=holdout_track_params,
    directory="../VALIDATION",
    x_col="filename",
    y_col=["z", "phi"],
    subset=None,
    target_size=(height, width),
    batch_size=32,
    seed=314,
    shuffle=False,
    class_mode="raw",
)
In [235]:
# NOTE(review): these placeholder columns are overwritten two cells below;
# the initialization appears redundant — confirm before removing.
holdout_track_params['z_pred'] = 0.0
holdout_track_params['phi_pred'] = 0.0
In [236]:
y_pred = double_reg_model.predict(double_reg_holdout_generator)
In [238]:
holdout_track_params['z_pred'] = [y[0] for y in y_pred]
holdout_track_params['phi_pred'] = [y[1] for y in y_pred]
In [244]:
holdout_track_params['delta_z'] = holdout_track_params.eval('z - z_pred')
holdout_track_params['delta_phi'] = holdout_track_params.eval('phi - phi_pred')
In [337]:
from sklearn.metrics import r2_score, mean_squared_error
In [339]:
# Holdout R^2 and MSE for phi...
print(r2_score(holdout_track_params['phi'], holdout_track_params['phi_pred']))
print(mean_squared_error(holdout_track_params['phi'], holdout_track_params['phi_pred']))
In [341]:
# ...and for z.
print(r2_score(holdout_track_params['z'], holdout_track_params['z_pred']))
print(mean_squared_error(holdout_track_params['z'], holdout_track_params['z_pred']))
In [345]:
# Residual distributions: delta_z on linear and log scales, delta_phi linear.
fig, (ax0, ax1, ax2) = plt.subplots(nrows=1, ncols=3, figsize=(14, 5))
panels = [('delta_z', ax0, False),
          ('delta_z', ax1, True),
          ('delta_phi', ax2, False)]
for col, ax, use_log in panels:
    holdout_track_params[col].hist(bins=10, alpha=0.5, log=use_log, ax=ax)
ax0.set_title(r'Residual $\Delta z$')
ax1.set_title(r'Residual $\Delta z$ (log)')
ax2.set_title(r'Residual: $\Delta\theta$')
plt.show()
In [278]:
# Predicted vs. true z; a perfect model would concentrate on the diagonal.
plt.hexbin(x=holdout_track_params['z'], y=holdout_track_params['z_pred'])
_ = plt.title('z (vertex)')
_ = plt.xlabel('z_true')
_ = plt.ylabel('z_pred')
In [354]:
from matplotlib import cm
fig, (ax0, ax1) = plt.subplots(nrows=1, ncols=2,
figsize=(12, 6),
sharex=True,
sharey=True)
scatter = plt.scatter(x=holdout_track_params['z'],
y=holdout_track_params['z_pred'],
c=holdout_track_params['phi'],
cmap=cm.seismic,
vmin=-10., vmax=10.,
alpha=0.2)
plt.colorbar(scatter, label=r'$\theta$')
ax1.set_facecolor("#888888")
_ = plt.title(r'z (vertex), colored by $\theta$')
_ = plt.xlabel('z_true')
plt.sca(ax0)
scatter = plt.scatter(x=holdout_track_params['z'],
y=holdout_track_params['z_pred'],
alpha=0.2)
_ = plt.title('z (vertex)')
_ = plt.xlabel('z_true')
_ = plt.ylabel('z_pred')
plt.subplots_adjust(right=1.)
In [354]:
from matplotlib import cm
fig, (ax0, ax1) = plt.subplots(nrows=1, ncols=2,
figsize=(12, 6),
sharex=True,
sharey=True)
scatter = plt.scatter(x=holdout['z'],
y=holdout['z_pred'],
c=holdout['theta'],
cmap=cm.seismic,
vmin=-10., vmax=10.,
alpha=0.2)
plt.colorbar(scatter, label=r'$\theta$')
ax1.set_facecolor("#888888")
_ = plt.title(r'z (vertex), colored by $\theta$')
_ = plt.xlabel('z_true')
plt.sca(ax0)
scatter = plt.scatter(x=holdout['z'],
y=holdout['z_pred'],
alpha=0.2)
_ = plt.title('z (vertex)')
_ = plt.xlabel('z_true')
_ = plt.ylabel('z_pred')
plt.subplots_adjust(right=1.)
In [281]:
# Predicted vs. true phi on the holdout set.
plt.scatter(x=holdout_track_params['phi'], y=holdout_track_params['phi_pred'],
alpha=0.2)
_ = plt.title(r'$\theta$')
_ = plt.xlabel(r'$\theta$_true')
_ = plt.ylabel(r'$\theta$_pred')
In [282]:
holdout_track_params['z_squared_error'] = holdout_track_params.eval(
'(z - z_pred) ** 2'
)
In [283]:
holdout_track_params['phi_squared_error'] = holdout_track_params.eval(
'(phi - phi_pred) ** 2'
)
In [285]:
holdout_track_params[['z_squared_error', 'phi_squared_error']].sum()
Out[285]:
In [ ]: