03 - Sequence Model Approach

The more 'classical' approach to solving this problem
Train a model that can take any number of 'steps'
Makes a prediction on next step based on previous steps
Learn from full tracks
For test tracks, predict what the next step's values will be



In [1]:

    
%matplotlib inline



In [2]:

    
import pandas as pd
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, LeakyReLU, Dropout, ReLU, GRU, TimeDistributed, Conv2D, MaxPooling2D, Flatten
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.callbacks import EarlyStopping
from jlab import load_test_data, get_test_detector_plane

Load up and prep the datasets



In [3]:

    
X_train = pd.read_csv('MLchallenge2_training.csv')
X_test = load_test_data('test_in.csv')
eval_planes = get_test_detector_plane(X_test)

# Also, load our truth values
y_true = pd.read_csv('test_prediction.csv', names=['x', 'y', 'px', 'py', 'pz'],
                     header=None)



In [4]:

    
X_test.head()









    Out[4]:







  
    
      
      x
      y
      z
      px
      py
      pz
      x1
      y1
      z1
      px1
      ...
      z23
      px23
      py23
      pz23
      x24
      y24
      z24
      px24
      py24
      pz24
    
  
  
    
      0
      0.877
      1.322
      65.0
      -0.244
      -0.053
      2.414
      -10.669
      0.330
      176.944
      -0.254
      ...
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
    
    
      1
      0.786
      -2.483
      65.0
      0.103
      0.432
      2.593
      7.366
      15.502
      176.944
      0.206
      ...
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
    
    
      2
      -13.134
      -26.531
      65.0
      0.064
      -0.021
      0.953
      -7.586
      -30.687
      176.944
      0.027
      ...
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
    
    
      3
      18.454
      2.805
      65.0
      -0.019
      0.069
      1.833
      18.043
      6.797
      176.944
      0.013
      ...
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
    
    
      4
      15.552
      -19.196
      65.0
      -0.010
      -0.011
      2.366
      15.068
      -19.750
      176.944
      -0.014
      ...
      341.28
      -0.014
      -0.002
      2.351
      NaN
      NaN
      343.405
      NaN
      NaN
      NaN
    
  

5 rows × 150 columns



In [5]:

    
y_true.head()

Construct the training data and targets

For each track
- Choose a number N between 8 and 24
- That track will have 6 kinematics for N blocks
- The target variable will be the 6 kinematic variables for the N+1th detector block
This will cause variable length sequences
Apply pad_sequences to prepend with zeros appropriately

Training Dataset



In [10]:

    
N_SAMPLES = len(X_train)
N_DETECTORS = 25
N_KINEMATICS = 6
SHAPE = (N_SAMPLES, N_DETECTORS-1, N_KINEMATICS)



In [11]:

    
X_train_list = []
y_train_array = np.ndarray(shape=(N_SAMPLES, N_KINEMATICS-1))
for ix in range(N_SAMPLES):
    seq_len = np.random.choice(range(8, 25))
    track = X_train.iloc[ix].values.reshape(N_DETECTORS, N_KINEMATICS)
    X_train_list.append(track[0:seq_len])
    # Store the kinematics of the next in the sequence
    # Ignore the 3rd one, which is z
    y_train_array[ix] = track[seq_len][[0,1,3,4,5]]



In [12]:

    
for track in X_train_list[:10]:
    print(len(track))



In [13]:

    
X_train_list = pad_sequences(X_train_list, dtype=float)



In [14]:

    
for track in X_train_list[:10]:
    print(len(track))



In [15]:

    
X_train_array = np.array(X_train_list)
X_train_array.shape









    Out[15]:





(194601, 24, 6)



In [16]:

    
y_train_array.shape









    Out[16]:





(194601, 5)

Validation Dataset



In [17]:

    
N_TEST_SAMPLES = len(X_test)



In [18]:

    
y_test_array = y_true.values



In [19]:

    
X_test_list = []
for ix in range(N_TEST_SAMPLES):
    seq_len = get_test_detector_plane(X_test.iloc[ix])
    track = X_test.iloc[ix].values.reshape(N_DETECTORS, N_KINEMATICS)
    X_test_list.append(track[0:seq_len])



In [20]:

    
X_test_list = pad_sequences(X_test_list, dtype=float)
X_test_array = np.array(X_test_list)



In [21]:

    
X_test_array.shape









    Out[21]:





(10000, 24, 6)



In [22]:

    
y_test_array.shape









    Out[22]:





(10000, 5)



In [23]:

    
y_true.values.shape









    Out[23]:





(10000, 5)



In [1]:

    
import pandas as pd
import numpy as np
from math import floor
from tensorflow.keras.preprocessing.sequence import pad_sequences
from sklearn.model_selection import train_test_split



In [2]:

    
data = pd.read_csv('MLchallenge2_training.csv')



In [4]:

    
# Z values are constant -- what are they?
Z_VALS = data[['z'] + [f'z{i}' for i in range(1, 25)]].loc[0].values
# Z-distance from one timestep to another is set; calculate it
Z_DIST = [Z_VALS[i+1] - Z_VALS[i] for i in range(0, 24)] + [0.0]



In [18]:

    
# Number of timesteps
N_DETECTORS = 25
# Provided number of kinematics
N_KINEMATICS = 6
# Number of features after engineering them all
N_FEATURES = 13



In [4]:

    
def get_detector_meta(kin_array, det_id):
    
    # Is there a large gap after this detector?
    # 0 is for padded timesteps
    # 1 is for No, 2 is for Yes
    mind_the_gap = int(det_id % 6 == 0) + 1
    
    # Detector group: 1 (origin), 2, 3, 4, or 5
    det_grp = floor((det_id-1) / 6) + 2
    
    # Detectors numbered 1-6 (origin is 6)
    # (Which one in the group of six is it?)
    det_rank = ((det_id-1) % 6) + 1
    
    # Distance to the next detector?
    z_dist = Z_DIST[det_id]
    
    # Transverse momentum (x-y component)
    pt = np.sqrt(np.square(kin_array[3]) + np.square(kin_array[4]))
    
    # Total momentum
    p_tot = np.sqrt(np.square(kin_array[3])
                    + np.square(kin_array[4])
                    + np.square(kin_array[5]))

    # Put all the calculated features together
    det_meta = np.array([det_id, mind_the_gap, det_grp, det_rank,
                         z_dist, pt, p_tot])
    
    # Return detector data plus calculated features
    return np.concatenate([kin_array, det_meta], axis=None)



In [5]:

    
def tracks_to_time_series(X):
    """Convert training dataframe to multivariate time series training set
    
    Pivots each track to a series ot timesteps. Then randomly truncates them
    to be identical to the provided test set. The step after the truncated
    step is saved as the target.
    
    Truncated sequence are front-padded with zeros.
    
    Parameters
    ----------
    X : pandas.DataFrame
    
    Returns
    -------
    (numpy.ndarray, numpy.ndarray)
        Tuple of the training data and labels
    """
    
    X_ts_list = []
    n_samples = len(X)
    y_array = np.ndarray(shape=(n_samples, N_KINEMATICS-1))
    for ix in range(n_samples):
        # Randomly choose how many detectors the track went through
        track_len = np.random.choice(range(8, 25))
        # Reshape into ts-like
        track = X.iloc[ix].values.reshape(N_DETECTORS, N_KINEMATICS)
        #eng_track = np.zeros(shape=(N_DETECTORS, N_FEATURES))
        #for i in range(0, N_DETECTORS):
        #    eng_track[i] = get_detector_meta(track[i], i)
        # Truncate the track to only N detectors
        X_ts_list.append(track[0:track_len])
        # Store the kinematics of the next in the sequence
        # Ignore the 3rd one, which is z
        y_array[ix] = track[track_len][[0,1,3,4,5]]
        
    # Pad the training sequence
    X_ts_list = pad_sequences(X_ts_list, dtype=float)
    X_ts_array = np.array(X_ts_list)
    
    return X_ts_array, y_array



In [6]:

    
X, y = tracks_to_time_series(data)



In [7]:

    
X[3]









    Out[7]:





array([[ 0.00000e+00,  0.00000e+00,  0.00000e+00,  0.00000e+00,
         0.00000e+00,  0.00000e+00],
       [ 1.84542e+01,  2.80469e+00,  6.50000e+01, -1.93839e-02,
         6.93840e-02,  1.83259e+00],
       [ 1.80433e+01,  6.79747e+00,  1.76944e+02,  1.30392e-02,
         6.20286e-02,  1.82485e+00],
       [ 1.80588e+01,  6.86968e+00,  1.79069e+02,  1.32902e-02,
         6.19080e-02,  1.82484e+00],
       [ 1.80749e+01,  6.94169e+00,  1.81195e+02,  1.48815e-02,
         6.18535e-02,  1.82481e+00],
       [ 1.80926e+01,  7.01374e+00,  1.83320e+02,  1.53596e-02,
         6.20166e-02,  1.82479e+00],
       [ 1.81105e+01,  7.08585e+00,  1.85445e+02,  1.54059e-02,
         6.18141e-02,  1.82477e+00],
       [ 1.81287e+01,  7.15783e+00,  1.87571e+02,  1.59919e-02,
         6.19073e-02,  1.82469e+00],
       [ 1.86773e+01,  8.71037e+00,  2.35514e+02,  2.58893e-02,
         5.57861e-02,  1.82465e+00],
       [ 1.87077e+01,  8.77517e+00,  2.37639e+02,  2.62399e-02,
         5.55210e-02,  1.82464e+00],
       [ 1.87386e+01,  8.83959e+00,  2.39765e+02,  2.68784e-02,
         5.50810e-02,  1.82463e+00],
       [ 1.87702e+01,  8.90362e+00,  2.41890e+02,  2.72003e-02,
         5.48001e-02,  1.82462e+00],
       [ 1.88022e+01,  8.96717e+00,  2.44015e+02,  2.78705e-02,
         5.44760e-02,  1.82460e+00],
       [ 1.88350e+01,  9.03072e+00,  2.46141e+02,  2.84710e-02,
         5.46504e-02,  1.82457e+00],
       [ 1.97008e+01,  1.03871e+01,  2.94103e+02,  3.72172e-02,
         4.90906e-02,  1.82448e+00],
       [ 1.97443e+01,  1.04442e+01,  2.96228e+02,  3.75142e-02,
         4.91246e-02,  1.82445e+00],
       [ 1.97881e+01,  1.05014e+01,  2.98354e+02,  3.77907e-02,
         4.89708e-02,  1.82442e+00],
       [ 1.98327e+01,  1.05576e+01,  3.00479e+02,  3.88871e-02,
         4.78588e-02,  1.82440e+00],
       [ 1.98782e+01,  1.06133e+01,  3.02604e+02,  3.92594e-02,
         4.77353e-02,  1.82438e+00],
       [ 1.99239e+01,  1.06690e+01,  3.04730e+02,  3.91888e-02,
         4.77707e-02,  1.82436e+00],
       [ 2.05370e+01,  1.14109e+01,  3.32778e+02,  4.02737e-02,
         4.95768e-02,  1.82423e+00],
       [ 2.05840e+01,  1.14690e+01,  3.34903e+02,  4.03437e-02,
         4.99216e-02,  1.82419e+00],
       [ 2.06307e+01,  1.15272e+01,  3.37029e+02,  3.97329e-02,
         5.01374e-02,  1.82419e+00],
       [ 2.06771e+01,  1.15861e+01,  3.39154e+02,  3.97559e-02,
         5.08750e-02,  1.82415e+00]])



In [8]:

    
y[3]









    Out[8]:





array([20.7233   , 11.6457   ,  0.039513 ,  0.0515839,  1.82412  ])



In [9]:

    
X_train, X_test, y_train, y_test = train_test_split(X, y)



In [10]:

    
len(X_train), len(X_test)









    Out[10]:





(145950, 48651)

Multi-layer GRU Model with LReLU



In [7]:

    
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import GRU, Dense, LeakyReLU, Dropout
from tensorflow.keras.callbacks import EarlyStopping
import joblib



In [20]:

    
def lrelu(x):
    return LeakyReLU()(x)


def gru_model(gru_units=35, dense_units=100,
              dropout_rate=0.25):
    """Model definition.
    
    Three layers of Gated Recurrent Units (GRUs), utilizing
    LeakyReLU activations, finally passing GRU block output
    to a dense layer, passing its output to the final output
    layer, with a touch of dropout in between.
    
    Bon apetit.
    
    Parameters
    ----------
    gru_units : int
    dense_units : int
    dropout_rate : float
    
    Returns
    -------
    tensorflow.keras.models.Sequential
    
    """
    
    model = Sequential()
    
    model.add(GRU(gru_units, activation=lrelu,
                  input_shape=(N_DETECTORS-1, N_KINEMATICS),
                  return_sequences=True))
    model.add(GRU(gru_units, activation=lrelu,
                  return_sequences=True))
    model.add(GRU(gru_units, activation=lrelu))
    
    model.add(Dense(dense_units, activation=lrelu))
    model.add(Dropout(dropout_rate))
    model.add(Dense(N_KINEMATICS-1))
    
    model.compile(loss='mse', optimizer='adam')
    
    return model



In [21]:

    
model = gru_model()



In [22]:

    
model.summary()









    



Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
gru_3 (GRU)                  (None, 24, 35)            4515      
_________________________________________________________________
gru_4 (GRU)                  (None, 24, 35)            7560      
_________________________________________________________________
gru_5 (GRU)                  (None, 35)                7560      
_________________________________________________________________
dense_2 (Dense)              (None, 100)               3600      
_________________________________________________________________
dropout_1 (Dropout)          (None, 100)               0         
_________________________________________________________________
dense_3 (Dense)              (None, 5)                 505       
=================================================================
Total params: 23,740
Trainable params: 23,740
Non-trainable params: 0
_________________________________________________________________



In [16]:

    
from tensorflow.keras.utils import plot_model
plot_model(model, to_file='gru_model.png', show_shapes=True)









    Out[16]:



In [17]:

    
es = EarlyStopping(monitor='val_loss', mode='min',
                   patience=5, restore_best_weights=True)
history = model.fit(
    x=X_train,
    y=y_train,
    validation_data=(X_test, y_test),
    callbacks=[es],
    epochs=50,
)

model.save("gru_model.h5")
joblib.dump(history.history, "gru_model.history")









    



Train on 145950 samples, validate on 48651 samples
Epoch 1/50
 31936/145950 [=====>........................] - ETA: 3:06 - loss: 8.1076WARNING:tensorflow:Early stopping conditioned on metric `val_loss` which is not available. Available metrics are: loss






    



---------------------------------------------------------------------------
KeyboardInterrupt                         Traceback (most recent call last)
<ipython-input-17-b9d369c7fd41> in <module>
      6     validation_data=(X_test, y_test),
      7     callbacks=[es],
----> 8     epochs=50,
      9 )
     10 

//anaconda3/lib/python3.7/site-packages/tensorflow_core/python/keras/engine/training.py in fit(self, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_freq, max_queue_size, workers, use_multiprocessing, **kwargs)
    732         max_queue_size=max_queue_size,
    733         workers=workers,
--> 734         use_multiprocessing=use_multiprocessing)
    735 
    736   def evaluate(self,

//anaconda3/lib/python3.7/site-packages/tensorflow_core/python/keras/engine/training_v2.py in fit(self, model, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_freq, **kwargs)
    322                 mode=ModeKeys.TRAIN,
    323                 training_context=training_context,
--> 324                 total_epochs=epochs)
    325             cbks.make_logs(model, epoch_logs, training_result, ModeKeys.TRAIN)
    326 

//anaconda3/lib/python3.7/site-packages/tensorflow_core/python/keras/engine/training_v2.py in run_one_epoch(model, iterator, execution_function, dataset_size, batch_size, strategy, steps_per_epoch, num_samples, mode, training_context, total_epochs)
    121         step=step, mode=mode, size=current_batch_size) as batch_logs:
    122       try:
--> 123         batch_outs = execution_function(iterator)
    124       except (StopIteration, errors.OutOfRangeError):
    125         # TODO(kaftan): File bug about tf function and errors.OutOfRangeError?

//anaconda3/lib/python3.7/site-packages/tensorflow_core/python/keras/engine/training_v2_utils.py in execution_function(input_fn)
     84     # `numpy` translates Tensors to values in Eager mode.
     85     return nest.map_structure(_non_none_constant_value,
---> 86                               distributed_function(input_fn))
     87 
     88   return execution_function

//anaconda3/lib/python3.7/site-packages/tensorflow_core/python/eager/def_function.py in __call__(self, *args, **kwds)
    413       # In this case we have created variables on the first call, so we run the
    414       # defunned version which is guaranteed to never create variables.
--> 415       return self._stateless_fn(*args, **kwds)  # pylint: disable=not-callable
    416     elif self._stateful_fn is not None:
    417       # In this case we have not created variables on the first call. So we can

//anaconda3/lib/python3.7/site-packages/tensorflow_core/python/eager/function.py in __call__(self, *args, **kwargs)
   1820     """Calls a graph function specialized to the inputs."""
   1821     graph_function, args, kwargs = self._maybe_define_function(args, kwargs)
-> 1822     return graph_function._filtered_call(args, kwargs)  # pylint: disable=protected-access
   1823 
   1824   @property

//anaconda3/lib/python3.7/site-packages/tensorflow_core/python/eager/function.py in _filtered_call(self, args, kwargs)
   1139          if isinstance(t, (ops.Tensor,
   1140                            resource_variable_ops.BaseResourceVariable))),
-> 1141         self.captured_inputs)
   1142 
   1143   def _call_flat(self, args, captured_inputs, cancellation_manager=None):

//anaconda3/lib/python3.7/site-packages/tensorflow_core/python/eager/function.py in _call_flat(self, args, captured_inputs, cancellation_manager)
   1222     if executing_eagerly:
   1223       flat_outputs = forward_function.call(
-> 1224           ctx, args, cancellation_manager=cancellation_manager)
   1225     else:
   1226       gradient_name = self._delayed_rewrite_functions.register()

//anaconda3/lib/python3.7/site-packages/tensorflow_core/python/eager/function.py in call(self, ctx, args, cancellation_manager)
    509               inputs=args,
    510               attrs=("executor_type", executor_type, "config_proto", config),
--> 511               ctx=ctx)
    512         else:
    513           outputs = execute.execute_with_cancellation(

//anaconda3/lib/python3.7/site-packages/tensorflow_core/python/eager/execute.py in quick_execute(op_name, num_outputs, inputs, attrs, ctx, name)
     59     tensors = pywrap_tensorflow.TFE_Py_Execute(ctx._handle, device_name,
     60                                                op_name, inputs, attrs,
---> 61                                                num_outputs)
     62   except core._NotOkStatusException as e:
     63     if name is not None:

KeyboardInterrupt:



In [8]:

    
history = joblib.load("dannowitz_jlab2_model_20191031.history")



In [11]:

    
import matplotlib.pyplot as plt

# Plot training & validation loss values
plt.plot(history['loss'])
plt.plot(history['val_loss'])
plt.title('Model loss')
plt.ylabel('Loss')
plt.xlabel('Epoch')
plt.legend(['Train', 'Test'], loc='upper right')
plt.show()

Calculate the score on my predictions

Scoring code provided by Thomas Britton
Each kinematic has different weight



In [42]:

    
pred = pd.read_csv('data/submission/dannowitz_jlab2_submission_20191112.csv', header=None)
truth = pd.read_csv('data/ANSWERS.csv', header=None)



In [43]:

    
# Calculate square root of the mean squared error
# Then apply weights and sum them all up
sq_error = (truth - pred).applymap(np.square)
mse = sq_error.sum() / len(truth)
rmse = np.sqrt(mse)
rms_weighted = rmse / [0.03, 0.03, 0.01, 0.01, 0.011]
score = rms_weighted.sum()
score









    Out[43]:





72.17822893581052

Visualize the predictions vs true

You can slice and dice the stats however you want, but it helps to be able to see your predictions at work.

Running history of me tinkering around

I didn't arrive at this construction from the start.
Many different changes and tweaks



In [161]:

    
def lstm_model():
    
    model = Sequential()
    model.add(LSTM(200, activation=LeakyReLU(), input_shape=(N_DETECTORS-1, N_KINEMATICS)))
    model.add(Dense(100, activation=LeakyReLU()))
    model.add(Dropout(0.25))
    model.add(Dense(N_KINEMATICS-1, activation='linear'))
    model.compile(loss='mse', optimizer='adam')
    
    return model



In [150]:

    
model = lstm_model()
model.summary()









    



Model: "sequential_11"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
lstm_9 (LSTM)                (None, 200)               165600    
_________________________________________________________________
dense_17 (Dense)             (None, 100)               20100     
_________________________________________________________________
dropout (Dropout)            (None, 100)               0         
_________________________________________________________________
dense_18 (Dense)             (None, 5)                 505       
=================================================================
Total params: 186,205
Trainable params: 186,205
Non-trainable params: 0
_________________________________________________________________



In [143]:

    
history = model.fit(x=X_train_array, y=y_train_array, validation_data=(X_test_array, y_test_array), epochs=5)









    



Train on 194601 samples, validate on 10000 samples
Epoch 1/5
194601/194601 [==============================] - 259s 1ms/sample - loss: 116.3854 - val_loss: 5.4352
Epoch 2/5
194601/194601 [==============================] - 233s 1ms/sample - loss: 1.8483 - val_loss: 3.8398
Epoch 3/5
194601/194601 [==============================] - 206s 1ms/sample - loss: 0.6441 - val_loss: 0.2240
Epoch 4/5
194601/194601 [==============================] - 248s 1ms/sample - loss: 0.2546 - val_loss: 0.1212
Epoch 5/5
194601/194601 [==============================] - 252s 1ms/sample - loss: 0.1832 - val_loss: 0.1357






    Out[143]:





<tensorflow.python.keras.callbacks.History at 0x1a3b85b1d0>



In [151]:

    
history = model.fit(x=X_train_array, y=y_train_array,
                    validation_data=(X_test_array, y_test_array),
                    epochs=50, use_multiprocessing=True)









    



Train on 194601 samples, validate on 10000 samples
Epoch 1/50
194601/194601 [==============================] - 218s 1ms/sample - loss: 326.7025 - val_loss: 4.9697
Epoch 2/50
194601/194601 [==============================] - 198s 1ms/sample - loss: 4.7569 - val_loss: 1.5031
Epoch 3/50
194601/194601 [==============================] - 192s 989us/sample - loss: 2.2188 - val_loss: 0.7064
Epoch 4/50
194601/194601 [==============================] - 189s 973us/sample - loss: 1.6142 - val_loss: 0.3873
Epoch 5/50
194601/194601 [==============================] - 190s 975us/sample - loss: 1.3676 - val_loss: 0.2826
Epoch 6/50
194601/194601 [==============================] - 190s 977us/sample - loss: 1.2523 - val_loss: 0.2624
Epoch 7/50
194601/194601 [==============================] - 192s 984us/sample - loss: 1.2170 - val_loss: 0.1198
Epoch 8/50
194601/194601 [==============================] - 191s 984us/sample - loss: 1.1980 - val_loss: 0.3450
Epoch 9/50
194601/194601 [==============================] - 190s 974us/sample - loss: 1.2492 - val_loss: 0.1498
Epoch 10/50
194601/194601 [==============================] - 188s 967us/sample - loss: 109.6867 - val_loss: 135.0494
Epoch 11/50
194601/194601 [==============================] - 191s 979us/sample - loss: 3628.7882 - val_loss: 145.1902
Epoch 12/50
194601/194601 [==============================] - 192s 987us/sample - loss: 147.6975 - val_loss: 143.6387
Epoch 13/50
194601/194601 [==============================] - 191s 982us/sample - loss: 135.3904 - val_loss: 136.1410
Epoch 14/50
194601/194601 [==============================] - 190s 976us/sample - loss: 138.0715 - val_loss: 142.0082
Epoch 15/50
194601/194601 [==============================] - 190s 975us/sample - loss: 150.2713 - val_loss: 122.1642
Epoch 16/50
194601/194601 [==============================] - 191s 981us/sample - loss: 194.4811 - val_loss: 157.4011
Epoch 17/50
194601/194601 [==============================] - 191s 982us/sample - loss: 295.3036 - val_loss: 981.5715
Epoch 18/50
194601/194601 [==============================] - 193s 990us/sample - loss: 1135.6556 - val_loss: 149.0931
Epoch 19/50
194601/194601 [==============================] - 191s 980us/sample - loss: 2282.8591 - val_loss: 143.4084
Epoch 20/50
194601/194601 [==============================] - 191s 981us/sample - loss: 1515.8976 - val_loss: 1430.2080
Epoch 21/50
194601/194601 [==============================] - 192s 984us/sample - loss: 4582.7446 - val_loss: 184.3361
Epoch 22/50
194601/194601 [==============================] - 191s 981us/sample - loss: 2602.9031 - val_loss: 149.0369
Epoch 23/50
194601/194601 [==============================] - 191s 983us/sample - loss: 9676.4073 - val_loss: 149.6423
Epoch 24/50
194601/194601 [==============================] - 191s 980us/sample - loss: 7239.8273 - val_loss: 239.0134
Epoch 25/50
194601/194601 [==============================] - 192s 985us/sample - loss: 8267.8885 - val_loss: 193.2975
Epoch 26/50
194601/194601 [==============================] - 191s 982us/sample - loss: 21609147.9409 - val_loss: 1354990.5196
Epoch 27/50
194601/194601 [==============================] - 191s 981us/sample - loss: 188188.0964 - val_loss: 8307.5244
Epoch 28/50
194601/194601 [==============================] - 191s 980us/sample - loss: 11133.2162 - val_loss: 735.8909
Epoch 29/50
194601/194601 [==============================] - 192s 988us/sample - loss: 27304.7013 - val_loss: 550.2246
Epoch 30/50
194601/194601 [==============================] - 191s 979us/sample - loss: 3500210.6721 - val_loss: 6881.0110
Epoch 31/50
194601/194601 [==============================] - 191s 982us/sample - loss: 12381243691.6794 - val_loss: 232697165.3480
Epoch 32/50
194601/194601 [==============================] - 191s 981us/sample - loss: 704356007.8062 - val_loss: 1588541.0581
Epoch 33/50
194601/194601 [==============================] - 191s 980us/sample - loss: 5890180.5563 - val_loss: 13965.1964
Epoch 34/50
194601/194601 [==============================] - 191s 983us/sample - loss: 91581942.6742 - val_loss: 2227.1191
Epoch 35/50
194601/194601 [==============================] - 191s 982us/sample - loss: 566871311591.0952 - val_loss: 82189.5131
Epoch 36/50
194601/194601 [==============================] - 191s 980us/sample - loss: 13115223.4457 - val_loss: 860606.4987
Epoch 37/50
194601/194601 [==============================] - 194s 995us/sample - loss: 721692019.6183 - val_loss: 1078563464.5220
Epoch 38/50
194601/194601 [==============================] - 191s 982us/sample - loss: nan - val_loss: nan
Epoch 39/50
194601/194601 [==============================] - 191s 980us/sample - loss: nan - val_loss: nan
Epoch 40/50
194601/194601 [==============================] - 193s 990us/sample - loss: nan - val_loss: nan
Epoch 41/50
194601/194601 [==============================] - 192s 987us/sample - loss: nan - val_loss: nan
Epoch 42/50
194601/194601 [==============================] - 191s 980us/sample - loss: nan - val_loss: nan
Epoch 43/50
194601/194601 [==============================] - 192s 984us/sample - loss: nan - val_loss: nan
Epoch 44/50
194601/194601 [==============================] - 192s 985us/sample - loss: nan - val_loss: nan
Epoch 45/50
194601/194601 [==============================] - 192s 989us/sample - loss: nan - val_loss: nan
Epoch 46/50
194601/194601 [==============================] - 192s 984us/sample - loss: nan - val_loss: nan
Epoch 47/50
194601/194601 [==============================] - 191s 983us/sample - loss: nan - val_loss: nan
Epoch 48/50
194601/194601 [==============================] - 191s 983us/sample - loss: nan - val_loss: nan
Epoch 49/50
194601/194601 [==============================] - 191s 981us/sample - loss: nan - val_loss: nan
Epoch 50/50
194601/194601 [==============================] - 193s 990us/sample - loss: nan - val_loss: nan



In [156]:

    
model = lstm_model()
es = EarlyStopping(monitor='val_loss', mode='min')
history = model.fit(x=X_train_array, y=y_train_array,
                    validation_data=(X_test_array, y_test_array),
                    callbacks=[es], epochs=20, use_multiprocessing=True)









    



Train on 194601 samples, validate on 10000 samples
Epoch 1/20
194601/194601 [==============================] - 198s 1ms/sample - loss: 249.5579 - val_loss: 0.7746
Epoch 2/20
194601/194601 [==============================] - 2346s 12ms/sample - loss: 2.5781 - val_loss: 0.6244
Epoch 3/20
194601/194601 [==============================] - 202s 1ms/sample - loss: 1.7952 - val_loss: 0.3346
Epoch 4/20
194601/194601 [==============================] - 195s 1ms/sample - loss: 1.3872 - val_loss: 0.1804
Epoch 5/20
194601/194601 [==============================] - 196s 1ms/sample - loss: 1.2802 - val_loss: 0.1384
Epoch 6/20
194601/194601 [==============================] - 195s 1ms/sample - loss: 1.2745 - val_loss: 0.2104



In [157]:

    
model.save("lstm100-dense100-dropout025-epochs20-early-stopping.h5")



In [162]:

    
def lstm_model_lin():
    
    model = Sequential()
    model.add(LSTM(200, activation=LeakyReLU(), input_shape=(N_DETECTORS-1, N_KINEMATICS)))
    model.add(Dense(100, activation=LeakyReLU()))
    model.add(Dropout(0.25))
    model.add(Dense(N_KINEMATICS-1, activation='linear'))
    model.compile(loss='mse', optimizer='adam')
    
    return model

lin_act_model = lstm_model_lin()
es = EarlyStopping(monitor='val_loss', mode='min')
history = lin_act_model.fit(x=X_train_array[:10000], y=y_train_array[:10000],
                    validation_data=(X_test_array, y_test_array),
                    callbacks=[es], epochs=20, use_multiprocessing=True)









    



Train on 10000 samples, validate on 10000 samples
Epoch 1/20
10000/10000 [==============================] - 16s 2ms/sample - loss: 722.3176 - val_loss: 23.1267
Epoch 2/20
10000/10000 [==============================] - 13s 1ms/sample - loss: 25.8648 - val_loss: 9.7359
Epoch 3/20
10000/10000 [==============================] - 13s 1ms/sample - loss: 13.8674 - val_loss: 4.2730
Epoch 4/20
10000/10000 [==============================] - 13s 1ms/sample - loss: 9.6217 - val_loss: 2.6301
Epoch 5/20
10000/10000 [==============================] - 16s 2ms/sample - loss: 7.4191 - val_loss: 1.9886
Epoch 6/20
10000/10000 [==============================] - 15s 1ms/sample - loss: 6.2753 - val_loss: 2.1294



In [164]:

    
def lstm_model_adam():
    
    model = Sequential()
    model.add(LSTM(200, activation=LeakyReLU(), input_shape=(N_DETECTORS-1, N_KINEMATICS)))
    model.add(Dense(100, activation=LeakyReLU()))
    model.add(Dropout(0.25))
    model.add(Dense(N_KINEMATICS-1))
    model.compile(loss='mse', optimizer='adam')
    
    return model

adam_model = lstm_model_adam()
es = EarlyStopping(monitor='val_loss', mode='min')
history = adam_model.fit(x=X_train_array[:10000], y=y_train_array[:10000],
                    validation_data=(X_test_array, y_test_array),
                    callbacks=[es], epochs=20, use_multiprocessing=True)









    



Train on 10000 samples, validate on 10000 samples
Epoch 1/20
10000/10000 [==============================] - 14s 1ms/sample - loss: 305.4409 - val_loss: 16.1725
Epoch 2/20
10000/10000 [==============================] - 13s 1ms/sample - loss: 19.2072 - val_loss: 6.6038
Epoch 3/20
10000/10000 [==============================] - 13s 1ms/sample - loss: 10.8057 - val_loss: 4.2676
Epoch 4/20
10000/10000 [==============================] - 13s 1ms/sample - loss: 7.9416 - val_loss: 2.1789
Epoch 5/20
10000/10000 [==============================] - 13s 1ms/sample - loss: 6.2256 - val_loss: 2.3635



In [166]:

    
def lstm_model_dropout50():
    
    model = Sequential()
    model.add(LSTM(200, activation=LeakyReLU(), input_shape=(N_DETECTORS-1, N_KINEMATICS)))
    model.add(Dense(100, activation=LeakyReLU()))
    model.add(Dropout(0.50))
    model.add(Dense(N_KINEMATICS-1))
    model.compile(loss='mse', optimizer='adam')
    
    return model

dropout50_model = lstm_model_dropout50()
es = EarlyStopping(monitor='val_loss', mode='min')
history = dropout50_model.fit(x=X_train_array[:10000], y=y_train_array[:10000],
                    validation_data=(X_test_array, y_test_array),
                    callbacks=[es], epochs=20, use_multiprocessing=True)









    



Train on 10000 samples, validate on 10000 samples
Epoch 1/20
10000/10000 [==============================] - 14s 1ms/sample - loss: 2095.8651 - val_loss: 253.8501
Epoch 2/20
10000/10000 [==============================] - 13s 1ms/sample - loss: 305.1297 - val_loss: 85.6102
Epoch 3/20
10000/10000 [==============================] - 14s 1ms/sample - loss: 143.2311 - val_loss: 64.7771
Epoch 4/20
10000/10000 [==============================] - 15s 1ms/sample - loss: 66.5812 - val_loss: 36.7483
Epoch 5/20
10000/10000 [==============================] - 13s 1ms/sample - loss: 45.9202 - val_loss: 25.6323
Epoch 6/20
10000/10000 [==============================] - 13s 1ms/sample - loss: 35.9694 - val_loss: 19.3018
Epoch 7/20
10000/10000 [==============================] - 13s 1ms/sample - loss: 30.0887 - val_loss: 16.9329
Epoch 8/20
10000/10000 [==============================] - 13s 1ms/sample - loss: 25.2751 - val_loss: 12.3551
Epoch 9/20
10000/10000 [==============================] - 13s 1ms/sample - loss: 21.4555 - val_loss: 10.0203
Epoch 10/20
10000/10000 [==============================] - 13s 1ms/sample - loss: 19.4236 - val_loss: 10.1653



In [172]:

    
def lstm_model_nodropout():
    
    model = Sequential()
    model.add(LSTM(200, activation=LeakyReLU(), input_shape=(N_DETECTORS-1, N_KINEMATICS)))
    model.add(Dense(100, activation=LeakyReLU()))
    model.add(Dense(N_KINEMATICS-1))
    model.compile(loss='mse', optimizer='adam')
    
    return model

nodropout_model = lstm_model_nodropout()
es = EarlyStopping(monitor='val_loss', mode='min')
history = nodropout_model.fit(x=X_train_array[:10000], y=y_train_array[:10000],
                    validation_data=(X_test_array, y_test_array),
                    callbacks=[es], epochs=20, use_multiprocessing=True)









    



Train on 10000 samples, validate on 10000 samples
Epoch 1/20
10000/10000 [==============================] - 14s 1ms/sample - loss: 303.2399 - val_loss: 28.9260
Epoch 2/20
10000/10000 [==============================] - 12s 1ms/sample - loss: 22.5050 - val_loss: 23.4065
Epoch 3/20
10000/10000 [==============================] - 13s 1ms/sample - loss: 17.9289 - val_loss: 13.7558
Epoch 4/20
10000/10000 [==============================] - 13s 1ms/sample - loss: 12.5689 - val_loss: 9.9349
Epoch 5/20
10000/10000 [==============================] - 13s 1ms/sample - loss: 9.5144 - val_loss: 8.0810
Epoch 6/20
10000/10000 [==============================] - 12s 1ms/sample - loss: 8.5819 - val_loss: 8.3551



In [167]:

    
def lstm_model_relu():
    
    model = Sequential()
    model.add(LSTM(200, activation='relu', input_shape=(N_DETECTORS-1, N_KINEMATICS)))
    model.add(Dense(100, activation='relu'))
    model.add(Dropout(0.25))
    model.add(Dense(N_KINEMATICS-1))
    model.compile(loss='mse', optimizer='adam')
    
    return model

relu_model = lstm_model_relu()
es = EarlyStopping(monitor='val_loss', mode='min')
history = relu_model.fit(x=X_train_array[:10000], y=y_train_array[:10000],
                    validation_data=(X_test_array, y_test_array),
                    callbacks=[es], epochs=20, use_multiprocessing=True)









    



Train on 10000 samples, validate on 10000 samples
Epoch 1/20
10000/10000 [==============================] - 14s 1ms/sample - loss: 187.4669 - val_loss: 17.1837
Epoch 2/20
10000/10000 [==============================] - 13s 1ms/sample - loss: 20.4060 - val_loss: 5.7464
Epoch 3/20
10000/10000 [==============================] - 13s 1ms/sample - loss: 12.9890 - val_loss: 2.9215
Epoch 4/20
10000/10000 [==============================] - 12s 1ms/sample - loss: 10.3698 - val_loss: 3.3642



In [170]:

    
def model_gru():
    
    model = Sequential()
    model.add(GRU(200, activation=LeakyReLU(), input_shape=(N_DETECTORS-1, N_KINEMATICS)))
    model.add(Dense(100, activation=LeakyReLU()))
    model.add(Dropout(0.25))
    model.add(Dense(N_KINEMATICS-1))
    model.compile(loss='mse', optimizer='adam')
    
    return model

gru_model = model_gru()
es = EarlyStopping(monitor='val_loss', mode='min')
history = gru_model.fit(x=X_train_array[:10000], y=y_train_array[:10000],
                    validation_data=(X_test_array, y_test_array),
                    callbacks=[es], epochs=20, use_multiprocessing=True)









    



Train on 10000 samples, validate on 10000 samples
Epoch 1/20
10000/10000 [==============================] - 14s 1ms/sample - loss: 15.1533 - val_loss: 0.9619
Epoch 2/20
10000/10000 [==============================] - 12s 1ms/sample - loss: 3.2930 - val_loss: 0.6037
Epoch 3/20
10000/10000 [==============================] - 12s 1ms/sample - loss: 2.6465 - val_loss: 0.8654

Early Conclusions

GRU > LSTM
LeakyReLU > ReLU
adam > rmsprop
dropout 0.25 > dropout 0.5 > no dropout



In [243]:

    
def model_v2():
    
    model = Sequential()
    model.add(GRU(200, activation=LeakyReLU(), input_shape=(N_DETECTORS-1, N_KINEMATICS)))
    model.add(Dense(100, activation=LeakyReLU()))
    model.add(Dropout(0.25))
    model.add(Dense(N_KINEMATICS-1))
    model.compile(loss='mse', optimizer='adam')
    
    return model

v2_model = model_v2()
es = EarlyStopping(monitor='val_loss', mode='min')
history = v2_model.fit(x=X_train_array, y=y_train_array,
                    validation_data=(X_test_array, y_test_array),
                    callbacks=[es], epochs=8, use_multiprocessing=True)









    



Train on 194601 samples, validate on 10000 samples
Epoch 1/8
194601/194601 [==============================] - 208s 1ms/sample - loss: 2.3328 - val_loss: 0.3510
Epoch 2/8
194601/194601 [==============================] - 202s 1ms/sample - loss: 1.3041 - val_loss: 0.1264
Epoch 3/8
194601/194601 [==============================] - 204s 1ms/sample - loss: 1.2133 - val_loss: 0.2112



In [ ]:

    
from tensorflow.keras.back



In [26]:

    
def model_v2_deep():
    
    model = Sequential()
    model.add(GRU(30, activation=LeakyReLU(), input_shape=(N_DETECTORS-1, N_KINEMATICS),
                  return_sequences=True))
    model.add(GRU(30, activation=LeakyReLU(), return_sequences=True))
    model.add(GRU(30, activation=LeakyReLU()))
    model.add(Dense(100, activation=LeakyReLU()))
    model.add(Dropout(0.25))
    model.add(Dense(N_KINEMATICS-1))
    model.compile(loss='mse', optimizer='adam')
    
    return model

v2_model_deep = model_v2_deep()
v2_model_deep.summary()









    



Model: "sequential_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
gru_7 (GRU)                  (None, 24, 30)            3420      
_________________________________________________________________
gru_8 (GRU)                  (None, 24, 30)            5580      
_________________________________________________________________
gru_9 (GRU)                  (None, 30)                5580      
_________________________________________________________________
dense_3 (Dense)              (None, 100)               3100      
_________________________________________________________________
dropout_1 (Dropout)          (None, 100)               0         
_________________________________________________________________
dense_4 (Dense)              (None, 5)                 505       
=================================================================
Total params: 18,185
Trainable params: 18,185
Non-trainable params: 0
_________________________________________________________________



In [ ]:

    
es = EarlyStopping(monitor='val_loss', mode='min', patience=2, restore_best_weights=True)
history = v2_model_deep.fit(x=X_train_array, y=y_train_array,
                            validation_data=(X_test_array, y_test_array),
                            callbacks=[es],
                            epochs=8, use_multiprocessing=True)









    



Train on 194601 samples, validate on 10000 samples
Epoch 1/8
194601/194601 [==============================] - 304s 2ms/sample - loss: 2.8970 - val_loss: 0.4792
Epoch 2/8
194601/194601 [==============================] - 290s 1ms/sample - loss: 1.5549 - val_loss: 0.5346
Epoch 3/8
194601/194601 [==============================] - 281s 1ms/sample - loss: 1.3498 - val_loss: 0.3451
Epoch 4/8
194601/194601 [==============================] - 275s 1ms/sample - loss: 1.4204 - val_loss: 0.4317
Epoch 5/8
194601/194601 [==============================] - 289s 1ms/sample - loss: 1.3467 - val_loss: 0.2232
Epoch 6/8
194601/194601 [==============================] - 268s 1ms/sample - loss: 1.2464 - val_loss: 0.1453
Epoch 7/8
194601/194601 [==============================] - 279s 1ms/sample - loss: 1.2127 - val_loss: 0.1390
Epoch 8/8
158144/194601 [=======================>......] - ETA: 55s - loss: 1.1842



In [242]:

    
def model_v2_dbl_gru():
    
    model = Sequential()
    model.add(GRU(200, activation=LeakyReLU(), input_shape=(N_DETECTORS-1, N_KINEMATICS),
                  return_sequences=True))
    model.add(GRU(200, activation=LeakyReLU(), input_shape=(N_DETECTORS-1, N_KINEMATICS)))
    model.add(Dense(100, activation=LeakyReLU()))
    model.add(Dropout(0.25))
    model.add(Dense(N_KINEMATICS-1))
    model.compile(loss='mse', optimizer='adam')
    
    return model

v2_model_dbl_gru = model_v2_dbl_gru()
es = EarlyStopping(monitor='val_loss', mode='min')
history = v2_model_dbl_gru.fit(x=X_train_array[:20000], y=y_train_array[:20000],
                    validation_data=(X_test_array, y_test_array),
                    #callbacks=[es],
                    epochs=10, use_multiprocessing=True)









    



Train on 20000 samples, validate on 10000 samples
Epoch 1/10
20000/20000 [==============================] - 55s 3ms/sample - loss: 6.2944 - val_loss: 0.8832
Epoch 2/10
20000/20000 [==============================] - 50s 3ms/sample - loss: 2.3380 - val_loss: 0.8004
Epoch 3/10
20000/20000 [==============================] - 49s 2ms/sample - loss: 1.9149 - val_loss: 0.6378
Epoch 4/10
20000/20000 [==============================] - 50s 3ms/sample - loss: 1.8121 - val_loss: 0.4890
Epoch 5/10
20000/20000 [==============================] - 52s 3ms/sample - loss: 1.7023 - val_loss: 0.4793
Epoch 6/10
20000/20000 [==============================] - 50s 2ms/sample - loss: 1.7116 - val_loss: 0.5745
Epoch 7/10
20000/20000 [==============================] - 51s 3ms/sample - loss: 1.7073 - val_loss: 0.4136
Epoch 8/10
20000/20000 [==============================] - 51s 3ms/sample - loss: 1.6392 - val_loss: 0.6285
Epoch 9/10
20000/20000 [==============================] - 50s 2ms/sample - loss: 1.6129 - val_loss: 0.6862
Epoch 10/10
20000/20000 [==============================] - 50s 3ms/sample - loss: 1.5222 - val_loss: 0.4429



In [177]:

    
def model_v2_2x_dropout():
    
    model = Sequential()
    model.add(GRU(200, activation=LeakyReLU(), input_shape=(N_DETECTORS-1, N_KINEMATICS)))
    model.add(Dropout(0.25))
    model.add(Dense(100, activation=LeakyReLU()))
    model.add(Dropout(0.25))
    model.add(Dense(N_KINEMATICS-1))
    model.compile(loss='mse', optimizer='adam')
    
    return model

v2_model_dbl_dropout = model_v2_2x_dropout()
es = EarlyStopping(monitor='val_loss', mode='min')
history = v2_model_dbl_dropout.fit(x=X_train_array[:20000], y=y_train_array[:20000],
                    validation_data=(X_test_array, y_test_array),
                    callbacks=[es], epochs=20, use_multiprocessing=True)









    



Train on 20000 samples, validate on 10000 samples
Epoch 1/20
20000/20000 [==============================] - 24s 1ms/sample - loss: 18.9925 - val_loss: 2.0618
Epoch 2/20
20000/20000 [==============================] - 22s 1ms/sample - loss: 4.9002 - val_loss: 1.2547
Epoch 3/20
20000/20000 [==============================] - 22s 1ms/sample - loss: 3.7768 - val_loss: 0.6070
Epoch 4/20
20000/20000 [==============================] - 22s 1ms/sample - loss: 3.2649 - val_loss: 1.2479



In [200]:

    
def model_v2_big_gru():
    
    model = Sequential()
    model.add(GRU(400, activation=LeakyReLU(), input_shape=(N_DETECTORS-1, N_KINEMATICS)))
    model.add(Dense(100, activation=LeakyReLU()))
    model.add(Dropout(0.25))
    model.add(Dense(N_KINEMATICS-1))
    model.compile(loss='mse', optimizer='adam')
    
    return model

v2_model_big_gru = model_v2_big_gru()
es = EarlyStopping(monitor='val_loss', mode='min')
history = v2_model_big_gru.fit(x=X_train_array[:20000], y=y_train_array[:20000],
                    validation_data=(X_test_array, y_test_array),
                    #callbacks=[es],
                    epochs=10, use_multiprocessing=True)









    



---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-200-da22d57a70b0> in <module>
     10     return model
     11 
---> 12 v2_model_big_gru = model_v2_big_gru()
     13 es = EarlyStopping(monitor='val_loss', mode='min')
     14 history = v2_model_big_gru.fit(x=X_train_array[:20000], y=y_train_array[:20000],

<ipython-input-200-da22d57a70b0> in model_v2_big_gru()
      2 
      3     model = Sequential()
----> 4     model.add(GRU(400, activation=LeakyReLU(), input_shape=(1, None, N_DETECTORS-1, N_KINEMATICS)))
      5     model.add(Dense(100, activation=LeakyReLU()))
      6     model.add(Dropout(0.25))

//anaconda3/lib/python3.7/site-packages/tensorflow_core/python/training/tracking/base.py in _method_wrapper(self, *args, **kwargs)
    455     self._self_setattr_tracking = False  # pylint: disable=protected-access
    456     try:
--> 457       result = method(self, *args, **kwargs)
    458     finally:
    459       self._self_setattr_tracking = previous_value  # pylint: disable=protected-access

//anaconda3/lib/python3.7/site-packages/tensorflow_core/python/keras/engine/sequential.py in add(self, layer)
    175           # and create the node connecting the current layer
    176           # to the input layer we just created.
--> 177           layer(x)
    178           set_inputs = True
    179 

//anaconda3/lib/python3.7/site-packages/tensorflow_core/python/keras/layers/recurrent.py in __call__(self, inputs, initial_state, constants, **kwargs)
    621 
    622     if initial_state is None and constants is None:
--> 623       return super(RNN, self).__call__(inputs, **kwargs)
    624 
    625     # If any of `initial_state` or `constants` are specified and are Keras

//anaconda3/lib/python3.7/site-packages/tensorflow_core/python/keras/engine/base_layer.py in __call__(self, inputs, *args, **kwargs)
    770         # are casted, not before.
    771         input_spec.assert_input_compatibility(self.input_spec, inputs,
--> 772                                               self.name)
    773         graph = backend.get_graph()
    774         with graph.as_default(), backend.name_scope(self._name_scope()):

//anaconda3/lib/python3.7/site-packages/tensorflow_core/python/keras/engine/input_spec.py in assert_input_compatibility(input_spec, inputs, layer_name)
    175                          'expected ndim=' + str(spec.ndim) + ', found ndim=' +
    176                          str(ndim) + '. Full shape received: ' +
--> 177                          str(x.shape.as_list()))
    178     if spec.max_ndim is not None:
    179       ndim = x.shape.ndims

ValueError: Input 0 of layer gru_14 is incompatible with the layer: expected ndim=3, found ndim=5. Full shape received: [None, 1, None, 24, 6]



In [181]:

    
v2_model_big_gru.fit(x=X_train_array[:20000], y=y_train_array[:20000],
                    validation_data=(X_test_array, y_test_array),
                    #callbacks=[es],
                    epochs=15, use_multiprocessing=True, initial_epoch=10)









    



Train on 20000 samples, validate on 10000 samples
Epoch 11/15
20000/20000 [==============================] - 54s 3ms/sample - loss: 1.3613 - val_loss: 0.5861
Epoch 12/15
20000/20000 [==============================] - 55s 3ms/sample - loss: 1.3655 - val_loss: 0.2373
Epoch 13/15
20000/20000 [==============================] - 54s 3ms/sample - loss: 1.2724 - val_loss: 0.1703
Epoch 14/15
20000/20000 [==============================] - 54s 3ms/sample - loss: 1.2940 - val_loss: 0.3137
Epoch 15/15
20000/20000 [==============================] - 54s 3ms/sample - loss: 1.2810 - val_loss: 0.1698






    Out[181]:





<tensorflow.python.keras.callbacks.History at 0x1a8e946748>

Try CNN LSTM



In [221]:

    
X_train_array.shape









    Out[221]:





(194601, 24, 6)



In [240]:

    
def cnn_gru():
    
    model = Sequential()
    model.add(Conv1D(filters=5, kernel_size=2, strides=1, input_shape=(N_DETECTORS-1, N_KINEMATICS)))
    #model.add(MaxPooling1D())
    model.add(GRU(200, activation=LeakyReLU()))
    model.add(Dense(100, activation=LeakyReLU()))
    model.add(Dropout(0.25))
    model.add(Dense(N_KINEMATICS-1))
    model.compile(loss='mse', optimizer='adam')
    
    return model

cnn_model = cnn_gru()
cnn_model.summary()









    



Model: "sequential_73"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
conv1d_20 (Conv1D)           (None, 23, 5)             65        
_________________________________________________________________
gru_25 (GRU)                 (None, 200)               124200    
_________________________________________________________________
dense_69 (Dense)             (None, 100)               20100     
_________________________________________________________________
dropout_25 (Dropout)         (None, 100)               0         
_________________________________________________________________
dense_70 (Dense)             (None, 5)                 505       
=================================================================
Total params: 144,870
Trainable params: 144,870
Non-trainable params: 0
_________________________________________________________________



In [241]:

    
#es = EarlyStopping(monitor='val_loss', mode='min')
history = cnn_model.fit(x=X_train_array[:20000], y=y_train_array[:20000],
                      validation_data=(X_test_array, y_test_array),
                      epochs=10, use_multiprocessing=True)









    



Train on 20000 samples, validate on 10000 samples
Epoch 1/10
20000/20000 [==============================] - 23s 1ms/sample - loss: 13.3734 - val_loss: 1.6757
Epoch 2/10
20000/20000 [==============================] - 22s 1ms/sample - loss: 3.0693 - val_loss: 0.8470
Epoch 3/10
20000/20000 [==============================] - 21s 1ms/sample - loss: 2.6314 - val_loss: 0.7303
Epoch 4/10
20000/20000 [==============================] - 21s 1ms/sample - loss: 2.1451 - val_loss: 0.8053
Epoch 5/10
20000/20000 [==============================] - 21s 1ms/sample - loss: 2.0921 - val_loss: 0.7273
Epoch 6/10
20000/20000 [==============================] - 21s 1ms/sample - loss: 1.9285 - val_loss: 0.4551
Epoch 7/10
20000/20000 [==============================] - 21s 1ms/sample - loss: 1.8432 - val_loss: 0.6966
Epoch 8/10
20000/20000 [==============================] - 21s 1ms/sample - loss: 1.6800 - val_loss: 0.5120
Epoch 9/10
20000/20000 [==============================] - 21s 1ms/sample - loss: 1.5675 - val_loss: 0.3390
Epoch 10/10
20000/20000 [==============================] - 21s 1ms/sample - loss: 1.5159 - val_loss: 0.3983



In [251]:

    
history.history









    Out[251]:





{'loss': [3.3281039200901317,
  1.6039592637484563,
  1.3489888134789536,
  1.2624885631565317,
  1.2353142021468715,
  1.211998767219029,
  1.1837373140878185,
  1.1759768705626037],
 'val_loss': [0.778679012966156,
  0.5407980192184448,
  0.5594191231250762,
  0.4179811120986939,
  0.27897539434432983,
  0.18599163811206817,
  0.1257927789211273,
  0.10037544323205948]}

Enough tinkering around

Formalize this into some scripts
Make predictions on competition test data



In [1]:

    
from train import train
from predict import predict



In [2]:

    
model = train(frac=1.00, filename="dannowitz_jlab2_model", epochs=100, ret_model=True)









    



Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
gru (GRU)                    (None, 24, 30)            3420      
_________________________________________________________________
leaky_re_lu (LeakyReLU)      (None, 24, 30)            0         
_________________________________________________________________
gru_1 (GRU)                  (None, 24, 30)            5580      
_________________________________________________________________
leaky_re_lu_1 (LeakyReLU)    (None, 24, 30)            0         
_________________________________________________________________
gru_2 (GRU)                  (None, 30)                5580      
_________________________________________________________________
leaky_re_lu_2 (LeakyReLU)    (None, 30)                0         
_________________________________________________________________
dense (Dense)                (None, 100)               3100      
_________________________________________________________________
leaky_re_lu_3 (LeakyReLU)    (None, 100)               0         
_________________________________________________________________
dropout (Dropout)            (None, 100)               0         
_________________________________________________________________
dense_1 (Dense)              (None, 5)                 505       
=================================================================
Total params: 18,185
Trainable params: 18,185
Non-trainable params: 0
_________________________________________________________________
None
Train on 194601 samples, validate on 10000 samples
Epoch 1/100
194601/194601 [==============================] - 308s 2ms/sample - loss: 5.6312 - val_loss: 1.2376
Epoch 2/100
103392/194601 [==============>...............] - ETA: 2:17 - loss: 2.2091WARNING:tensorflow:Early stopping conditioned on metric `val_loss` which is not available. Available metrics are: loss






    



---------------------------------------------------------------------------
KeyboardInterrupt                         Traceback (most recent call last)
<ipython-input-2-7f967f082214> in <module>
----> 1 model = train(frac=1.00, filename="dannowitz_jlab2_model", epochs=100, ret_model=True)

~/Work/polymath-progression-blog/jlab-ml-lunch-2/train.py in train(frac, filename, epochs, ret_model)
     90         validation_data=(X_test, y_test),
     91         callbacks=[es],
---> 92         epochs=epochs,
     93         use_multiprocessing=True,
     94     )

//anaconda3/lib/python3.7/site-packages/tensorflow_core/python/keras/engine/training.py in fit(self, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_freq, max_queue_size, workers, use_multiprocessing, **kwargs)
    732         max_queue_size=max_queue_size,
    733         workers=workers,
--> 734         use_multiprocessing=use_multiprocessing)
    735 
    736   def evaluate(self,

//anaconda3/lib/python3.7/site-packages/tensorflow_core/python/keras/engine/training_v2.py in fit(self, model, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_freq, **kwargs)
    322                 mode=ModeKeys.TRAIN,
    323                 training_context=training_context,
--> 324                 total_epochs=epochs)
    325             cbks.make_logs(model, epoch_logs, training_result, ModeKeys.TRAIN)
    326 

//anaconda3/lib/python3.7/site-packages/tensorflow_core/python/keras/engine/training_v2.py in run_one_epoch(model, iterator, execution_function, dataset_size, batch_size, strategy, steps_per_epoch, num_samples, mode, training_context, total_epochs)
    121         step=step, mode=mode, size=current_batch_size) as batch_logs:
    122       try:
--> 123         batch_outs = execution_function(iterator)
    124       except (StopIteration, errors.OutOfRangeError):
    125         # TODO(kaftan): File bug about tf function and errors.OutOfRangeError?

//anaconda3/lib/python3.7/site-packages/tensorflow_core/python/keras/engine/training_v2_utils.py in execution_function(input_fn)
     84     # `numpy` translates Tensors to values in Eager mode.
     85     return nest.map_structure(_non_none_constant_value,
---> 86                               distributed_function(input_fn))
     87 
     88   return execution_function

//anaconda3/lib/python3.7/site-packages/tensorflow_core/python/eager/def_function.py in __call__(self, *args, **kwds)
    413       # In this case we have created variables on the first call, so we run the
    414       # defunned version which is guaranteed to never create variables.
--> 415       return self._stateless_fn(*args, **kwds)  # pylint: disable=not-callable
    416     elif self._stateful_fn is not None:
    417       # In this case we have not created variables on the first call. So we can

//anaconda3/lib/python3.7/site-packages/tensorflow_core/python/eager/function.py in __call__(self, *args, **kwargs)
   1820     """Calls a graph function specialized to the inputs."""
   1821     graph_function, args, kwargs = self._maybe_define_function(args, kwargs)
-> 1822     return graph_function._filtered_call(args, kwargs)  # pylint: disable=protected-access
   1823 
   1824   @property

//anaconda3/lib/python3.7/site-packages/tensorflow_core/python/eager/function.py in _filtered_call(self, args, kwargs)
   1139          if isinstance(t, (ops.Tensor,
   1140                            resource_variable_ops.BaseResourceVariable))),
-> 1141         self.captured_inputs)
   1142 
   1143   def _call_flat(self, args, captured_inputs, cancellation_manager=None):

//anaconda3/lib/python3.7/site-packages/tensorflow_core/python/eager/function.py in _call_flat(self, args, captured_inputs, cancellation_manager)
   1222     if executing_eagerly:
   1223       flat_outputs = forward_function.call(
-> 1224           ctx, args, cancellation_manager=cancellation_manager)
   1225     else:
   1226       gradient_name = self._delayed_rewrite_functions.register()

//anaconda3/lib/python3.7/site-packages/tensorflow_core/python/eager/function.py in call(self, ctx, args, cancellation_manager)
    509               inputs=args,
    510               attrs=("executor_type", executor_type, "config_proto", config),
--> 511               ctx=ctx)
    512         else:
    513           outputs = execute.execute_with_cancellation(

//anaconda3/lib/python3.7/site-packages/tensorflow_core/python/eager/execute.py in quick_execute(op_name, num_outputs, inputs, attrs, ctx, name)
     59     tensors = pywrap_tensorflow.TFE_Py_Execute(ctx._handle, device_name,
     60                                                op_name, inputs, attrs,
---> 61                                                num_outputs)
     62   except core._NotOkStatusException as e:
     63     if name is not None:

KeyboardInterrupt:



In [ ]:

    
preds = predict(model_filename="dannowitz_jlab2_model.h5",
                data_filename="test_in (1).csv",
                output_filename="danowitz_jlab2_submission.csv")

	x	y	px	py	pz
0	-23.123945	3.142886	-0.235592	0.091612	2.413377
1	19.633486	32.319292	0.314376	0.316425	2.592952
2	-8.308506	-39.299613	-0.020097	-0.051232	0.948906
3	19.918838	10.664617	0.038102	0.047740	1.864014
4	13.649239	-20.616935	-0.015548	0.001471	2.323953

	x	y	z	px	py	pz	x1	y1	z1	px1	...	z23	px23	py23	pz23	x24	y24	z24	px24	py24	pz24
0	0.877	1.322	65.0	-0.244	-0.053	2.414	-10.669	0.330	176.944	-0.254	...	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN
1	0.786	-2.483	65.0	0.103	0.432	2.593	7.366	15.502	176.944	0.206	...	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN
2	-13.134	-26.531	65.0	0.064	-0.021	0.953	-7.586	-30.687	176.944	0.027	...	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN
3	18.454	2.805	65.0	-0.019	0.069	1.833	18.043	6.797	176.944	0.013	...	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN
4	15.552	-19.196	65.0	-0.010	-0.011	2.366	15.068	-19.750	176.944	-0.014	...	341.28	-0.014	-0.002	2.351	NaN	NaN	343.405	NaN	NaN	NaN