03-Recurrent-Network-Model


03 - Sequence Model Approach

  • The more 'classical' approach to solving this problem
  • Train a model that can take any number of 'steps'
  • Makes a prediction on next step based on previous steps
  • Learn from full tracks
  • For test tracks, predict what the next step's values will be

In [1]:
%matplotlib inline

In [2]:
import pandas as pd
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, LeakyReLU, Dropout, ReLU, GRU, TimeDistributed, Conv2D, MaxPooling2D, Flatten
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.callbacks import EarlyStopping
from jlab import load_test_data, get_test_detector_plane

Load up and prep the datasets


In [3]:
X_train = pd.read_csv('MLchallenge2_training.csv')
X_test = load_test_data('test_in.csv')
eval_planes = get_test_detector_plane(X_test)

# Also, load our truth values
y_true = pd.read_csv('test_prediction.csv', names=['x', 'y', 'px', 'py', 'pz'],
                     header=None)

In [4]:
X_test.head()


Out[4]:
x y z px py pz x1 y1 z1 px1 ... z23 px23 py23 pz23 x24 y24 z24 px24 py24 pz24
0 0.877 1.322 65.0 -0.244 -0.053 2.414 -10.669 0.330 176.944 -0.254 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
1 0.786 -2.483 65.0 0.103 0.432 2.593 7.366 15.502 176.944 0.206 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
2 -13.134 -26.531 65.0 0.064 -0.021 0.953 -7.586 -30.687 176.944 0.027 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
3 18.454 2.805 65.0 -0.019 0.069 1.833 18.043 6.797 176.944 0.013 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
4 15.552 -19.196 65.0 -0.010 -0.011 2.366 15.068 -19.750 176.944 -0.014 ... 341.28 -0.014 -0.002 2.351 NaN NaN 343.405 NaN NaN NaN

5 rows × 150 columns


In [5]:
y_true.head()


Out[5]:
x y px py pz
0 -23.123945 3.142886 -0.235592 0.091612 2.413377
1 19.633486 32.319292 0.314376 0.316425 2.592952
2 -8.308506 -39.299613 -0.020097 -0.051232 0.948906
3 19.918838 10.664617 0.038102 0.047740 1.864014
4 13.649239 -20.616935 -0.015548 0.001471 2.323953

Construct the training data and targets

  • For each track
    • Choose a number N between 8 and 24
    • That track will have 6 kinematics for N blocks
    • The target variable will be 5 of the 6 kinematic variables (x, y, px, py, pz; z is dropped) for the (N+1)th detector block
  • This will cause variable length sequences
  • Apply pad_sequences to prepend with zeros appropriately

Training Dataset


In [10]:
N_SAMPLES = len(X_train)
N_DETECTORS = 25
N_KINEMATICS = 6
SHAPE = (N_SAMPLES, N_DETECTORS-1, N_KINEMATICS)

In [11]:
# Build one variable-length input sequence and one target row per training track.
X_train_list = []
# np.ndarray(...) allocates without initialising; every row is assigned in the loop below
y_train_array = np.ndarray(shape=(N_SAMPLES, N_KINEMATICS-1))
for ix in range(N_SAMPLES):
    # Random truncation length, drawn from 8..24 inclusive
    seq_len = np.random.choice(range(8, 25))
    # Reshape the flat row to one row per detector, one column per kinematic
    track = X_train.iloc[ix].values.reshape(N_DETECTORS, N_KINEMATICS)
    # Keep only the first seq_len detectors as the model input
    X_train_list.append(track[0:seq_len])
    # Store the kinematics of the next in the sequence
    # Ignore the 3rd one, which is z
    y_train_array[ix] = track[seq_len][[0,1,3,4,5]]

In [12]:
for track in X_train_list[:10]:
    print(len(track))


12
17
16
22
9
11
13
22
24
17

In [13]:
X_train_list = pad_sequences(X_train_list, dtype=float)

In [14]:
for track in X_train_list[:10]:
    print(len(track))


24
24
24
24
24
24
24
24
24
24

In [15]:
X_train_array = np.array(X_train_list)
X_train_array.shape


Out[15]:
(194601, 24, 6)

In [16]:
y_train_array.shape


Out[16]:
(194601, 5)

Validation Dataset


In [17]:
N_TEST_SAMPLES = len(X_test)

In [18]:
y_test_array = y_true.values

In [19]:
# Truncate each test track to its own length; padding happens in the next cell.
X_test_list = []
for ix in range(N_TEST_SAMPLES):
    # NOTE(review): presumably the index of the detector plane to predict, i.e. the
    # number of populated leading detectors -- confirm against jlab.get_test_detector_plane
    seq_len = get_test_detector_plane(X_test.iloc[ix])
    # Reshape the flat row to one row per detector, one column per kinematic
    track = X_test.iloc[ix].values.reshape(N_DETECTORS, N_KINEMATICS)
    X_test_list.append(track[0:seq_len])

In [20]:
X_test_list = pad_sequences(X_test_list, dtype=float)
X_test_array = np.array(X_test_list)

In [21]:
X_test_array.shape


Out[21]:
(10000, 24, 6)

In [22]:
y_test_array.shape


Out[22]:
(10000, 5)

In [23]:
y_true.values.shape


Out[23]:
(10000, 5)

In [1]:
import pandas as pd
import numpy as np
from math import floor
from tensorflow.keras.preprocessing.sequence import pad_sequences
from sklearn.model_selection import train_test_split

In [2]:
data = pd.read_csv('MLchallenge2_training.csv')

In [4]:
# The z columns are taken from the first track only (they are treated as
# identical for every track), giving one z position per detector.
z_columns = ['z'] + [f'z{i}' for i in range(1, 25)]
Z_VALS = data[z_columns].loc[0].values
# Spacing from each detector to the one after it; the last detector has no
# successor, so its spacing is recorded as 0.0.
Z_DIST = [nxt - cur for cur, nxt in zip(Z_VALS[:-1], Z_VALS[1:])] + [0.0]

In [18]:
# Number of timesteps
N_DETECTORS = 25
# Provided number of kinematics
N_KINEMATICS = 6
# Number of features after engineering them all
N_FEATURES = 13

In [4]:
def get_detector_meta(kin_array, det_id):
    """Append engineered per-detector features to one timestep of kinematics.

    Parameters
    ----------
    kin_array : array-like
        Kinematics of a single timestep. Indices 3, 4 and 5 are read as the
        momentum components used for the transverse and total momentum.
    det_id : int
        Index of the detector (0 is the origin). Also used to look up the
        spacing to the following detector in the module-level ``Z_DIST``.

    Returns
    -------
    numpy.ndarray
        Flattened ``kin_array`` followed by seven values: ``det_id``, the
        gap flag, the detector group, the rank within the group, the
        z-distance to the next detector, the transverse momentum and the
        total momentum.
    """
    px, py, pz = kin_array[3], kin_array[4], kin_array[5]

    # The squared x-y momentum is shared by both momentum magnitudes
    pt_squared = np.square(px) + np.square(py)

    engineered = np.array([
        det_id,
        # Large gap after this detector? 1 = no, 2 = yes
        # (0 is left free for padded timesteps)
        int(det_id % 6 == 0) + 1,
        # Detector group: 1 (origin), 2, 3, 4, or 5
        floor((det_id - 1) / 6) + 2,
        # Position within the group of six, numbered 1-6 (origin is 6)
        ((det_id - 1) % 6) + 1,
        # Distance to the next detector
        Z_DIST[det_id],
        # Transverse momentum (x-y component)
        np.sqrt(pt_squared),
        # Total momentum
        np.sqrt(pt_squared + np.square(pz)),
    ])

    # axis=None flattens both inputs into a single 1-D feature vector
    return np.concatenate([kin_array, engineered], axis=None)

In [5]:
def tracks_to_time_series(X):
    """Convert training dataframe to multivariate time series training set

    Pivots each track to a series of timesteps (one row per detector, one
    column per kinematic). Each track is then truncated to a random length
    between 8 and 24 detectors, mimicking the truncated tracks of the
    provided test set. The step right after the truncation point is saved
    as the target, with z dropped.

    Truncated sequences are front-padded with zeros.

    Parameters
    ----------
    X : pandas.DataFrame
        One row per track, holding ``N_DETECTORS * N_KINEMATICS`` values.

    Returns
    -------
    (numpy.ndarray, numpy.ndarray)
        Tuple of the training data and labels. The labels have shape
        ``(len(X), N_KINEMATICS - 1)``.
    """

    n_samples = len(X)

    # Convert the frame to an array once. Calling X.iloc[ix].values inside
    # the loop builds a fresh Series for every track, which dominates the
    # run time on a large training set.
    tracks = X.values.reshape(n_samples, N_DETECTORS, N_KINEMATICS)

    # Target columns: every kinematic except index 2, which is z
    target_cols = [0, 1, 3, 4, 5]

    X_ts_list = []
    # Uninitialised on purpose: every row is assigned in the loop below
    y_array = np.empty(shape=(n_samples, N_KINEMATICS-1))
    for ix in range(n_samples):
        # Randomly choose how many detectors the track went through
        track_len = np.random.choice(range(8, 25))
        track = tracks[ix]
        # Truncate the track to only track_len detectors
        X_ts_list.append(track[0:track_len])
        # Store the kinematics of the next detector in the sequence
        y_array[ix] = track[track_len][target_cols]

    # pad_sequences already returns a numpy array, so no extra copy is needed
    X_ts_array = pad_sequences(X_ts_list, dtype=float)

    return X_ts_array, y_array

In [6]:
X, y = tracks_to_time_series(data)

In [7]:
X[3]


Out[7]:
array([[ 0.00000e+00,  0.00000e+00,  0.00000e+00,  0.00000e+00,
         0.00000e+00,  0.00000e+00],
       [ 1.84542e+01,  2.80469e+00,  6.50000e+01, -1.93839e-02,
         6.93840e-02,  1.83259e+00],
       [ 1.80433e+01,  6.79747e+00,  1.76944e+02,  1.30392e-02,
         6.20286e-02,  1.82485e+00],
       [ 1.80588e+01,  6.86968e+00,  1.79069e+02,  1.32902e-02,
         6.19080e-02,  1.82484e+00],
       [ 1.80749e+01,  6.94169e+00,  1.81195e+02,  1.48815e-02,
         6.18535e-02,  1.82481e+00],
       [ 1.80926e+01,  7.01374e+00,  1.83320e+02,  1.53596e-02,
         6.20166e-02,  1.82479e+00],
       [ 1.81105e+01,  7.08585e+00,  1.85445e+02,  1.54059e-02,
         6.18141e-02,  1.82477e+00],
       [ 1.81287e+01,  7.15783e+00,  1.87571e+02,  1.59919e-02,
         6.19073e-02,  1.82469e+00],
       [ 1.86773e+01,  8.71037e+00,  2.35514e+02,  2.58893e-02,
         5.57861e-02,  1.82465e+00],
       [ 1.87077e+01,  8.77517e+00,  2.37639e+02,  2.62399e-02,
         5.55210e-02,  1.82464e+00],
       [ 1.87386e+01,  8.83959e+00,  2.39765e+02,  2.68784e-02,
         5.50810e-02,  1.82463e+00],
       [ 1.87702e+01,  8.90362e+00,  2.41890e+02,  2.72003e-02,
         5.48001e-02,  1.82462e+00],
       [ 1.88022e+01,  8.96717e+00,  2.44015e+02,  2.78705e-02,
         5.44760e-02,  1.82460e+00],
       [ 1.88350e+01,  9.03072e+00,  2.46141e+02,  2.84710e-02,
         5.46504e-02,  1.82457e+00],
       [ 1.97008e+01,  1.03871e+01,  2.94103e+02,  3.72172e-02,
         4.90906e-02,  1.82448e+00],
       [ 1.97443e+01,  1.04442e+01,  2.96228e+02,  3.75142e-02,
         4.91246e-02,  1.82445e+00],
       [ 1.97881e+01,  1.05014e+01,  2.98354e+02,  3.77907e-02,
         4.89708e-02,  1.82442e+00],
       [ 1.98327e+01,  1.05576e+01,  3.00479e+02,  3.88871e-02,
         4.78588e-02,  1.82440e+00],
       [ 1.98782e+01,  1.06133e+01,  3.02604e+02,  3.92594e-02,
         4.77353e-02,  1.82438e+00],
       [ 1.99239e+01,  1.06690e+01,  3.04730e+02,  3.91888e-02,
         4.77707e-02,  1.82436e+00],
       [ 2.05370e+01,  1.14109e+01,  3.32778e+02,  4.02737e-02,
         4.95768e-02,  1.82423e+00],
       [ 2.05840e+01,  1.14690e+01,  3.34903e+02,  4.03437e-02,
         4.99216e-02,  1.82419e+00],
       [ 2.06307e+01,  1.15272e+01,  3.37029e+02,  3.97329e-02,
         5.01374e-02,  1.82419e+00],
       [ 2.06771e+01,  1.15861e+01,  3.39154e+02,  3.97559e-02,
         5.08750e-02,  1.82415e+00]])

In [8]:
y[3]


Out[8]:
array([20.7233   , 11.6457   ,  0.039513 ,  0.0515839,  1.82412  ])

In [9]:
X_train, X_test, y_train, y_test = train_test_split(X, y)

In [10]:
len(X_train), len(X_test)


Out[10]:
(145950, 48651)

Multi-layer GRU Model with LReLU


In [7]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import GRU, Dense, LeakyReLU, Dropout
from tensorflow.keras.callbacks import EarlyStopping
import joblib

In [20]:
def lrelu(x):
    """Apply a default-configured LeakyReLU layer to ``x`` and return the result."""
    activation = LeakyReLU()
    return activation(x)


def gru_model(gru_units=35, dense_units=100,
              dropout_rate=0.25):
    """Build and compile the stacked-GRU regression network.

    The network is three GRU layers (the first two return the full
    sequence, the third only its final state), a dense layer, dropout,
    and a dense output layer with ``N_KINEMATICS - 1`` units and no
    activation. The GRU and hidden dense layers use ``lrelu`` as their
    activation. The model is compiled with MSE loss and the Adam optimizer.

    Parameters
    ----------
    gru_units : int
        Units in each of the three GRU layers.
    dense_units : int
        Units in the hidden dense layer.
    dropout_rate : float
        Dropout rate applied between the hidden dense layer and the output.

    Returns
    -------
    tensorflow.keras.models.Sequential
        The compiled model.

    """

    # One timestep per detector except the last, N_KINEMATICS values each
    timestep_shape = (N_DETECTORS-1, N_KINEMATICS)

    layers = [
        GRU(gru_units, activation=lrelu,
            input_shape=timestep_shape,
            return_sequences=True),
        GRU(gru_units, activation=lrelu,
            return_sequences=True),
        GRU(gru_units, activation=lrelu),
        Dense(dense_units, activation=lrelu),
        Dropout(dropout_rate),
        Dense(N_KINEMATICS-1),
    ]

    network = Sequential(layers)
    network.compile(loss='mse', optimizer='adam')

    return network

In [21]:
model = gru_model()

In [22]:
model.summary()


Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
gru_3 (GRU)                  (None, 24, 35)            4515      
_________________________________________________________________
gru_4 (GRU)                  (None, 24, 35)            7560      
_________________________________________________________________
gru_5 (GRU)                  (None, 35)                7560      
_________________________________________________________________
dense_2 (Dense)              (None, 100)               3600      
_________________________________________________________________
dropout_1 (Dropout)          (None, 100)               0         
_________________________________________________________________
dense_3 (Dense)              (None, 5)                 505       
=================================================================
Total params: 23,740
Trainable params: 23,740
Non-trainable params: 0
_________________________________________________________________

In [16]:
from tensorflow.keras.utils import plot_model
plot_model(model, to_file='gru_model.png', show_shapes=True)


Out[16]:

In [17]:
# Stop once val_loss has gone 5 epochs without improving, then roll the
# model back to the weights from its best epoch
es = EarlyStopping(monitor='val_loss', mode='min',
                   patience=5, restore_best_weights=True)
# Train for at most 50 epochs, validating on the held-out split each epoch
history = model.fit(
    x=X_train,
    y=y_train,
    validation_data=(X_test, y_test),
    callbacks=[es],
    epochs=50,
)

# Persist the trained model and the per-epoch metrics dict
model.save("gru_model.h5")
joblib.dump(history.history, "gru_model.history")


Train on 145950 samples, validate on 48651 samples
Epoch 1/50
 31936/145950 [=====>........................] - ETA: 3:06 - loss: 8.1076WARNING:tensorflow:Early stopping conditioned on metric `val_loss` which is not available. Available metrics are: loss
---------------------------------------------------------------------------
KeyboardInterrupt                         Traceback (most recent call last)
<ipython-input-17-b9d369c7fd41> in <module>
      6     validation_data=(X_test, y_test),
      7     callbacks=[es],
----> 8     epochs=50,
      9 )
     10 

//anaconda3/lib/python3.7/site-packages/tensorflow_core/python/keras/engine/training.py in fit(self, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_freq, max_queue_size, workers, use_multiprocessing, **kwargs)
    732         max_queue_size=max_queue_size,
    733         workers=workers,
--> 734         use_multiprocessing=use_multiprocessing)
    735 
    736   def evaluate(self,

//anaconda3/lib/python3.7/site-packages/tensorflow_core/python/keras/engine/training_v2.py in fit(self, model, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_freq, **kwargs)
    322                 mode=ModeKeys.TRAIN,
    323                 training_context=training_context,
--> 324                 total_epochs=epochs)
    325             cbks.make_logs(model, epoch_logs, training_result, ModeKeys.TRAIN)
    326 

//anaconda3/lib/python3.7/site-packages/tensorflow_core/python/keras/engine/training_v2.py in run_one_epoch(model, iterator, execution_function, dataset_size, batch_size, strategy, steps_per_epoch, num_samples, mode, training_context, total_epochs)
    121         step=step, mode=mode, size=current_batch_size) as batch_logs:
    122       try:
--> 123         batch_outs = execution_function(iterator)
    124       except (StopIteration, errors.OutOfRangeError):
    125         # TODO(kaftan): File bug about tf function and errors.OutOfRangeError?

//anaconda3/lib/python3.7/site-packages/tensorflow_core/python/keras/engine/training_v2_utils.py in execution_function(input_fn)
     84     # `numpy` translates Tensors to values in Eager mode.
     85     return nest.map_structure(_non_none_constant_value,
---> 86                               distributed_function(input_fn))
     87 
     88   return execution_function

//anaconda3/lib/python3.7/site-packages/tensorflow_core/python/eager/def_function.py in __call__(self, *args, **kwds)
    413       # In this case we have created variables on the first call, so we run the
    414       # defunned version which is guaranteed to never create variables.
--> 415       return self._stateless_fn(*args, **kwds)  # pylint: disable=not-callable
    416     elif self._stateful_fn is not None:
    417       # In this case we have not created variables on the first call. So we can

//anaconda3/lib/python3.7/site-packages/tensorflow_core/python/eager/function.py in __call__(self, *args, **kwargs)
   1820     """Calls a graph function specialized to the inputs."""
   1821     graph_function, args, kwargs = self._maybe_define_function(args, kwargs)
-> 1822     return graph_function._filtered_call(args, kwargs)  # pylint: disable=protected-access
   1823 
   1824   @property

//anaconda3/lib/python3.7/site-packages/tensorflow_core/python/eager/function.py in _filtered_call(self, args, kwargs)
   1139          if isinstance(t, (ops.Tensor,
   1140                            resource_variable_ops.BaseResourceVariable))),
-> 1141         self.captured_inputs)
   1142 
   1143   def _call_flat(self, args, captured_inputs, cancellation_manager=None):

//anaconda3/lib/python3.7/site-packages/tensorflow_core/python/eager/function.py in _call_flat(self, args, captured_inputs, cancellation_manager)
   1222     if executing_eagerly:
   1223       flat_outputs = forward_function.call(
-> 1224           ctx, args, cancellation_manager=cancellation_manager)
   1225     else:
   1226       gradient_name = self._delayed_rewrite_functions.register()

//anaconda3/lib/python3.7/site-packages/tensorflow_core/python/eager/function.py in call(self, ctx, args, cancellation_manager)
    509               inputs=args,
    510               attrs=("executor_type", executor_type, "config_proto", config),
--> 511               ctx=ctx)
    512         else:
    513           outputs = execute.execute_with_cancellation(

//anaconda3/lib/python3.7/site-packages/tensorflow_core/python/eager/execute.py in quick_execute(op_name, num_outputs, inputs, attrs, ctx, name)
     59     tensors = pywrap_tensorflow.TFE_Py_Execute(ctx._handle, device_name,
     60                                                op_name, inputs, attrs,
---> 61                                                num_outputs)
     62   except core._NotOkStatusException as e:
     63     if name is not None:

KeyboardInterrupt: 

In [8]:
history = joblib.load("dannowitz_jlab2_model_20191031.history")

In [11]:
import matplotlib.pyplot as plt

# Plot training & validation loss values
plt.plot(history['loss'])
plt.plot(history['val_loss'])
plt.title('Model loss')
plt.ylabel('Loss')
plt.xlabel('Epoch')
plt.legend(['Train', 'Test'], loc='upper right')
plt.show()


Calculate the score on my predictions

  • Scoring code provided by Thomas Britton
  • Each kinematic has different weight

In [42]:
pred = pd.read_csv('data/submission/dannowitz_jlab2_submission_20191112.csv', header=None)
truth = pd.read_csv('data/ANSWERS.csv', header=None)

In [43]:
# Calculate square root of the mean squared error, per column
# Then divide each column's RMSE by its weight and sum them all up
# (** 2 squares the whole frame in one vectorised step; DataFrame.applymap
# works element by element and is deprecated in current pandas)
sq_error = (truth - pred) ** 2
mse = sq_error.sum() / len(truth)
rmse = np.sqrt(mse)
rms_weighted = rmse / [0.03, 0.03, 0.01, 0.01, 0.011]
score = rms_weighted.sum()
score


Out[43]:
72.17822893581052

Visualize the predictions vs true

You can slice and dice the stats however you want, but it helps to be able to see your predictions at work.

Running history of me tinkering around

  • I didn't arrive at this construction from the start.
  • Many different changes and tweaks

In [161]:
def lstm_model():
    """Build and compile a single-layer LSTM regression network.

    Layers: LSTM(200) and Dense(100), both with LeakyReLU activations,
    then Dropout(0.25) and a linear output layer with
    ``N_KINEMATICS - 1`` units. Compiled with MSE loss and Adam.
    """
    timestep_shape = (N_DETECTORS-1, N_KINEMATICS)

    network = Sequential([
        LSTM(200, activation=LeakyReLU(), input_shape=timestep_shape),
        Dense(100, activation=LeakyReLU()),
        Dropout(0.25),
        Dense(N_KINEMATICS-1, activation='linear'),
    ])
    network.compile(loss='mse', optimizer='adam')

    return network

In [150]:
model = lstm_model()
model.summary()


Model: "sequential_11"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
lstm_9 (LSTM)                (None, 200)               165600    
_________________________________________________________________
dense_17 (Dense)             (None, 100)               20100     
_________________________________________________________________
dropout (Dropout)            (None, 100)               0         
_________________________________________________________________
dense_18 (Dense)             (None, 5)                 505       
=================================================================
Total params: 186,205
Trainable params: 186,205
Non-trainable params: 0
_________________________________________________________________

In [143]:
history = model.fit(x=X_train_array, y=y_train_array, validation_data=(X_test_array, y_test_array), epochs=5)


Train on 194601 samples, validate on 10000 samples
Epoch 1/5
194601/194601 [==============================] - 259s 1ms/sample - loss: 116.3854 - val_loss: 5.4352
Epoch 2/5
194601/194601 [==============================] - 233s 1ms/sample - loss: 1.8483 - val_loss: 3.8398
Epoch 3/5
194601/194601 [==============================] - 206s 1ms/sample - loss: 0.6441 - val_loss: 0.2240
Epoch 4/5
194601/194601 [==============================] - 248s 1ms/sample - loss: 0.2546 - val_loss: 0.1212
Epoch 5/5
194601/194601 [==============================] - 252s 1ms/sample - loss: 0.1832 - val_loss: 0.1357
Out[143]:
<tensorflow.python.keras.callbacks.History at 0x1a3b85b1d0>

In [151]:
history = model.fit(x=X_train_array, y=y_train_array,
                    validation_data=(X_test_array, y_test_array),
                    epochs=50, use_multiprocessing=True)


Train on 194601 samples, validate on 10000 samples
Epoch 1/50
194601/194601 [==============================] - 218s 1ms/sample - loss: 326.7025 - val_loss: 4.9697
Epoch 2/50
194601/194601 [==============================] - 198s 1ms/sample - loss: 4.7569 - val_loss: 1.5031
Epoch 3/50
194601/194601 [==============================] - 192s 989us/sample - loss: 2.2188 - val_loss: 0.7064
Epoch 4/50
194601/194601 [==============================] - 189s 973us/sample - loss: 1.6142 - val_loss: 0.3873
Epoch 5/50
194601/194601 [==============================] - 190s 975us/sample - loss: 1.3676 - val_loss: 0.2826
Epoch 6/50
194601/194601 [==============================] - 190s 977us/sample - loss: 1.2523 - val_loss: 0.2624
Epoch 7/50
194601/194601 [==============================] - 192s 984us/sample - loss: 1.2170 - val_loss: 0.1198
Epoch 8/50
194601/194601 [==============================] - 191s 984us/sample - loss: 1.1980 - val_loss: 0.3450
Epoch 9/50
194601/194601 [==============================] - 190s 974us/sample - loss: 1.2492 - val_loss: 0.1498
Epoch 10/50
194601/194601 [==============================] - 188s 967us/sample - loss: 109.6867 - val_loss: 135.0494
Epoch 11/50
194601/194601 [==============================] - 191s 979us/sample - loss: 3628.7882 - val_loss: 145.1902
Epoch 12/50
194601/194601 [==============================] - 192s 987us/sample - loss: 147.6975 - val_loss: 143.6387
Epoch 13/50
194601/194601 [==============================] - 191s 982us/sample - loss: 135.3904 - val_loss: 136.1410
Epoch 14/50
194601/194601 [==============================] - 190s 976us/sample - loss: 138.0715 - val_loss: 142.0082
Epoch 15/50
194601/194601 [==============================] - 190s 975us/sample - loss: 150.2713 - val_loss: 122.1642
Epoch 16/50
194601/194601 [==============================] - 191s 981us/sample - loss: 194.4811 - val_loss: 157.4011
Epoch 17/50
194601/194601 [==============================] - 191s 982us/sample - loss: 295.3036 - val_loss: 981.5715
Epoch 18/50
194601/194601 [==============================] - 193s 990us/sample - loss: 1135.6556 - val_loss: 149.0931
Epoch 19/50
194601/194601 [==============================] - 191s 980us/sample - loss: 2282.8591 - val_loss: 143.4084
Epoch 20/50
194601/194601 [==============================] - 191s 981us/sample - loss: 1515.8976 - val_loss: 1430.2080
Epoch 21/50
194601/194601 [==============================] - 192s 984us/sample - loss: 4582.7446 - val_loss: 184.3361
Epoch 22/50
194601/194601 [==============================] - 191s 981us/sample - loss: 2602.9031 - val_loss: 149.0369
Epoch 23/50
194601/194601 [==============================] - 191s 983us/sample - loss: 9676.4073 - val_loss: 149.6423
Epoch 24/50
194601/194601 [==============================] - 191s 980us/sample - loss: 7239.8273 - val_loss: 239.0134
Epoch 25/50
194601/194601 [==============================] - 192s 985us/sample - loss: 8267.8885 - val_loss: 193.2975
Epoch 26/50
194601/194601 [==============================] - 191s 982us/sample - loss: 21609147.9409 - val_loss: 1354990.5196
Epoch 27/50
194601/194601 [==============================] - 191s 981us/sample - loss: 188188.0964 - val_loss: 8307.5244
Epoch 28/50
194601/194601 [==============================] - 191s 980us/sample - loss: 11133.2162 - val_loss: 735.8909
Epoch 29/50
194601/194601 [==============================] - 192s 988us/sample - loss: 27304.7013 - val_loss: 550.2246
Epoch 30/50
194601/194601 [==============================] - 191s 979us/sample - loss: 3500210.6721 - val_loss: 6881.0110
Epoch 31/50
194601/194601 [==============================] - 191s 982us/sample - loss: 12381243691.6794 - val_loss: 232697165.3480
Epoch 32/50
194601/194601 [==============================] - 191s 981us/sample - loss: 704356007.8062 - val_loss: 1588541.0581
Epoch 33/50
194601/194601 [==============================] - 191s 980us/sample - loss: 5890180.5563 - val_loss: 13965.1964
Epoch 34/50
194601/194601 [==============================] - 191s 983us/sample - loss: 91581942.6742 - val_loss: 2227.1191
Epoch 35/50
194601/194601 [==============================] - 191s 982us/sample - loss: 566871311591.0952 - val_loss: 82189.5131
Epoch 36/50
194601/194601 [==============================] - 191s 980us/sample - loss: 13115223.4457 - val_loss: 860606.4987
Epoch 37/50
194601/194601 [==============================] - 194s 995us/sample - loss: 721692019.6183 - val_loss: 1078563464.5220
Epoch 38/50
194601/194601 [==============================] - 191s 982us/sample - loss: nan - val_loss: nan
Epoch 39/50
194601/194601 [==============================] - 191s 980us/sample - loss: nan - val_loss: nan
Epoch 40/50
194601/194601 [==============================] - 193s 990us/sample - loss: nan - val_loss: nan
Epoch 41/50
194601/194601 [==============================] - 192s 987us/sample - loss: nan - val_loss: nan
Epoch 42/50
194601/194601 [==============================] - 191s 980us/sample - loss: nan - val_loss: nan
Epoch 43/50
194601/194601 [==============================] - 192s 984us/sample - loss: nan - val_loss: nan
Epoch 44/50
194601/194601 [==============================] - 192s 985us/sample - loss: nan - val_loss: nan
Epoch 45/50
194601/194601 [==============================] - 192s 989us/sample - loss: nan - val_loss: nan
Epoch 46/50
194601/194601 [==============================] - 192s 984us/sample - loss: nan - val_loss: nan
Epoch 47/50
194601/194601 [==============================] - 191s 983us/sample - loss: nan - val_loss: nan
Epoch 48/50
194601/194601 [==============================] - 191s 983us/sample - loss: nan - val_loss: nan
Epoch 49/50
194601/194601 [==============================] - 191s 981us/sample - loss: nan - val_loss: nan
Epoch 50/50
194601/194601 [==============================] - 193s 990us/sample - loss: nan - val_loss: nan

In [156]:
model = lstm_model()
es = EarlyStopping(monitor='val_loss', mode='min')
history = model.fit(x=X_train_array, y=y_train_array,
                    validation_data=(X_test_array, y_test_array),
                    callbacks=[es], epochs=20, use_multiprocessing=True)


Train on 194601 samples, validate on 10000 samples
Epoch 1/20
194601/194601 [==============================] - 198s 1ms/sample - loss: 249.5579 - val_loss: 0.7746
Epoch 2/20
194601/194601 [==============================] - 2346s 12ms/sample - loss: 2.5781 - val_loss: 0.6244
Epoch 3/20
194601/194601 [==============================] - 202s 1ms/sample - loss: 1.7952 - val_loss: 0.3346
Epoch 4/20
194601/194601 [==============================] - 195s 1ms/sample - loss: 1.3872 - val_loss: 0.1804
Epoch 5/20
194601/194601 [==============================] - 196s 1ms/sample - loss: 1.2802 - val_loss: 0.1384
Epoch 6/20
194601/194601 [==============================] - 195s 1ms/sample - loss: 1.2745 - val_loss: 0.2104

In [157]:
model.save("lstm100-dense100-dropout025-epochs20-early-stopping.h5")

In [162]:
def lstm_model_lin():
    """Build and compile the LSTM variant with an explicit linear output.

    Layers: LSTM(200) and Dense(100), both with LeakyReLU activations,
    then Dropout(0.25) and an output layer with ``N_KINEMATICS - 1``
    units and ``activation='linear'``. Compiled with MSE loss and Adam.
    """
    timestep_shape = (N_DETECTORS-1, N_KINEMATICS)

    network = Sequential([
        LSTM(200, activation=LeakyReLU(), input_shape=timestep_shape),
        Dense(100, activation=LeakyReLU()),
        Dropout(0.25),
        Dense(N_KINEMATICS-1, activation='linear'),
    ])
    network.compile(loss='mse', optimizer='adam')

    return network

lin_act_model = lstm_model_lin()
es = EarlyStopping(monitor='val_loss', mode='min')
history = lin_act_model.fit(x=X_train_array[:10000], y=y_train_array[:10000],
                    validation_data=(X_test_array, y_test_array),
                    callbacks=[es], epochs=20, use_multiprocessing=True)


Train on 10000 samples, validate on 10000 samples
Epoch 1/20
10000/10000 [==============================] - 16s 2ms/sample - loss: 722.3176 - val_loss: 23.1267
Epoch 2/20
10000/10000 [==============================] - 13s 1ms/sample - loss: 25.8648 - val_loss: 9.7359
Epoch 3/20
10000/10000 [==============================] - 13s 1ms/sample - loss: 13.8674 - val_loss: 4.2730
Epoch 4/20
10000/10000 [==============================] - 13s 1ms/sample - loss: 9.6217 - val_loss: 2.6301
Epoch 5/20
10000/10000 [==============================] - 16s 2ms/sample - loss: 7.4191 - val_loss: 1.9886
Epoch 6/20
10000/10000 [==============================] - 15s 1ms/sample - loss: 6.2753 - val_loss: 2.1294

In [164]:
def lstm_model_adam():
    """Build and compile the LSTM variant with a default-activation output.

    Layers: LSTM(200) and Dense(100), both with LeakyReLU activations,
    then Dropout(0.25) and an output layer with ``N_KINEMATICS - 1``
    units and no activation argument. Compiled with MSE loss and Adam.
    """
    timestep_shape = (N_DETECTORS-1, N_KINEMATICS)

    network = Sequential([
        LSTM(200, activation=LeakyReLU(), input_shape=timestep_shape),
        Dense(100, activation=LeakyReLU()),
        Dropout(0.25),
        Dense(N_KINEMATICS-1),
    ])
    network.compile(loss='mse', optimizer='adam')

    return network

adam_model = lstm_model_adam()
es = EarlyStopping(monitor='val_loss', mode='min')
history = adam_model.fit(x=X_train_array[:10000], y=y_train_array[:10000],
                    validation_data=(X_test_array, y_test_array),
                    callbacks=[es], epochs=20, use_multiprocessing=True)


Train on 10000 samples, validate on 10000 samples
Epoch 1/20
10000/10000 [==============================] - 14s 1ms/sample - loss: 305.4409 - val_loss: 16.1725
Epoch 2/20
10000/10000 [==============================] - 13s 1ms/sample - loss: 19.2072 - val_loss: 6.6038
Epoch 3/20
10000/10000 [==============================] - 13s 1ms/sample - loss: 10.8057 - val_loss: 4.2676
Epoch 4/20
10000/10000 [==============================] - 13s 1ms/sample - loss: 7.9416 - val_loss: 2.1789
Epoch 5/20
10000/10000 [==============================] - 13s 1ms/sample - loss: 6.2256 - val_loss: 2.3635

In [166]:
def lstm_model_dropout50():
    """Build and compile the LSTM variant with a dropout rate of 0.50.

    Layers: LSTM(200) and Dense(100), both with LeakyReLU activations,
    then Dropout(0.50) and an output layer with ``N_KINEMATICS - 1``
    units and no activation argument. Compiled with MSE loss and Adam.
    """
    timestep_shape = (N_DETECTORS-1, N_KINEMATICS)

    network = Sequential([
        LSTM(200, activation=LeakyReLU(), input_shape=timestep_shape),
        Dense(100, activation=LeakyReLU()),
        Dropout(0.50),
        Dense(N_KINEMATICS-1),
    ])
    network.compile(loss='mse', optimizer='adam')

    return network

dropout50_model = lstm_model_dropout50()
es = EarlyStopping(monitor='val_loss', mode='min')
history = dropout50_model.fit(x=X_train_array[:10000], y=y_train_array[:10000],
                    validation_data=(X_test_array, y_test_array),
                    callbacks=[es], epochs=20, use_multiprocessing=True)


Train on 10000 samples, validate on 10000 samples
Epoch 1/20
10000/10000 [==============================] - 14s 1ms/sample - loss: 2095.8651 - val_loss: 253.8501
Epoch 2/20
10000/10000 [==============================] - 13s 1ms/sample - loss: 305.1297 - val_loss: 85.6102
Epoch 3/20
10000/10000 [==============================] - 14s 1ms/sample - loss: 143.2311 - val_loss: 64.7771
Epoch 4/20
10000/10000 [==============================] - 15s 1ms/sample - loss: 66.5812 - val_loss: 36.7483
Epoch 5/20
10000/10000 [==============================] - 13s 1ms/sample - loss: 45.9202 - val_loss: 25.6323
Epoch 6/20
10000/10000 [==============================] - 13s 1ms/sample - loss: 35.9694 - val_loss: 19.3018
Epoch 7/20
10000/10000 [==============================] - 13s 1ms/sample - loss: 30.0887 - val_loss: 16.9329
Epoch 8/20
10000/10000 [==============================] - 13s 1ms/sample - loss: 25.2751 - val_loss: 12.3551
Epoch 9/20
10000/10000 [==============================] - 13s 1ms/sample - loss: 21.4555 - val_loss: 10.0203
Epoch 10/20
10000/10000 [==============================] - 13s 1ms/sample - loss: 19.4236 - val_loss: 10.1653

In [172]:
def lstm_model_nodropout():
    """Build and compile the LSTM regressor variant without any dropout.

    Layer stack: LSTM(200, LeakyReLU) -> Dense(100, LeakyReLU)
    -> Dense(N_KINEMATICS-1).
    The input shape is (N_DETECTORS-1, N_KINEMATICS); the model is compiled
    with MSE loss and the Adam optimizer.

    Returns:
        The compiled Sequential model.
    """
    net = Sequential()
    stack = (
        LSTM(200, activation=LeakyReLU(),
             input_shape=(N_DETECTORS-1, N_KINEMATICS)),
        Dense(100, activation=LeakyReLU()),
        Dense(N_KINEMATICS-1),
    )
    for layer in stack:
        net.add(layer)
    net.compile(optimizer='adam', loss='mse')
    return net

# Train the no-dropout LSTM on the first 10,000 training samples, validating
# on X_test_array / y_test_array.
nodropout_model = lstm_model_nodropout()
# patience keeps its default of 0, so training stops at the first epoch whose
# val_loss does not improve. restore_best_weights=True rolls the model back to
# the best epoch instead of leaving it with that worse, final epoch's weights.
es = EarlyStopping(monitor='val_loss', mode='min', restore_best_weights=True)
# use_multiprocessing is not passed: Keras applies it to generator / Sequence
# inputs only, and the inputs here are arrays sliced in memory.
history = nodropout_model.fit(
    x=X_train_array[:10000],
    y=y_train_array[:10000],
    validation_data=(X_test_array, y_test_array),
    callbacks=[es],
    epochs=20,
)


Train on 10000 samples, validate on 10000 samples
Epoch 1/20
10000/10000 [==============================] - 14s 1ms/sample - loss: 303.2399 - val_loss: 28.9260
Epoch 2/20
10000/10000 [==============================] - 12s 1ms/sample - loss: 22.5050 - val_loss: 23.4065
Epoch 3/20
10000/10000 [==============================] - 13s 1ms/sample - loss: 17.9289 - val_loss: 13.7558
Epoch 4/20
10000/10000 [==============================] - 13s 1ms/sample - loss: 12.5689 - val_loss: 9.9349
Epoch 5/20
10000/10000 [==============================] - 13s 1ms/sample - loss: 9.5144 - val_loss: 8.0810
Epoch 6/20
10000/10000 [==============================] - 12s 1ms/sample - loss: 8.5819 - val_loss: 8.3551

In [167]:
def lstm_model_relu():
    """Build and compile the LSTM regressor variant that uses plain ReLU.

    Layer stack: LSTM(200, relu) -> Dense(100, relu) -> Dropout(0.25)
    -> Dense(N_KINEMATICS-1).
    The input shape is (N_DETECTORS-1, N_KINEMATICS); the model is compiled
    with MSE loss and the Adam optimizer.

    Returns:
        The compiled Sequential model.
    """
    layers = [
        LSTM(200, activation='relu',
             input_shape=(N_DETECTORS-1, N_KINEMATICS)),
        Dense(100, activation='relu'),
        Dropout(0.25),
        Dense(N_KINEMATICS-1),
    ]
    net = Sequential(layers)
    net.compile(optimizer='adam', loss='mse')
    return net

# Train the ReLU LSTM on the first 10,000 training samples, validating on
# X_test_array / y_test_array.
relu_model = lstm_model_relu()
# patience keeps its default of 0, so training stops at the first epoch whose
# val_loss does not improve. restore_best_weights=True rolls the model back to
# the best epoch instead of leaving it with that worse, final epoch's weights.
es = EarlyStopping(monitor='val_loss', mode='min', restore_best_weights=True)
# use_multiprocessing is not passed: Keras applies it to generator / Sequence
# inputs only, and the inputs here are arrays sliced in memory.
history = relu_model.fit(
    x=X_train_array[:10000],
    y=y_train_array[:10000],
    validation_data=(X_test_array, y_test_array),
    callbacks=[es],
    epochs=20,
)


Train on 10000 samples, validate on 10000 samples
Epoch 1/20
10000/10000 [==============================] - 14s 1ms/sample - loss: 187.4669 - val_loss: 17.1837
Epoch 2/20
10000/10000 [==============================] - 13s 1ms/sample - loss: 20.4060 - val_loss: 5.7464
Epoch 3/20
10000/10000 [==============================] - 13s 1ms/sample - loss: 12.9890 - val_loss: 2.9215
Epoch 4/20
10000/10000 [==============================] - 12s 1ms/sample - loss: 10.3698 - val_loss: 3.3642

In [170]:
def model_gru():
    """Build and compile the single-layer GRU regressor.

    Layer stack: GRU(200, LeakyReLU) -> Dense(100, LeakyReLU)
    -> Dropout(0.25) -> Dense(N_KINEMATICS-1).
    The input shape is (N_DETECTORS-1, N_KINEMATICS); the model is compiled
    with MSE loss and the Adam optimizer.

    Returns:
        The compiled Sequential model.
    """
    net = Sequential()
    stack = (
        GRU(200, activation=LeakyReLU(),
            input_shape=(N_DETECTORS-1, N_KINEMATICS)),
        Dense(100, activation=LeakyReLU()),
        Dropout(0.25),
        Dense(N_KINEMATICS-1),
    )
    for layer in stack:
        net.add(layer)
    net.compile(optimizer='adam', loss='mse')
    return net

# Train the GRU model on the first 10,000 training samples, validating on
# X_test_array / y_test_array.
gru_model = model_gru()
# patience keeps its default of 0, so training stops at the first epoch whose
# val_loss does not improve. restore_best_weights=True rolls the model back to
# the best epoch instead of leaving it with that worse, final epoch's weights.
es = EarlyStopping(monitor='val_loss', mode='min', restore_best_weights=True)
# use_multiprocessing is not passed: Keras applies it to generator / Sequence
# inputs only, and the inputs here are arrays sliced in memory.
history = gru_model.fit(
    x=X_train_array[:10000],
    y=y_train_array[:10000],
    validation_data=(X_test_array, y_test_array),
    callbacks=[es],
    epochs=20,
)


Train on 10000 samples, validate on 10000 samples
Epoch 1/20
10000/10000 [==============================] - 14s 1ms/sample - loss: 15.1533 - val_loss: 0.9619
Epoch 2/20
10000/10000 [==============================] - 12s 1ms/sample - loss: 3.2930 - val_loss: 0.6037
Epoch 3/20
10000/10000 [==============================] - 12s 1ms/sample - loss: 2.6465 - val_loss: 0.8654

Early Conclusions

  • GRU > LSTM
  • LeakyReLU > ReLU
  • adam > rmsprop
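  • dropout 0.25 > dropout 0.5 > no dropout (caveat: in the runs logged above, the no-dropout model reached a lower best val_loss, 8.08, than the 0.5-dropout model, 10.02)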

In [243]:
def model_v2():
    """Build and compile the "v2" GRU regressor.

    Layer stack: GRU(200, LeakyReLU) -> Dense(100, LeakyReLU)
    -> Dropout(0.25) -> Dense(N_KINEMATICS-1).
    The input shape is (N_DETECTORS-1, N_KINEMATICS); the model is compiled
    with MSE loss and the Adam optimizer.

    Returns:
        The compiled Sequential model.
    """
    layers = [
        GRU(200, activation=LeakyReLU(),
            input_shape=(N_DETECTORS-1, N_KINEMATICS)),
        Dense(100, activation=LeakyReLU()),
        Dropout(0.25),
        Dense(N_KINEMATICS-1),
    ]
    net = Sequential(layers)
    net.compile(optimizer='adam', loss='mse')
    return net

# Train the v2 GRU model on the full training arrays, validating on
# X_test_array / y_test_array.
v2_model = model_v2()
# patience keeps its default of 0, so training stops at the first epoch whose
# val_loss does not improve. restore_best_weights=True rolls the model back to
# the best epoch instead of leaving it with that worse, final epoch's weights.
es = EarlyStopping(monitor='val_loss', mode='min', restore_best_weights=True)
# use_multiprocessing is not passed: Keras applies it to generator / Sequence
# inputs only, and the inputs here are in-memory arrays.
history = v2_model.fit(
    x=X_train_array,
    y=y_train_array,
    validation_data=(X_test_array, y_test_array),
    callbacks=[es],
    epochs=8,
)


Train on 194601 samples, validate on 10000 samples
Epoch 1/8
194601/194601 [==============================] - 208s 1ms/sample - loss: 2.3328 - val_loss: 0.3510
Epoch 2/8
194601/194601 [==============================] - 202s 1ms/sample - loss: 1.3041 - val_loss: 0.1264
Epoch 3/8
194601/194601 [==============================] - 204s 1ms/sample - loss: 1.2133 - val_loss: 0.2112

In [ ]:
from tensorflow.keras.back

In [26]:
def model_v2_deep():
    """Build and compile the three-layer stacked GRU regressor.

    Layer stack: GRU(30) -> GRU(30) -> GRU(30) -> Dense(100) -> Dropout(0.25)
    -> Dense(N_KINEMATICS-1), with LeakyReLU as the activation of every GRU
    and of the hidden Dense layer. The first two GRUs return full sequences
    so the next GRU receives one vector per step; the third returns only its
    final output.
    The input shape is (N_DETECTORS-1, N_KINEMATICS); the model is compiled
    with MSE loss and the Adam optimizer.

    Returns:
        The compiled Sequential model.
    """
    layers = [
        GRU(30, activation=LeakyReLU(),
            input_shape=(N_DETECTORS-1, N_KINEMATICS),
            return_sequences=True),
        GRU(30, activation=LeakyReLU(), return_sequences=True),
        GRU(30, activation=LeakyReLU()),
        Dense(100, activation=LeakyReLU()),
        Dropout(0.25),
        Dense(N_KINEMATICS-1),
    ]
    net = Sequential(layers)
    net.compile(optimizer='adam', loss='mse')
    return net

# Instantiate the stacked-GRU model and print its layer / parameter summary.
v2_model_deep = model_v2_deep()
v2_model_deep.summary()


Model: "sequential_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
gru_7 (GRU)                  (None, 24, 30)            3420      
_________________________________________________________________
gru_8 (GRU)                  (None, 24, 30)            5580      
_________________________________________________________________
gru_9 (GRU)                  (None, 30)                5580      
_________________________________________________________________
dense_3 (Dense)              (None, 100)               3100      
_________________________________________________________________
dropout_1 (Dropout)          (None, 100)               0         
_________________________________________________________________
dense_4 (Dense)              (None, 5)                 505       
=================================================================
Total params: 18,185
Trainable params: 18,185
Non-trainable params: 0
_________________________________________________________________

In [ ]:
# Train the stacked-GRU model on the full training arrays. Early stopping
# tolerates two non-improving epochs and then restores the best weights seen.
es = EarlyStopping(monitor='val_loss', mode='min', patience=2, restore_best_weights=True)
# use_multiprocessing is not passed: Keras applies it to generator / Sequence
# inputs only, and the inputs here are in-memory arrays.
history = v2_model_deep.fit(x=X_train_array, y=y_train_array,
                            validation_data=(X_test_array, y_test_array),
                            callbacks=[es],
                            epochs=8)


Train on 194601 samples, validate on 10000 samples
Epoch 1/8
194601/194601 [==============================] - 304s 2ms/sample - loss: 2.8970 - val_loss: 0.4792
Epoch 2/8
194601/194601 [==============================] - 290s 1ms/sample - loss: 1.5549 - val_loss: 0.5346
Epoch 3/8
194601/194601 [==============================] - 281s 1ms/sample - loss: 1.3498 - val_loss: 0.3451
Epoch 4/8
194601/194601 [==============================] - 275s 1ms/sample - loss: 1.4204 - val_loss: 0.4317
Epoch 5/8
194601/194601 [==============================] - 289s 1ms/sample - loss: 1.3467 - val_loss: 0.2232
Epoch 6/8
194601/194601 [==============================] - 268s 1ms/sample - loss: 1.2464 - val_loss: 0.1453
Epoch 7/8
194601/194601 [==============================] - 279s 1ms/sample - loss: 1.2127 - val_loss: 0.1390
Epoch 8/8
158144/194601 [=======================>......] - ETA: 55s - loss: 1.1842

In [242]:
def model_v2_dbl_gru():
    """Build and compile a regressor with two stacked 200-unit GRU layers.

    Layer stack: GRU(200, LeakyReLU, return_sequences=True)
    -> GRU(200, LeakyReLU) -> Dense(100, LeakyReLU) -> Dropout(0.25)
    -> Dense(N_KINEMATICS-1).
    The input shape is (N_DETECTORS-1, N_KINEMATICS); the model is compiled
    with MSE loss and the Adam optimizer.

    Returns:
        The compiled Sequential model.
    """
    model = Sequential()
    model.add(GRU(200, activation=LeakyReLU(), input_shape=(N_DETECTORS-1, N_KINEMATICS),
                  return_sequences=True))
    # Only the first layer of a Sequential model declares input_shape. The
    # second GRU consumes the first GRU's output sequence, so the raw-input
    # shape it previously carried was ignored by Keras and misdescribed its
    # real input.
    model.add(GRU(200, activation=LeakyReLU()))
    model.add(Dense(100, activation=LeakyReLU()))
    model.add(Dropout(0.25))
    model.add(Dense(N_KINEMATICS-1))
    model.compile(loss='mse', optimizer='adam')

    return model

# Train the double-GRU model on the first 20,000 training samples, validating
# on X_test_array / y_test_array.
v2_model_dbl_gru = model_v2_dbl_gru()
# es is created but not handed to fit (the callbacks argument is commented
# out), so all 10 epochs run.
es = EarlyStopping(monitor='val_loss', mode='min')
# use_multiprocessing is not passed: Keras applies it to generator / Sequence
# inputs only, and the inputs here are arrays sliced in memory.
history = v2_model_dbl_gru.fit(x=X_train_array[:20000], y=y_train_array[:20000],
                    validation_data=(X_test_array, y_test_array),
                    #callbacks=[es],
                    epochs=10)


Train on 20000 samples, validate on 10000 samples
Epoch 1/10
20000/20000 [==============================] - 55s 3ms/sample - loss: 6.2944 - val_loss: 0.8832
Epoch 2/10
20000/20000 [==============================] - 50s 3ms/sample - loss: 2.3380 - val_loss: 0.8004
Epoch 3/10
20000/20000 [==============================] - 49s 2ms/sample - loss: 1.9149 - val_loss: 0.6378
Epoch 4/10
20000/20000 [==============================] - 50s 3ms/sample - loss: 1.8121 - val_loss: 0.4890
Epoch 5/10
20000/20000 [==============================] - 52s 3ms/sample - loss: 1.7023 - val_loss: 0.4793
Epoch 6/10
20000/20000 [==============================] - 50s 2ms/sample - loss: 1.7116 - val_loss: 0.5745
Epoch 7/10
20000/20000 [==============================] - 51s 3ms/sample - loss: 1.7073 - val_loss: 0.4136
Epoch 8/10
20000/20000 [==============================] - 51s 3ms/sample - loss: 1.6392 - val_loss: 0.6285
Epoch 9/10
20000/20000 [==============================] - 50s 2ms/sample - loss: 1.6129 - val_loss: 0.6862
Epoch 10/10
20000/20000 [==============================] - 50s 3ms/sample - loss: 1.5222 - val_loss: 0.4429

In [177]:
def model_v2_2x_dropout():
    """Build and compile the GRU regressor with dropout after both hidden layers.

    Layer stack: GRU(200, LeakyReLU) -> Dropout(0.25)
    -> Dense(100, LeakyReLU) -> Dropout(0.25) -> Dense(N_KINEMATICS-1).
    The input shape is (N_DETECTORS-1, N_KINEMATICS); the model is compiled
    with MSE loss and the Adam optimizer.

    Returns:
        The compiled Sequential model.
    """
    net = Sequential()
    stack = (
        GRU(200, activation=LeakyReLU(),
            input_shape=(N_DETECTORS-1, N_KINEMATICS)),
        Dropout(0.25),
        Dense(100, activation=LeakyReLU()),
        Dropout(0.25),
        Dense(N_KINEMATICS-1),
    )
    for layer in stack:
        net.add(layer)
    net.compile(optimizer='adam', loss='mse')
    return net

# Train the double-dropout GRU model on the first 20,000 training samples,
# validating on X_test_array / y_test_array.
v2_model_dbl_dropout = model_v2_2x_dropout()
# patience keeps its default of 0, so training stops at the first epoch whose
# val_loss does not improve. restore_best_weights=True rolls the model back to
# the best epoch instead of leaving it with that worse, final epoch's weights.
es = EarlyStopping(monitor='val_loss', mode='min', restore_best_weights=True)
# use_multiprocessing is not passed: Keras applies it to generator / Sequence
# inputs only, and the inputs here are arrays sliced in memory.
history = v2_model_dbl_dropout.fit(
    x=X_train_array[:20000],
    y=y_train_array[:20000],
    validation_data=(X_test_array, y_test_array),
    callbacks=[es],
    epochs=20,
)


Train on 20000 samples, validate on 10000 samples
Epoch 1/20
20000/20000 [==============================] - 24s 1ms/sample - loss: 18.9925 - val_loss: 2.0618
Epoch 2/20
20000/20000 [==============================] - 22s 1ms/sample - loss: 4.9002 - val_loss: 1.2547
Epoch 3/20
20000/20000 [==============================] - 22s 1ms/sample - loss: 3.7768 - val_loss: 0.6070
Epoch 4/20
20000/20000 [==============================] - 22s 1ms/sample - loss: 3.2649 - val_loss: 1.2479

In [200]:
def model_v2_big_gru():
    """Build and compile the GRU regressor with a 400-unit recurrent layer.

    Layer stack: GRU(400, LeakyReLU) -> Dense(100, LeakyReLU)
    -> Dropout(0.25) -> Dense(N_KINEMATICS-1).
    The input shape is (N_DETECTORS-1, N_KINEMATICS); the model is compiled
    with MSE loss and the Adam optimizer.

    Returns:
        The compiled Sequential model.
    """
    layers = [
        GRU(400, activation=LeakyReLU(),
            input_shape=(N_DETECTORS-1, N_KINEMATICS)),
        Dense(100, activation=LeakyReLU()),
        Dropout(0.25),
        Dense(N_KINEMATICS-1),
    ]
    net = Sequential(layers)
    net.compile(optimizer='adam', loss='mse')
    return net

# Train the 400-unit GRU model on the first 20,000 training samples,
# validating on X_test_array / y_test_array.
v2_model_big_gru = model_v2_big_gru()
# es is created but not handed to fit (the callbacks argument is commented
# out), so no early stopping is applied to this run.
es = EarlyStopping(monitor='val_loss', mode='min')
# use_multiprocessing is not passed: Keras applies it to generator / Sequence
# inputs only, and the inputs here are arrays sliced in memory.
history = v2_model_big_gru.fit(x=X_train_array[:20000], y=y_train_array[:20000],
                    validation_data=(X_test_array, y_test_array),
                    #callbacks=[es],
                    epochs=10)


---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-200-da22d57a70b0> in <module>
     10     return model
     11 
---> 12 v2_model_big_gru = model_v2_big_gru()
     13 es = EarlyStopping(monitor='val_loss', mode='min')
     14 history = v2_model_big_gru.fit(x=X_train_array[:20000], y=y_train_array[:20000],

<ipython-input-200-da22d57a70b0> in model_v2_big_gru()
      2 
      3     model = Sequential()
----> 4     model.add(GRU(400, activation=LeakyReLU(), input_shape=(1, None, N_DETECTORS-1, N_KINEMATICS)))
      5     model.add(Dense(100, activation=LeakyReLU()))
      6     model.add(Dropout(0.25))

//anaconda3/lib/python3.7/site-packages/tensorflow_core/python/training/tracking/base.py in _method_wrapper(self, *args, **kwargs)
    455     self._self_setattr_tracking = False  # pylint: disable=protected-access
    456     try:
--> 457       result = method(self, *args, **kwargs)
    458     finally:
    459       self._self_setattr_tracking = previous_value  # pylint: disable=protected-access

//anaconda3/lib/python3.7/site-packages/tensorflow_core/python/keras/engine/sequential.py in add(self, layer)
    175           # and create the node connecting the current layer
    176           # to the input layer we just created.
--> 177           layer(x)
    178           set_inputs = True
    179 

//anaconda3/lib/python3.7/site-packages/tensorflow_core/python/keras/layers/recurrent.py in __call__(self, inputs, initial_state, constants, **kwargs)
    621 
    622     if initial_state is None and constants is None:
--> 623       return super(RNN, self).__call__(inputs, **kwargs)
    624 
    625     # If any of `initial_state` or `constants` are specified and are Keras

//anaconda3/lib/python3.7/site-packages/tensorflow_core/python/keras/engine/base_layer.py in __call__(self, inputs, *args, **kwargs)
    770         # are casted, not before.
    771         input_spec.assert_input_compatibility(self.input_spec, inputs,
--> 772                                               self.name)
    773         graph = backend.get_graph()
    774         with graph.as_default(), backend.name_scope(self._name_scope()):

//anaconda3/lib/python3.7/site-packages/tensorflow_core/python/keras/engine/input_spec.py in assert_input_compatibility(input_spec, inputs, layer_name)
    175                          'expected ndim=' + str(spec.ndim) + ', found ndim=' +
    176                          str(ndim) + '. Full shape received: ' +
--> 177                          str(x.shape.as_list()))
    178     if spec.max_ndim is not None:
    179       ndim = x.shape.ndims

ValueError: Input 0 of layer gru_14 is incompatible with the layer: expected ndim=3, found ndim=5. Full shape received: [None, 1, None, 24, 6]

In [181]:
# Continue training the 400-unit GRU model: initial_epoch=10 with epochs=15
# runs epochs 11 through 15 on the same 20,000-sample subset.
# use_multiprocessing is not passed: Keras applies it to generator / Sequence
# inputs only, and the inputs here are arrays sliced in memory.
v2_model_big_gru.fit(x=X_train_array[:20000], y=y_train_array[:20000],
                    validation_data=(X_test_array, y_test_array),
                    #callbacks=[es],
                    epochs=15, initial_epoch=10)


Train on 20000 samples, validate on 10000 samples
Epoch 11/15
20000/20000 [==============================] - 54s 3ms/sample - loss: 1.3613 - val_loss: 0.5861
Epoch 12/15
20000/20000 [==============================] - 55s 3ms/sample - loss: 1.3655 - val_loss: 0.2373
Epoch 13/15
20000/20000 [==============================] - 54s 3ms/sample - loss: 1.2724 - val_loss: 0.1703
Epoch 14/15
20000/20000 [==============================] - 54s 3ms/sample - loss: 1.2940 - val_loss: 0.3137
Epoch 15/15
20000/20000 [==============================] - 54s 3ms/sample - loss: 1.2810 - val_loss: 0.1698
Out[181]:
<tensorflow.python.keras.callbacks.History at 0x1a8e946748>

Try CNN + GRU


In [221]:
# Display the shape of the training input array.
X_train_array.shape


Out[221]:
(194601, 24, 6)

In [240]:
def cnn_gru():
    """Build and compile a Conv1D front end feeding a GRU regressor.

    Layer stack: Conv1D(filters=5, kernel_size=2, strides=1)
    -> GRU(200, LeakyReLU) -> Dense(100, LeakyReLU) -> Dropout(0.25)
    -> Dense(N_KINEMATICS-1).
    The input shape is (N_DETECTORS-1, N_KINEMATICS); the model is compiled
    with MSE loss and the Adam optimizer.

    Returns:
        The compiled Sequential model.
    """
    # Conv1D is not among the layers imported in the notebook's import cell,
    # so it is imported here to keep this cell runnable on its own.
    from tensorflow.keras.layers import Conv1D

    model = Sequential()
    model.add(Conv1D(filters=5, kernel_size=2, strides=1, input_shape=(N_DETECTORS-1, N_KINEMATICS)))
    #model.add(MaxPooling1D())
    model.add(GRU(200, activation=LeakyReLU()))
    model.add(Dense(100, activation=LeakyReLU()))
    model.add(Dropout(0.25))
    model.add(Dense(N_KINEMATICS-1))
    model.compile(loss='mse', optimizer='adam')

    return model

# Instantiate the Conv1D + GRU model and print its layer / parameter summary.
cnn_model = cnn_gru()
cnn_model.summary()


Model: "sequential_73"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
conv1d_20 (Conv1D)           (None, 23, 5)             65        
_________________________________________________________________
gru_25 (GRU)                 (None, 200)               124200    
_________________________________________________________________
dense_69 (Dense)             (None, 100)               20100     
_________________________________________________________________
dropout_25 (Dropout)         (None, 100)               0         
_________________________________________________________________
dense_70 (Dense)             (None, 5)                 505       
=================================================================
Total params: 144,870
Trainable params: 144,870
Non-trainable params: 0
_________________________________________________________________

In [241]:
# Train the Conv1D + GRU model for a fixed 10 epochs on the first 20,000
# training samples; early stopping is left disabled for this run.
#es = EarlyStopping(monitor='val_loss', mode='min')
# use_multiprocessing is not passed: Keras applies it to generator / Sequence
# inputs only, and the inputs here are arrays sliced in memory.
history = cnn_model.fit(x=X_train_array[:20000], y=y_train_array[:20000],
                      validation_data=(X_test_array, y_test_array),
                      epochs=10)


Train on 20000 samples, validate on 10000 samples
Epoch 1/10
20000/20000 [==============================] - 23s 1ms/sample - loss: 13.3734 - val_loss: 1.6757
Epoch 2/10
20000/20000 [==============================] - 22s 1ms/sample - loss: 3.0693 - val_loss: 0.8470
Epoch 3/10
20000/20000 [==============================] - 21s 1ms/sample - loss: 2.6314 - val_loss: 0.7303
Epoch 4/10
20000/20000 [==============================] - 21s 1ms/sample - loss: 2.1451 - val_loss: 0.8053
Epoch 5/10
20000/20000 [==============================] - 21s 1ms/sample - loss: 2.0921 - val_loss: 0.7273
Epoch 6/10
20000/20000 [==============================] - 21s 1ms/sample - loss: 1.9285 - val_loss: 0.4551
Epoch 7/10
20000/20000 [==============================] - 21s 1ms/sample - loss: 1.8432 - val_loss: 0.6966
Epoch 8/10
20000/20000 [==============================] - 21s 1ms/sample - loss: 1.6800 - val_loss: 0.5120
Epoch 9/10
20000/20000 [==============================] - 21s 1ms/sample - loss: 1.5675 - val_loss: 0.3390
Epoch 10/10
20000/20000 [==============================] - 21s 1ms/sample - loss: 1.5159 - val_loss: 0.3983

In [251]:
# Display the per-epoch metrics dict stored on the current `history` object.
history.history


Out[251]:
{'loss': [3.3281039200901317,
  1.6039592637484563,
  1.3489888134789536,
  1.2624885631565317,
  1.2353142021468715,
  1.211998767219029,
  1.1837373140878185,
  1.1759768705626037],
 'val_loss': [0.778679012966156,
  0.5407980192184448,
  0.5594191231250762,
  0.4179811120986939,
  0.27897539434432983,
  0.18599163811206817,
  0.1257927789211273,
  0.10037544323205948]}

Enough tinkering around

  • Formalize this into some scripts
  • Make predictions on competition test data

In [1]:
from train import train
from predict import predict

In [2]:
# Launch training through the project's train() helper for up to 100 epochs.
# NOTE(review): frac presumably selects the fraction of the training data and
# ret_model=True presumably makes train() return the fitted model; confirm
# both against train.py.
model = train(
    frac=1.00,
    filename="dannowitz_jlab2_model",
    epochs=100,
    ret_model=True,
)


Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
gru (GRU)                    (None, 24, 30)            3420      
_________________________________________________________________
leaky_re_lu (LeakyReLU)      (None, 24, 30)            0         
_________________________________________________________________
gru_1 (GRU)                  (None, 24, 30)            5580      
_________________________________________________________________
leaky_re_lu_1 (LeakyReLU)    (None, 24, 30)            0         
_________________________________________________________________
gru_2 (GRU)                  (None, 30)                5580      
_________________________________________________________________
leaky_re_lu_2 (LeakyReLU)    (None, 30)                0         
_________________________________________________________________
dense (Dense)                (None, 100)               3100      
_________________________________________________________________
leaky_re_lu_3 (LeakyReLU)    (None, 100)               0         
_________________________________________________________________
dropout (Dropout)            (None, 100)               0         
_________________________________________________________________
dense_1 (Dense)              (None, 5)                 505       
=================================================================
Total params: 18,185
Trainable params: 18,185
Non-trainable params: 0
_________________________________________________________________
None
Train on 194601 samples, validate on 10000 samples
Epoch 1/100
194601/194601 [==============================] - 308s 2ms/sample - loss: 5.6312 - val_loss: 1.2376
Epoch 2/100
103392/194601 [==============>...............] - ETA: 2:17 - loss: 2.2091WARNING:tensorflow:Early stopping conditioned on metric `val_loss` which is not available. Available metrics are: loss
---------------------------------------------------------------------------
KeyboardInterrupt                         Traceback (most recent call last)
<ipython-input-2-7f967f082214> in <module>
----> 1 model = train(frac=1.00, filename="dannowitz_jlab2_model", epochs=100, ret_model=True)

~/Work/polymath-progression-blog/jlab-ml-lunch-2/train.py in train(frac, filename, epochs, ret_model)
     90         validation_data=(X_test, y_test),
     91         callbacks=[es],
---> 92         epochs=epochs,
     93         use_multiprocessing=True,
     94     )

//anaconda3/lib/python3.7/site-packages/tensorflow_core/python/keras/engine/training.py in fit(self, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_freq, max_queue_size, workers, use_multiprocessing, **kwargs)
    732         max_queue_size=max_queue_size,
    733         workers=workers,
--> 734         use_multiprocessing=use_multiprocessing)
    735 
    736   def evaluate(self,

//anaconda3/lib/python3.7/site-packages/tensorflow_core/python/keras/engine/training_v2.py in fit(self, model, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_freq, **kwargs)
    322                 mode=ModeKeys.TRAIN,
    323                 training_context=training_context,
--> 324                 total_epochs=epochs)
    325             cbks.make_logs(model, epoch_logs, training_result, ModeKeys.TRAIN)
    326 

//anaconda3/lib/python3.7/site-packages/tensorflow_core/python/keras/engine/training_v2.py in run_one_epoch(model, iterator, execution_function, dataset_size, batch_size, strategy, steps_per_epoch, num_samples, mode, training_context, total_epochs)
    121         step=step, mode=mode, size=current_batch_size) as batch_logs:
    122       try:
--> 123         batch_outs = execution_function(iterator)
    124       except (StopIteration, errors.OutOfRangeError):
    125         # TODO(kaftan): File bug about tf function and errors.OutOfRangeError?

//anaconda3/lib/python3.7/site-packages/tensorflow_core/python/keras/engine/training_v2_utils.py in execution_function(input_fn)
     84     # `numpy` translates Tensors to values in Eager mode.
     85     return nest.map_structure(_non_none_constant_value,
---> 86                               distributed_function(input_fn))
     87 
     88   return execution_function

//anaconda3/lib/python3.7/site-packages/tensorflow_core/python/eager/def_function.py in __call__(self, *args, **kwds)
    413       # In this case we have created variables on the first call, so we run the
    414       # defunned version which is guaranteed to never create variables.
--> 415       return self._stateless_fn(*args, **kwds)  # pylint: disable=not-callable
    416     elif self._stateful_fn is not None:
    417       # In this case we have not created variables on the first call. So we can

//anaconda3/lib/python3.7/site-packages/tensorflow_core/python/eager/function.py in __call__(self, *args, **kwargs)
   1820     """Calls a graph function specialized to the inputs."""
   1821     graph_function, args, kwargs = self._maybe_define_function(args, kwargs)
-> 1822     return graph_function._filtered_call(args, kwargs)  # pylint: disable=protected-access
   1823 
   1824   @property

//anaconda3/lib/python3.7/site-packages/tensorflow_core/python/eager/function.py in _filtered_call(self, args, kwargs)
   1139          if isinstance(t, (ops.Tensor,
   1140                            resource_variable_ops.BaseResourceVariable))),
-> 1141         self.captured_inputs)
   1142 
   1143   def _call_flat(self, args, captured_inputs, cancellation_manager=None):

//anaconda3/lib/python3.7/site-packages/tensorflow_core/python/eager/function.py in _call_flat(self, args, captured_inputs, cancellation_manager)
   1222     if executing_eagerly:
   1223       flat_outputs = forward_function.call(
-> 1224           ctx, args, cancellation_manager=cancellation_manager)
   1225     else:
   1226       gradient_name = self._delayed_rewrite_functions.register()

//anaconda3/lib/python3.7/site-packages/tensorflow_core/python/eager/function.py in call(self, ctx, args, cancellation_manager)
    509               inputs=args,
    510               attrs=("executor_type", executor_type, "config_proto", config),
--> 511               ctx=ctx)
    512         else:
    513           outputs = execute.execute_with_cancellation(

//anaconda3/lib/python3.7/site-packages/tensorflow_core/python/eager/execute.py in quick_execute(op_name, num_outputs, inputs, attrs, ctx, name)
     59     tensors = pywrap_tensorflow.TFE_Py_Execute(ctx._handle, device_name,
     60                                                op_name, inputs, attrs,
---> 61                                                num_outputs)
     62   except core._NotOkStatusException as e:
     63     if name is not None:

KeyboardInterrupt: 

In [ ]:
# Call the project's predict() helper with the saved model file, the input
# CSV and the submission output path.
# NOTE(review): presumably this loads the .h5 model written by train() and
# writes predictions to output_filename; confirm against predict.py.
# The output name uses the same "dannowitz" spelling as the model file.
preds = predict(model_filename="dannowitz_jlab2_model.h5",
                data_filename="test_in (1).csv",
                output_filename="dannowitz_jlab2_submission.csv")