In [1]:
import tensorflow as tf
tf.logging.set_verbosity(tf.logging.ERROR)
print(tf.__version__)


1.12.0

In [2]:
!curl -O https://raw.githubusercontent.com/DJCordhose/deep-learning-crash-course-notebooks/master/data/insurance-customers-1500.csv


  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100 26783  100 26783    0     0   138k      0 --:--:-- --:--:-- --:--:--  138k

In [0]:
from tensorflow import keras
from tensorflow.keras.layers import Input, Flatten, GlobalAveragePooling1D, Dense
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.utils import to_categorical

In [0]:
import pandas as pd
df = pd.read_csv('./insurance-customers-1500.csv', sep=';')
one_hot_groups = pd.DataFrame(to_categorical(df['group']), columns={'red', 'green', 'yellow'})
main = df.drop('group', axis='columns')

In [5]:
main.head()


Out[5]:
speed age miles
0 98.0 44.0 25.0
1 118.0 54.0 24.0
2 111.0 26.0 34.0
3 97.0 25.0 10.0
4 114.0 38.0 22.0

In [6]:
one_hot_groups.head()


Out[6]:
green red yellow
0 0.0 1.0 0.0
1 0.0 1.0 0.0
2 1.0 0.0 0.0
3 0.0 0.0 1.0
4 0.0 1.0 0.0

Challenge: we could feed in both parts in a united way, but they need different loss functions

options:


In [7]:
# often does not even train, so choose a seed known to be good
from tensorflow.keras.initializers import glorot_normal
seed= 17

from tensorflow.keras.layers import concatenate, average, add

# first multi in
main_input = Input(shape=(3,), name='main_input')
group_input = Input(shape=(3,), name='group_input')

# hopefully this balances out inputs to same scale
encoded_main = Dense(units=2, activation='relu', name="main_encoder", bias_initializer='zeros', kernel_initializer=glorot_normal(seed=seed))(main_input)
encoded_group = Dense(units=2, activation='relu', name="group_encoder", bias_initializer='zeros', kernel_initializer=glorot_normal(seed=seed))(group_input)

# latent representation

merged = concatenate([encoded_main, encoded_group])
# this might work as well
# merged = average([encoded_main, encoded_group])

# the effective latent encoding
encoding_dim = 2
encoded = Dense(units=encoding_dim, activation='relu', name="encoder", bias_initializer='zeros', kernel_initializer=glorot_normal(seed=seed))(merged)

# then multi out
main_output = Dense(units=3, activation='linear', name="main_output", bias_initializer='zeros', kernel_initializer=glorot_normal(seed=seed))(encoded)
group_output = Dense(units=3, activation='softmax', name="group_output", bias_initializer='zeros', kernel_initializer=glorot_normal(seed=seed))(encoded)

autoencoder = Model(inputs=[main_input, group_input], outputs=[main_output, group_output])

adam = keras.optimizers.Adam(lr=0.01)
# adam = keras.optimizers.Adam()

autoencoder.compile(optimizer=adam,
              loss={'main_output': 'mae', 'group_output': 'categorical_crossentropy'},
              loss_weights={'main_output': 1., 'group_output': 50.})

autoencoder.summary()


__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
==================================================================================================
main_input (InputLayer)         (None, 3)            0                                            
__________________________________________________________________________________________________
group_input (InputLayer)        (None, 3)            0                                            
__________________________________________________________________________________________________
main_encoder (Dense)            (None, 2)            8           main_input[0][0]                 
__________________________________________________________________________________________________
group_encoder (Dense)           (None, 2)            8           group_input[0][0]                
__________________________________________________________________________________________________
concatenate (Concatenate)       (None, 4)            0           main_encoder[0][0]               
                                                                 group_encoder[0][0]              
__________________________________________________________________________________________________
encoder (Dense)                 (None, 2)            10          concatenate[0][0]                
__________________________________________________________________________________________________
main_output (Dense)             (None, 3)            9           encoder[0][0]                    
__________________________________________________________________________________________________
group_output (Dense)            (None, 3)            9           encoder[0][0]                    
==================================================================================================
Total params: 44
Trainable params: 44
Non-trainable params: 0
__________________________________________________________________________________________________

In [8]:
BATCH_SIZE = 1
EPOCHS=10

%time history = autoencoder.fit(\
    x={'main_input': main, 'group_input': one_hot_groups},\
    y={'main_output': main, 'group_output': one_hot_groups},\
    epochs=EPOCHS, batch_size=BATCH_SIZE, verbose=1)


Epoch 1/10
1500/1500 [==============================] - 11s 7ms/step - loss: 61.6617 - main_output_loss: 47.4692 - group_output_loss: 0.2839
Epoch 2/10
1500/1500 [==============================] - 10s 7ms/step - loss: 36.5731 - main_output_loss: 33.4707 - group_output_loss: 0.0620
Epoch 3/10
1500/1500 [==============================] - 10s 7ms/step - loss: 31.5952 - main_output_loss: 29.7206 - group_output_loss: 0.0375
Epoch 4/10
1500/1500 [==============================] - 10s 7ms/step - loss: 28.3729 - main_output_loss: 27.0126 - group_output_loss: 0.0272
Epoch 5/10
1500/1500 [==============================] - 10s 7ms/step - loss: 25.8566 - main_output_loss: 24.8839 - group_output_loss: 0.0195
Epoch 6/10
1500/1500 [==============================] - 10s 7ms/step - loss: 23.8403 - main_output_loss: 23.0631 - group_output_loss: 0.0155
Epoch 7/10
1500/1500 [==============================] - 10s 7ms/step - loss: 22.2121 - main_output_loss: 21.6489 - group_output_loss: 0.0113
Epoch 8/10
1500/1500 [==============================] - 10s 7ms/step - loss: 20.8655 - main_output_loss: 20.4257 - group_output_loss: 0.0088
Epoch 9/10
1500/1500 [==============================] - 10s 7ms/step - loss: 19.8176 - main_output_loss: 19.4876 - group_output_loss: 0.0066
Epoch 10/10
1500/1500 [==============================] - 10s 7ms/step - loss: 18.8502 - main_output_loss: 18.6064 - group_output_loss: 0.0049
CPU times: user 2min 11s, sys: 18.1 s, total: 2min 29s
Wall time: 1min 45s

In [9]:
import matplotlib.pyplot as plt

plt.ylabel('loss')
plt.xlabel('epoch')
plt.yscale('log')

plt.plot(history.history['loss'], 'b')
plt.plot(history.history['main_output_loss'], 'r')
plt.plot(history.history['group_output_loss'], 'g')

plt.legend(['combined loss', 'main loss', 'group loss'], loc='upper right')


Out[9]:
<matplotlib.legend.Legend at 0x7f6140562080>

In [0]:
samples = df.sample(10).reset_index(drop=True)
main_samples = samples.drop('group', axis='columns')
group_samples = pd.DataFrame(to_categorical(samples['group']), columns={'red', 'green', 'yellow'})

In [11]:
merged_samples = pd.concat([main_samples, group_samples], axis=1)
merged_samples


Out[11]:
speed age miles green red yellow
0 151.0 36.0 50.0 1.0 0.0 0.0
1 139.0 46.0 40.0 0.0 1.0 0.0
2 105.0 37.0 1.0 0.0 0.0 1.0
3 136.0 63.0 31.0 0.0 0.0 1.0
4 107.0 43.0 5.0 0.0 0.0 1.0
5 97.0 18.0 16.0 0.0 0.0 1.0
6 100.0 41.0 23.0 0.0 1.0 0.0
7 120.0 45.0 38.0 0.0 1.0 0.0
8 124.0 45.0 30.0 1.0 0.0 0.0
9 129.0 56.0 7.0 0.0 0.0 1.0

In [12]:
autoencoder.predict([main_samples, group_samples])


Out[12]:
[array([[117.63666 ,  40.05116 ,  30.855614],
        [ 71.85322 ,  42.353855,  26.083645],
        [134.22884 ,  39.216652,  32.585   ],
        [134.22884 ,  39.216652,  32.585   ],
        [134.22884 ,  39.216652,  32.585   ],
        [134.22884 ,  39.216652,  32.585   ],
        [ 71.85322 ,  42.353855,  26.083645],
        [ 71.85322 ,  42.353855,  26.083645],
        [117.63666 ,  40.05116 ,  30.855614],
        [134.22884 ,  39.216652,  32.585   ]], dtype=float32),
 array([[9.86562431e-01, 5.59623174e-08, 1.34376325e-02],
        [9.63175553e-05, 9.99903679e-01, 5.94358097e-18],
        [5.65276947e-03, 2.65732531e-14, 9.94347274e-01],
        [5.65276947e-03, 2.65732531e-14, 9.94347274e-01],
        [5.65276947e-03, 2.65732531e-14, 9.94347274e-01],
        [5.65276947e-03, 2.65732531e-14, 9.94347274e-01],
        [9.63175553e-05, 9.99903679e-01, 5.94358097e-18],
        [9.63175553e-05, 9.99903679e-01, 5.94358097e-18],
        [9.86562431e-01, 5.59623174e-08, 1.34376325e-02],
        [5.65276947e-03, 2.65732531e-14, 9.94347274e-01]], dtype=float32)]

In [0]:
encoder = Model(inputs=[main_input, group_input], outputs=encoded)
latent_representation = encoder.predict(x={'main_input': main, 'group_input': one_hot_groups})

In [14]:
latent_representation.shape


Out[14]:
(1500, 2)

In [15]:
from matplotlib.colors import ListedColormap

# * 0 - red: many accidents
# * 1 - green: few or no accidents
# * 2 - yellow: in the middle
colors = df['group']
color_map = ListedColormap(['#AA4444', '#006000', '#EEEE44'])

latent_x = latent_representation[:, 0]
latent_y = latent_representation[:, 1]

plt.scatter(latent_x, latent_y, alpha=0.5, s=100, marker='o', edgecolors='w', cmap=color_map, c=colors)


Out[15]:
<matplotlib.collections.PathCollection at 0x7f61403b1358>

What a funny result: trained to (sometims almost) perfectly reproduce the groups, but not much of anything else

quite obviously we ware emphasizing the group loss way too much

Making results more stable

  • when you visualize latent spaces they should not change much when re-training or fitting additional data points
  • when working with autoencoders or embeddings there are two ways to make that happen
    1. save model, do not retrain from scratch and only fit new data points with low learning rate
    2. save output from embedding and keep new latent space similar by adding to the loss function

In [0]:
# save complete model
autoencoder.save('autoencoder-v1.h5')
del autoencoder

Option I: Saving and reloading model, retrain


In [0]:
# later...
from tensorflow.keras.models import load_model
autoencoder = load_model('autoencoder-v1.h5')

In [0]:
main_input = autoencoder.get_layer('main_input').input
group_input = autoencoder.get_layer('group_input').input
encode = autoencoder.get_layer('encoder').output
encoder = Model(inputs=[main_input, group_input], outputs=encode)

In [19]:
latent_representation = encoder.predict(x={'main_input': main, 'group_input': one_hot_groups})
colors = df['group']
color_map = ListedColormap(['#AA4444', '#006000', '#EEEE44'])

latent_x = latent_representation[:, 0]
latent_y = latent_representation[:, 1]

plt.title('original data, old model')

plt.scatter(latent_x, latent_y, alpha=0.5, s=100, marker='o', edgecolors='w', cmap=color_map, c=colors)


Out[19]:
<matplotlib.collections.PathCollection at 0x7f6129b0e860>

Using new dataset


In [20]:
!curl -O https://raw.githubusercontent.com/DJCordhose/deep-learning-crash-course-notebooks/master/data/insurance-customers-300.csv


  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100  5376  100  5376    0     0  27015      0 --:--:-- --:--:-- --:--:-- 27015

In [0]:
import pandas as pd

new_df = pd.read_csv('./insurance-customers-300.csv', sep=';')
new_one_hot_groups = pd.DataFrame(to_categorical(new_df['group']), columns={'red', 'green', 'yellow'})
new_main = new_df.drop('group', axis='columns')

In [22]:
new_df.describe()


Out[22]:
speed age miles group
count 300.000000 300.000000 300.000000 300.000000
mean 122.963333 44.000000 31.133333 1.000000
std 17.135170 16.312992 15.568885 0.817861
min 85.000000 16.000000 1.000000 0.000000
25% 109.750000 33.000000 18.000000 0.000000
50% 121.000000 42.000000 30.000000 1.000000
75% 137.000000 52.000000 43.000000 2.000000
max 161.000000 97.000000 99.000000 2.000000

In [23]:
latent_representation = encoder.predict(x={'main_input': new_main, 'group_input': new_one_hot_groups})
colors = new_df['group']
color_map = ListedColormap(['#AA4444', '#006000', '#EEEE44'])

latent_x = latent_representation[:, 0]
latent_y = latent_representation[:, 1]

plt.title('new data, old model')

plt.scatter(latent_x, latent_y, alpha=0.5, s=100, marker='o', edgecolors='w', cmap=color_map, c=colors)


Out[23]:
<matplotlib.collections.PathCollection at 0x7f61299dcc88>

In [24]:
#default lr
# adam = keras.optimizers.Adam(lr=0.001)
# we use just half of that
adam = keras.optimizers.Adam(lr=0.0005)
# even this high learning rate can not break it (moves it quite a bit, but still same overall shape)
# adam = keras.optimizers.Adam(lr=0.1)

autoencoder.compile(optimizer=adam,
              loss={'main_output': 'mae', 'group_output': 'categorical_crossentropy'},
              loss_weights={'main_output': 1., 'group_output': 50.})

BATCH_SIZE = 1
EPOCHS=10

%time history = autoencoder.fit(\
    x={'main_input': new_main, 'group_input': new_one_hot_groups},\
    y={'main_output': new_main, 'group_output': new_one_hot_groups},\
    epochs=EPOCHS, batch_size=BATCH_SIZE, shuffle=True, verbose=1)


Epoch 1/10
300/300 [==============================] - 3s 9ms/step - loss: 18.0994 - main_output_loss: 17.8307 - group_output_loss: 0.0054
Epoch 2/10
300/300 [==============================] - 2s 7ms/step - loss: 18.0753 - main_output_loss: 17.8171 - group_output_loss: 0.0052
Epoch 3/10
300/300 [==============================] - 2s 7ms/step - loss: 18.0563 - main_output_loss: 17.8227 - group_output_loss: 0.0047
Epoch 4/10
300/300 [==============================] - 2s 7ms/step - loss: 18.0384 - main_output_loss: 17.8176 - group_output_loss: 0.0044
Epoch 5/10
300/300 [==============================] - 2s 7ms/step - loss: 18.0227 - main_output_loss: 17.8034 - group_output_loss: 0.0044
Epoch 6/10
300/300 [==============================] - 2s 7ms/step - loss: 18.0082 - main_output_loss: 17.7958 - group_output_loss: 0.0042
Epoch 7/10
300/300 [==============================] - 2s 7ms/step - loss: 17.9956 - main_output_loss: 17.7859 - group_output_loss: 0.0042
Epoch 8/10
300/300 [==============================] - 2s 7ms/step - loss: 17.9855 - main_output_loss: 17.7800 - group_output_loss: 0.0041
Epoch 9/10
300/300 [==============================] - 2s 7ms/step - loss: 17.9784 - main_output_loss: 17.7683 - group_output_loss: 0.0042
Epoch 10/10
300/300 [==============================] - 2s 7ms/step - loss: 17.9662 - main_output_loss: 17.7616 - group_output_loss: 0.0041
CPU times: user 27.4 s, sys: 3.57 s, total: 31 s
Wall time: 22.2 s

In [25]:
plt.ylabel('loss')
plt.xlabel('epoch')
plt.yscale('log')

plt.plot(history.history['loss'], 'b')
plt.plot(history.history['main_output_loss'], 'r')
plt.plot(history.history['group_output_loss'], 'g')

plt.legend(['combined loss', 'main loss', 'group loss'], loc='upper right')


Out[25]:
<matplotlib.legend.Legend at 0x7f61290fd6d8>

In [26]:
latent_representation = encoder.predict(x={'main_input': new_main, 'group_input': new_one_hot_groups})
colors = new_df['group']
color_map = ListedColormap(['#AA4444', '#006000', '#EEEE44'])

latent_x = latent_representation[:, 0]
latent_y = latent_representation[:, 1]

plt.title('updated model, new data')

plt.scatter(latent_x, latent_y, alpha=0.5, s=100, marker='o', edgecolors='w', cmap=color_map, c=colors)


Out[26]:
<matplotlib.collections.PathCollection at 0x7f6129031b38>

In [27]:
latent_representation = encoder.predict(x={'main_input': main, 'group_input': one_hot_groups})
colors = df['group']
color_map = ListedColormap(['#AA4444', '#006000', '#EEEE44'])

latent_x = latent_representation[:, 0]
latent_y = latent_representation[:, 1]

plt.title('updated model, original data')

plt.scatter(latent_x, latent_y, alpha=0.5, s=100, marker='o', edgecolors='w', cmap=color_map, c=colors)


Out[27]:
<matplotlib.collections.PathCollection at 0x7f6128f9dc50>

Option II: Train from scratch, but use difference to original latent representation as part of loss function


In [0]:
# start over from scratch
from tensorflow.keras.models import load_model
autoencoder = load_model('autoencoder-v1.h5')

In [0]:
main_input = autoencoder.get_layer('main_input').input
group_input = autoencoder.get_layer('group_input').input
encode = autoencoder.get_layer('encoder').output
encoder = Model(inputs=[main_input, group_input], outputs=encode)

In [0]:
new_original_latent_representation = encoder.predict(x={'main_input': new_main, 'group_input': new_one_hot_groups})
old_original_latent_representation = encoder.predict(x={'main_input': main, 'group_input': one_hot_groups})

In [31]:
latent_representation = encoder.predict(x={'main_input': main, 'group_input': one_hot_groups})
colors = df['group']
color_map = ListedColormap(['#AA4444', '#006000', '#EEEE44'])

latent_x = latent_representation[:, 0]
latent_y = latent_representation[:, 1]

plt.title('original data, original model')

plt.scatter(latent_x, latent_y, alpha=0.5, s=100, marker='o', edgecolors='w', cmap=color_map, c=colors)


Out[31]:
<matplotlib.collections.PathCollection at 0x7f612869bc18>

In [32]:
latent_representation = encoder.predict(x={'main_input': new_main, 'group_input': new_one_hot_groups})
colors = new_df['group']
color_map = ListedColormap(['#AA4444', '#006000', '#EEEE44'])

latent_x = latent_representation[:, 0]
latent_y = latent_representation[:, 1]

plt.title('new data, original model')

plt.scatter(latent_x, latent_y, alpha=0.5, s=100, marker='o', edgecolors='w', cmap=color_map, c=colors)


Out[32]:
<matplotlib.collections.PathCollection at 0x7f6128607710>

In [33]:
# often does not even train, so choose a seed known to be good
from tensorflow.keras.initializers import glorot_normal
seed= 13

# first multi in
main_input = Input(shape=(3,), name='main_input')
group_input = Input(shape=(3,), name='group_input')

# hopefully this balances out inputs to same scale
encoded_main = Dense(units=2, activation='relu', name="main_encoder", bias_initializer='zeros', kernel_initializer=glorot_normal(seed=seed))(main_input)
encoded_group = Dense(units=2, activation='relu', name="group_encoder", bias_initializer='zeros', kernel_initializer=glorot_normal(seed=seed))(group_input)

# latent representation

merged = concatenate([encoded_main, encoded_group])
# this might work as well
# merged = average([encoded_main, encoded_group])

# the effective latent encoding
encoding_dim = 2
encoded = Dense(units=encoding_dim, activation='relu', name="encoder", bias_initializer='zeros', kernel_initializer=glorot_normal(seed=seed))(merged)

# then multi out

main_output = Dense(units=3, activation='linear', name="main_output", bias_initializer='zeros', kernel_initializer=glorot_normal(seed=seed))(encoded)
group_output = Dense(units=3, activation='softmax', name="group_output", bias_initializer='zeros', kernel_initializer=glorot_normal(seed=seed))(encoded)
stability_output = encoded # just the latent space as output

# autoencoder = Model(inputs=[main_input, group_input], outputs=[main_output, group_output, stability_output])
autoencoder = Model(inputs=[main_input, group_input], outputs=[main_output, group_output])

# adam = keras.optimizers.Adam(lr=0.001)
adam = keras.optimizers.Adam(lr=0.01)

# autoencoder.compile(optimizer=adam,
#               loss={'main_output': 'mae', 'group_output': 'categorical_crossentropy', 'encoder': 'mae' },
#               loss_weights={'main_output': 1., 'group_output': 50., 'encoder': 10.})

autoencoder.compile(optimizer=adam,
              loss={'main_output': 'mae', 'group_output': 'categorical_crossentropy'},
              loss_weights={'main_output': 1., 'group_output': 1000.})

BATCH_SIZE = 1
EPOCHS=20

# %time history = autoencoder.fit(\
#     x={'main_input': new_main, 'group_input': new_one_hot_groups},\
#     y={'main_output': new_main, 'group_output': new_one_hot_groups, 'encoder': new_original_latent_representation},\
#     epochs=EPOCHS, batch_size=BATCH_SIZE, shuffle=True, verbose=1)

%time history = autoencoder.fit(\
    x={'main_input': new_main, 'group_input': new_one_hot_groups},\
    y={'main_output': new_main, 'group_output': new_one_hot_groups},\
    epochs=EPOCHS, batch_size=BATCH_SIZE, verbose=1)


Epoch 1/20
300/300 [==============================] - 3s 9ms/step - loss: 1113.7896 - main_output_loss: 64.0727 - group_output_loss: 1.0497
Epoch 2/20
300/300 [==============================] - 2s 7ms/step - loss: 759.8672 - main_output_loss: 57.7043 - group_output_loss: 0.7022
Epoch 3/20
300/300 [==============================] - 2s 7ms/step - loss: 609.1736 - main_output_loss: 50.1857 - group_output_loss: 0.5590
Epoch 4/20
300/300 [==============================] - 2s 7ms/step - loss: 564.2294 - main_output_loss: 44.8930 - group_output_loss: 0.5193
Epoch 5/20
300/300 [==============================] - 2s 7ms/step - loss: 545.1656 - main_output_loss: 41.0447 - group_output_loss: 0.5041
Epoch 6/20
300/300 [==============================] - 2s 7ms/step - loss: 526.2991 - main_output_loss: 37.7498 - group_output_loss: 0.4885
Epoch 7/20
300/300 [==============================] - 2s 7ms/step - loss: 520.0490 - main_output_loss: 34.3416 - group_output_loss: 0.4857
Epoch 8/20
300/300 [==============================] - 2s 7ms/step - loss: 511.0741 - main_output_loss: 31.0153 - group_output_loss: 0.4801
Epoch 9/20
300/300 [==============================] - 2s 7ms/step - loss: 505.2549 - main_output_loss: 27.8453 - group_output_loss: 0.4774
Epoch 10/20
300/300 [==============================] - 2s 7ms/step - loss: 512.4288 - main_output_loss: 26.3003 - group_output_loss: 0.4861
Epoch 11/20
300/300 [==============================] - 2s 7ms/step - loss: 499.2340 - main_output_loss: 25.4253 - group_output_loss: 0.4738
Epoch 12/20
300/300 [==============================] - 2s 7ms/step - loss: 506.1020 - main_output_loss: 24.9102 - group_output_loss: 0.4812
Epoch 13/20
300/300 [==============================] - 2s 7ms/step - loss: 500.3039 - main_output_loss: 24.3554 - group_output_loss: 0.4759
Epoch 14/20
300/300 [==============================] - 2s 7ms/step - loss: 495.0908 - main_output_loss: 24.0149 - group_output_loss: 0.4711
Epoch 15/20
300/300 [==============================] - 2s 7ms/step - loss: 498.6207 - main_output_loss: 23.7325 - group_output_loss: 0.4749
Epoch 16/20
300/300 [==============================] - 2s 7ms/step - loss: 502.8185 - main_output_loss: 23.5568 - group_output_loss: 0.4793
Epoch 17/20
300/300 [==============================] - 2s 7ms/step - loss: 496.3739 - main_output_loss: 23.2007 - group_output_loss: 0.4732
Epoch 18/20
300/300 [==============================] - 2s 7ms/step - loss: 498.2794 - main_output_loss: 22.8270 - group_output_loss: 0.4755
Epoch 19/20
300/300 [==============================] - 2s 7ms/step - loss: 496.4002 - main_output_loss: 22.7899 - group_output_loss: 0.4736
Epoch 20/20
300/300 [==============================] - 2s 7ms/step - loss: 493.3310 - main_output_loss: 22.3431 - group_output_loss: 0.4710
CPU times: user 54.1 s, sys: 7.12 s, total: 1min 1s
Wall time: 43.7 s

In [0]:
main_input = autoencoder.get_layer('main_input').input
group_input = autoencoder.get_layer('group_input').input
encode = autoencoder.get_layer('encoder').output
encoder = Model(inputs=[main_input, group_input], outputs=encode)

In [35]:
latent_representation = encoder.predict(x={'main_input': new_main, 'group_input': new_one_hot_groups})
colors = new_df['group']
color_map = ListedColormap(['#AA4444', '#006000', '#EEEE44'])

latent_x = latent_representation[:, 0]
latent_y = latent_representation[:, 1]

plt.title('new data, new model without forcing')

plt.scatter(latent_x, latent_y, alpha=0.5, s=100, marker='o', edgecolors='w', cmap=color_map, c=colors)


Out[35]:
<matplotlib.collections.PathCollection at 0x7f6127cec3c8>

In [36]:
# often does not even train, so choose a seed known to be good
from tensorflow.keras.initializers import glorot_normal
seed= 13

# first multi in
main_input = Input(shape=(3,), name='main_input')
group_input = Input(shape=(3,), name='group_input')

# hopefully this balances out inputs to same scale
encoded_main = Dense(units=2, activation='relu', name="main_encoder", bias_initializer='zeros', kernel_initializer=glorot_normal(seed=seed))(main_input)
encoded_group = Dense(units=2, activation='relu', name="group_encoder", bias_initializer='zeros', kernel_initializer=glorot_normal(seed=seed))(group_input)

# latent representation

merged = concatenate([encoded_main, encoded_group])
# this might work as well
# merged = average([encoded_main, encoded_group])

# the effective latent encoding
encoding_dim = 2
encoded = Dense(units=encoding_dim, activation='relu', name="encoder", bias_initializer='zeros', kernel_initializer=glorot_normal(seed=seed))(merged)

# then multi out

main_output = Dense(units=3, activation='linear', name="main_output", bias_initializer='zeros', kernel_initializer=glorot_normal(seed=seed))(encoded)
group_output = Dense(units=3, activation='softmax', name="group_output", bias_initializer='zeros', kernel_initializer=glorot_normal(seed=seed))(encoded)
stability_output = encoded # just the latent space as output

autoencoder = Model(inputs=[main_input, group_input], outputs=[main_output, group_output, stability_output])

# adam = keras.optimizers.Adam(lr=0.001)
adam = keras.optimizers.Adam(lr=0.01)

autoencoder.compile(optimizer=adam,
              loss={'main_output': 'mae', 'group_output': 'categorical_crossentropy', 'encoder': 'mae' },
              loss_weights={'main_output': 1., 'group_output': 100., 'encoder': 100.})


BATCH_SIZE = 1
EPOCHS=20

%time history = autoencoder.fit(\
    x={'main_input': new_main, 'group_input': new_one_hot_groups},\
    y={'main_output': new_main, 'group_output': new_one_hot_groups, 'encoder': new_original_latent_representation},\
    epochs=EPOCHS, batch_size=BATCH_SIZE, verbose=1)


Epoch 1/20
300/300 [==============================] - 3s 11ms/step - loss: 359.0142 - main_output_loss: 57.9943 - group_output_loss: 1.1204 - encoder_loss: 1.8898
Epoch 2/20
300/300 [==============================] - 2s 8ms/step - loss: 139.8898 - main_output_loss: 44.7942 - group_output_loss: 0.7537 - encoder_loss: 0.1972
Epoch 3/20
300/300 [==============================] - 2s 8ms/step - loss: 94.7738 - main_output_loss: 38.8588 - group_output_loss: 0.5449 - encoder_loss: 0.0143
Epoch 4/20
300/300 [==============================] - 2s 8ms/step - loss: 83.5354 - main_output_loss: 33.9211 - group_output_loss: 0.4820 - encoder_loss: 0.0141
Epoch 5/20
300/300 [==============================] - 2s 8ms/step - loss: 76.6547 - main_output_loss: 31.6733 - group_output_loss: 0.4362 - encoder_loss: 0.0136
Epoch 6/20
300/300 [==============================] - 2s 8ms/step - loss: 73.3537 - main_output_loss: 30.4277 - group_output_loss: 0.4116 - encoder_loss: 0.0177
Epoch 7/20
300/300 [==============================] - 2s 8ms/step - loss: 69.4638 - main_output_loss: 29.7551 - group_output_loss: 0.3848 - encoder_loss: 0.0123
Epoch 8/20
300/300 [==============================] - 2s 8ms/step - loss: 65.5596 - main_output_loss: 29.2934 - group_output_loss: 0.3510 - encoder_loss: 0.0117
Epoch 9/20
300/300 [==============================] - 2s 8ms/step - loss: 63.3000 - main_output_loss: 28.9237 - group_output_loss: 0.3364 - encoder_loss: 0.0074
Epoch 10/20
300/300 [==============================] - 2s 8ms/step - loss: 61.9612 - main_output_loss: 28.6323 - group_output_loss: 0.3174 - encoder_loss: 0.0159
Epoch 11/20
300/300 [==============================] - 2s 8ms/step - loss: 58.5541 - main_output_loss: 28.2746 - group_output_loss: 0.2883 - encoder_loss: 0.0145
Epoch 12/20
300/300 [==============================] - 2s 8ms/step - loss: 57.0897 - main_output_loss: 27.9404 - group_output_loss: 0.2771 - encoder_loss: 0.0144
Epoch 13/20
300/300 [==============================] - 2s 8ms/step - loss: 54.0290 - main_output_loss: 27.6013 - group_output_loss: 0.2514 - encoder_loss: 0.0129
Epoch 14/20
300/300 [==============================] - 2s 8ms/step - loss: 52.5104 - main_output_loss: 27.2802 - group_output_loss: 0.2397 - encoder_loss: 0.0126
Epoch 15/20
300/300 [==============================] - 2s 8ms/step - loss: 50.7044 - main_output_loss: 26.9363 - group_output_loss: 0.2243 - encoder_loss: 0.0134
Epoch 16/20
300/300 [==============================] - 2s 8ms/step - loss: 48.3359 - main_output_loss: 26.6366 - group_output_loss: 0.2052 - encoder_loss: 0.0118
Epoch 17/20
300/300 [==============================] - 2s 8ms/step - loss: 47.4239 - main_output_loss: 26.3338 - group_output_loss: 0.1923 - encoder_loss: 0.0186
Epoch 18/20
300/300 [==============================] - 2s 8ms/step - loss: 45.8321 - main_output_loss: 26.0685 - group_output_loss: 0.1791 - encoder_loss: 0.0186
Epoch 19/20
300/300 [==============================] - 2s 8ms/step - loss: 43.7928 - main_output_loss: 25.7362 - group_output_loss: 0.1681 - encoder_loss: 0.0125
Epoch 20/20
300/300 [==============================] - 2s 8ms/step - loss: 42.6328 - main_output_loss: 25.4749 - group_output_loss: 0.1557 - encoder_loss: 0.0158
CPU times: user 1min 1s, sys: 8.41 s, total: 1min 10s
Wall time: 48.5 s

In [37]:
main_input = autoencoder.get_layer('main_input').input
group_input = autoencoder.get_layer('group_input').input
encode = autoencoder.get_layer('encoder').output
encoder = Model(inputs=[main_input, group_input], outputs=encode)

latent_representation = encoder.predict(x={'main_input': new_main, 'group_input': new_one_hot_groups})
colors = new_df['group']
color_map = ListedColormap(['#AA4444', '#006000', '#EEEE44'])

latent_x = latent_representation[:, 0]
latent_y = latent_representation[:, 1]

plt.title('new data, new model with forcing')

plt.scatter(latent_x, latent_y, alpha=0.5, s=100, marker='o', edgecolors='w', cmap=color_map, c=colors)


Out[37]:
<matplotlib.collections.PathCollection at 0x7f6127229d68>

In [0]: