In [1]:
from __future__ import print_function
import numpy as np
import scipy.linalg
# sklearn
from sklearn.preprocessing import OneHotEncoder
# Keras
from keras.models import Sequential
from keras.layers.core import Dense, Reshape, Dropout, Activation, RepeatVector, Flatten, Permute
from keras.layers import Input, merge, Lambda
from keras.layers.convolutional import Convolution1D
from keras.models import Model
from keras import objectives
from keras.layers.wrappers import TimeDistributed, Bidirectional
from keras.layers.recurrent import LSTM, GRU
from keras.optimizers import Adagrad, Adam
import keras.backend as K
# Other
import matplotlib.pyplot as plt
from copy import deepcopy
import os
import pickle
import pprint
%matplotlib inline
# Local
import McNeuron
import models_cmc as models
import train_one_by_one as train
import batch_utils
import data_transforms
In [38]:
def encoder(prufer_input, hidden_dim, encoding_dim):
    h = Convolution1D(hidden_dim, 5, activation='relu', name='conv1')(prufer_input)
    #h = Convolution1D(hidden_dim, 5, activation='relu', name='conv2')(h)
    #h = Convolution1D(hidden_dim, 5, activation='relu', name='conv3')(h)
    h = Flatten(name='flatten')(h)
    return Dense(encoding_dim, activation='relu', name='dense1')(h)
In [39]:
def sampler(args, encoding_dim):
    # Reparameterization trick: sample z = mean + sigma * epsilon
    encoded_mean, encoded_log_var = args
    batch_size = K.shape(encoded_mean)[0]
    epsilon = K.random_normal(shape=(batch_size, encoding_dim), mean=0., std=0.01)
    return encoded_mean + K.exp(encoded_log_var / 2) * epsilon
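The sampler implements the reparameterization trick, z = mean + exp(log_var / 2) * epsilon with epsilon drawn from N(0, 0.01), so the sampling step stays differentiable with respect to the encoded mean and log-variance.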
In [61]:
def variational_encoder(prufer_input, hidden_dim, encoding_dim):
    def sampler(args):
        z_mean, z_log_var = args
        batch_size = K.shape(z_mean)[0]
        epsilon = K.random_normal(shape=(batch_size, encoding_dim), mean=0., std=0.01)
        return z_mean + K.exp(z_log_var / 2) * epsilon

    h = Convolution1D(hidden_dim, 5, activation='relu', name='conv1')(prufer_input)
    #h = Convolution1D(hidden_dim, 5, activation='relu', name='conv2')(h)
    #h = Convolution1D(hidden_dim, 5, activation='relu', name='conv3')(h)
    h = Flatten(name='flatten')(h)
    encoded_mean = Dense(encoding_dim, activation='linear', name='encoded_mean')(h)
    encoded_log_var = Dense(encoding_dim, activation='linear', name='encoded_log_var')(h)
    encoder_output = Lambda(sampler,
                            output_shape=(encoding_dim,),
                            name='encoder_output')([encoded_mean, encoded_log_var])

    # Length of the input Prufer sequence, used to scale the reconstruction term
    max_length = K.int_shape(prufer_input)[1]

    def vae_loss(y, y_pred):
        y = K.flatten(y)
        y_pred = K.flatten(y_pred)
        xent_loss = max_length * objectives.binary_crossentropy(y, y_pred)
        kl_loss = -0.5 * K.mean(1 + encoded_log_var - K.square(encoded_mean) - K.exp(encoded_log_var), axis=-1)
        return xent_loss + kl_loss

    return encoder_output, vae_loss
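The loss is the standard VAE objective: a reconstruction term (binary cross-entropy scaled by the input sequence length) plus the closed-form KL divergence between the approximate posterior N(mu, sigma^2) and a standard normal prior, KL = -1/2 * mean(1 + log(sigma^2) - mu^2 - sigma^2).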
In [62]:
def decoder(decoder_input, n_nodes, hidden_dim, latent_dim):
    h = Dense(latent_dim, name='latent_input', activation='relu')(decoder_input)
    h = RepeatVector(n_nodes - 1, name='repeat_vector')(h)
    #h = Bidirectional(LSTM(hidden_dim, return_sequences=True, name='lstm1'), merge_mode='sum')(h)
    #h = Bidirectional(LSTM(hidden_dim, return_sequences=True, name='lstm2'), merge_mode='sum')(h)
    #h = Bidirectional(LSTM(hidden_dim, return_sequences=True, name='lstm3'), merge_mode='sum')(h)
    h = GRU(hidden_dim, return_sequences=True, name='lstm1')(h)
    #h = GRU(hidden_dim, return_sequences=True, name='lstm2')(h)
    #h = GRU(hidden_dim, return_sequences=True, name='lstm3')(h)
    return TimeDistributed(Dense(n_nodes, activation='softmax'), name='decoded_softmax')(h)
In [73]:
def autoencoder(prufer_input, n_nodes, hidden_dim, encoding_dim, latent_dim):
    enc = encoder(prufer_input, hidden_dim, encoding_dim)
    dec = decoder(enc, n_nodes, hidden_dim, latent_dim)
    return dec
In [74]:
def variational_autoencoder(prufer_input, n_nodes, hidden_dim, encoding_dim, latent_dim):
    enc, vae_loss = variational_encoder(prufer_input, hidden_dim, encoding_dim)
    dec = decoder(enc, n_nodes, hidden_dim, latent_dim)
    return dec, vae_loss
In [64]:
hidden_dim = 50
encoding_dim = 200
latent_dim = 100
n_nodes = 20
In [67]:
prufer_input = Input(shape=(n_nodes - 2, n_nodes))
#encoded_output = encoder(prufer_input, hidden_dim, encoding_dim)
encoded_output, vae_loss = variational_encoder(prufer_input, hidden_dim, encoding_dim)
decoder_input = Input(shape=(encoding_dim,))
parent_output = decoder(decoder_input, n_nodes, hidden_dim, latent_dim)
#encoding_model = Model(input=prufer_input, output=encoded_output)
#decoding_model = Model(input=decoder_input, output=parent_output)
In [75]:
prufer_input = Input(shape=(n_nodes - 2, n_nodes))
parent_output = autoencoder(prufer_input, n_nodes, hidden_dim, encoding_dim, latent_dim)
autoencoder_model = Model(input=prufer_input, output=parent_output)
autoencoder_model.summary()
In [77]:
prufer_input = Input(shape=(n_nodes - 2, n_nodes))
parent_output, vae_loss = variational_autoencoder(prufer_input, n_nodes, hidden_dim, encoding_dim, latent_dim)
variational_autoencoder_model = Model(input=prufer_input, output=parent_output)
variational_autoencoder_model.summary()
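Since variational_autoencoder now also returns the loss closure, the model could be compiled directly with it; a minimal sketch (the VAE is only summarized, not trained, in this notebook):
variational_autoencoder_model.compile(optimizer=Adam(), loss=vae_loss)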
In [30]:
def reorder(parents):
    length = len(parents)
    # Construct the adjacency matrix
    adjacency = np.zeros([length, length])
    adjacency[parents[1:], range(1, length)] = 1
    # Discover the permutation with Schur decomposition
    full_adjacency = np.linalg.inv(np.eye(length) - adjacency)
    full_adjacency_permuted, permutation_matrix = \
        scipy.linalg.schur(full_adjacency)
    # Reorder the parents
    parents_reordered = \
        np.argmax(np.eye(length) - np.linalg.inv(full_adjacency_permuted),
                  axis=0)
    parents_reordered[0] = -1
    return list(parents_reordered)
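A quick sanity check on a made-up parents vector: reorder is intended to return an equivalent parent sequence in which every parent index precedes its children (node 0 stays the root with parent -1).
parents = [-1, 3, 0, 0]  # hypothetical tree: node 1 points to node 3, which appears after it
print(reorder(parents))  # relabeled tree should list parents before their children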
In [31]:
def generate_training_batch(batch_size, n_nodes):
    """
    Generate training examples.

    Parameters
    ----------
    batch_size: int
        number of examples to generate
    n_nodes: int
        number of nodes in the tree

    Returns
    -------
    X: 3d array (samples, n_nodes - 2, n_nodes)
        one-hot (softmax-style) representation of the Prufer sequence
    y: 3d array (samples, n_nodes - 1, n_nodes)
        one-hot (softmax-style) representation of the parent sequence
    """
    X = np.zeros((batch_size, n_nodes - 2, n_nodes))
    y = np.zeros((batch_size, n_nodes - 1, n_nodes))
    # One-hot encoder
    prufer_example = np.random.randint(0, n_nodes - 1, (n_nodes - 2, 1))
    one_hot = OneHotEncoder(n_values=n_nodes)
    one_hot = one_hot.fit(prufer_example)
    for example in range(batch_size):
        # Input: Prufer sequence softmax
        prufer_array = np.random.randint(0, n_nodes - 1, (n_nodes - 2, 1))
        X[example, :, :] = one_hot.transform(prufer_array).todense()
        # Output: parent sequence softmax
        parents_ = data_transforms.decode_prufer(list(np.squeeze(prufer_array)))
        parents_ = reorder(parents_)[1:]
        parents_softmax = one_hot.transform(np.expand_dims(np.array(parents_), axis=1)).todense()
        y[example, :, :] = parents_softmax
    return X, y
In [32]:
X, y = generate_training_batch(64, n_nodes)
plt.figure(figsize=(10,5))
plt.subplot(1,2,1)
plt.imshow(X[6, :, :], cmap='Greys', interpolation='none')
plt.subplot(1,2,2)
plt.imshow(y[6, :, :], cmap='Greys', interpolation='none')
plt.show()
In [4]:
# Model parameters
n_nodes = 20
hidden_dim = 50
encoding_dim = 100
latent_dim = 25
# Training parameters
n_epochs = 20
batch_size = 1
n_batch_per_epoch = 1000  # np.floor(training_data['morphology']['n20'].shape[0]/batch_size).astype(int)
In [34]:
prufer_input = Input(shape=(n_nodes - 2, n_nodes))
parent_output = autoencoder(prufer_input, n_nodes, hidden_dim, encoding_dim, latent_dim)
autoencoder_model = Model(input=prufer_input, output=parent_output)
autoencoder_model.compile(loss='cosine_proximity', optimizer=Adam())
In [35]:
for e in range(n_epochs * n_batch_per_epoch):
    # Generate training data
    X, y = generate_training_batch(batch_size, n_nodes)
    # Train
    ae_loss = autoencoder_model.train_on_batch(X, y)
    # Print
    if e % 100 == 0:
        print("Epoch #{0}".format(e))
        print(" Loss: {0}".format(ae_loss))
    # Display
    if e % 100 == 0:
        y_pred = autoencoder_model.predict(X)
        plt.figure(figsize=(10, 5))
        plt.subplot(1, 2, 1)
        plt.imshow(y[0, :, :], cmap='Greys', interpolation='none')
        plt.subplot(1, 2, 2)
        plt.imshow(y_pred[0, :, :], cmap='Greys', interpolation='none')
        plt.show()
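After training, the learned weights could be persisted for later reuse; a minimal sketch with a made-up filename:
autoencoder_model.save_weights('prufer_autoencoder_weights.h5', overwrite=True)  # hypothetical path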
In [18]:
y_pred = autoencoder_model.predict(X)
y_pred.shape
Out[18]:
In [19]:
for ex in range(batch_size):
    plt.figure(figsize=(10, 5))
    plt.subplot(1, 2, 1)
    plt.imshow(y[ex, :, :], cmap='Greys', interpolation='none')
    plt.subplot(1, 2, 2)
    plt.imshow(y_pred[ex, :, :], cmap='Greys', interpolation='none')
    plt.show()
In [22]:
y_pred[ex, :, :].argmax(axis=1)
Out[22]:
In [23]:
y.argmax(axis=1)
Out[23]:
In [35]:
def masked_softmax(input_layer, n_nodes, batch_size):
    """
    A Lambda function to mask a matrix of outputs to be lower-triangular.

    Each row must sum to one, so we multiply by a lower-triangular mask of
    ones and add an upper-triangular mask of a large negative number before
    taking the softmax.

    Parameters
    ----------
    input_layer: keras layer object
        (n_nodes - 1, n_nodes) matrix per example

    Returns
    -------
    output_layer: keras layer object
        (n_nodes - 1, n_nodes) matrix per example
    """
    mask_lower = K.theano.tensor.tril(K.ones((n_nodes - 1, n_nodes)))
    mask_upper = K.theano.tensor.triu(-100. * K.ones((n_nodes - 1, n_nodes)), 1)
    mask_layer = mask_lower * input_layer + mask_upper
    mask_layer = K.reshape(mask_layer, (batch_size * (n_nodes - 1), n_nodes))
    softmax_layer = K.softmax(mask_layer)
    output_layer = K.reshape(softmax_layer, (batch_size, n_nodes - 1, n_nodes))
    return output_layer
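Because row i of the mask keeps only columns 0 through i, node i + 1 can only select a parent with a smaller index, which matches the reordered parent sequences produced by reorder above.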
In [36]:
input_layer = Input(shape=(n_nodes - 1, n_nodes))
dense_layer = TimeDistributed(Dense(20))(input_layer)
In [55]:
lambda_args = {'n_nodes': n_nodes, 'batch_size': 2}
#lambda_args = {'n_nodes': n_nodes}
next_layer = Lambda(masked_softmax,
                    output_shape=(n_nodes - 1, n_nodes),
                    arguments=lambda_args)(dense_layer)
In [56]:
test_model = Model(input=input_layer, output=next_layer)
In [57]:
test_model.summary()
In [68]:
l = test_model.layers[2]
l.name
Out[68]:
In [58]:
example_input = np.random.randn(2, 19, 20)
#example_input = example_input * np_mask
#example_input.shape
#plt.imshow(example_input[0], interpolation='none', cmap='Greys')
#plt.colorbar()
#plt.show()
In [59]:
tmp = test_model.predict(example_input)
plt.imshow(tmp[0,:,:], interpolation='none', cmap='Greys')
plt.colorbar()
plt.show()
In [312]:
tmp.shape
Out[312]:
In [298]:
tmp.sum(axis=2)  # each masked softmax row should sum to 1
Out[298]:
In [278]:
np_mask = np.tril(np.ones((n_nodes - 1, n_nodes))) + \
    np.triu(-100. * np.ones((n_nodes - 1, n_nodes)), 1)
In [279]:
np_mask[0,0]
Out[279]:
In [280]:
plt.imshow(np_mask, cmap='Greys', interpolation='none')
plt.colorbar()
plt.show()
In [257]:
tmp = np.zeros((2,3,4))
tmp[0, :, :] = [[1., 2., 3., 4.], [4., 2., 7., 8.], [0., 1., 4., 2.]]
tmp[1, :, :] = [[-1., -2., -3., -4.], [-4., -2., -7., -8.], [-0., -1., -4., -2.]]
In [258]:
tmp.shape
Out[258]:
In [269]:
tmp
Out[269]:
In [260]:
tmp2 = tmp.reshape((6, 4))
tmp2.shape
Out[260]:
In [268]:
tmp2
Out[268]:
In [265]:
tmp2 = np
In [266]:
tmp3 = tmp2.reshape((2,3,4))
tmp3.shape
Out[266]:
In [267]:
tmp3
Out[267]:
In [60]:
def invert_full_matrix_np(full_adjacency):
    full_adjacency = np.squeeze(full_adjacency)
    n_nodes = full_adjacency.shape[1]
    # Prepend the root row before inverting
    full_adjacency = np.append(np.zeros([1, n_nodes]), full_adjacency, axis=0)
    full_adjacency[0, 0] = 1
    adjacency = np.eye(n_nodes) - np.linalg.inv(full_adjacency)
    return adjacency[1:, :]
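A minimal round-trip sketch on a made-up 4-node chain: build the full path matrix inv(I - A) for a known tree, drop its root row, and check that invert_full_matrix_np recovers the non-root rows of the adjacency matrix.
parents = [-1, 0, 1, 2]  # hypothetical chain 0 -> 1 -> 2 -> 3
n = len(parents)
adjacency = np.zeros((n, n))
adjacency[parents[1:], range(1, n)] = 1
full_adjacency = np.linalg.inv(np.eye(n) - adjacency)
recovered = invert_full_matrix_np(full_adjacency[1:, :])
print(np.allclose(recovered, adjacency[1:, :]))  # expected: True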
In [69]:
'dense' in 'dense_4'
Out[69]:
In [ ]: