Learning to read with TensorFlow and Keras

Install necessary packages


In [0]:
!pip install tf-nightly
!pip install tensorflow-addons
!pip install keras-tuner

In [0]:
import tensorflow as tf
import tensorflow_addons as tfa
tf.__version__
dir(tfa.seq2seq)
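
Optionally, confirm that the runtime sees a GPU; training also works on CPU but is much slower.


In [0]:
tf.config.list_physical_devices('GPU')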

Download the data


In [0]:
!wget http://www.thespermwhale.com/jaseweston/babi/CBTest.tgz
!tar -xf CBTest.tgz

In [0]:
!ls CBTest/data
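
To get a feel for the raw format before building the pipeline, peek at the first few lines of the training file (assuming the archive extracted into CBTest/data as listed above).


In [0]:
!head -n 5 CBTest/data/cbt_train.txt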

Preprocess the data


In [0]:
lines = tf.data.TextLineDataset('CBTest/data/cbt_train.txt')
for row in lines.take(3):
  print(row)

In [0]:
lines = lines.filter(
    lambda x: not tf.strings.regex_full_match(x, "_BOOK_TITLE_.*"))

punctuation = r'[!"#$%&()\*\+,-\./:;<=>?@\[\\\]^_`{|}~\']'
lines = lines.map(
    lambda x: tf.strings.regex_replace(x, punctuation, ' ') )

for row in lines.take(3):
  print(row)

In [0]:
# Split the lines on spaces.
words = lines.map(tf.strings.split)

# Window the words into batches of 11: the first 10 words
# are the input sequence and the 11th word is the label to
# predict. (An optional variant below drops the final short window.)
wordsets = words.unbatch().batch(11)

for row in wordsets.take(3):
  print(row)
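
The last window can contain fewer than 11 words. If you want every example to have exactly 10 input words and one label word, this optional variant drops that final short window.


In [0]:
wordsets = words.unbatch().batch(11, drop_remainder=True)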

In [0]:
def get_example_label(row):
  example = tf.strings.reduce_join(row[:-1], separator=' ')
  example = tf.expand_dims(example, axis=0)
  label = row[-1:]
  return example, label

data = wordsets.map(get_example_label)
data = data.shuffle(1000)
for row in data.take(3):
  print(row)

Use the TextVectorization layer to tokenize the training data.


In [0]:
max_features = 5000  # Maximum vocab size.

vectorize_layer = tf.keras.layers.experimental.preprocessing.TextVectorization(
  max_tokens=max_features,
  output_sequence_length=10)

vectorize_layer.adapt(lines.batch(64))

In [0]:
vectorize_layer.get_vocabulary()[:5]

In [0]:
vectorize_layer.get_vocabulary()[-5:]
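
As a quick sanity check, the adapted vocabulary should contain at most max_features entries.


In [0]:
len(vectorize_layer.get_vocabulary())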

In [0]:
for batch in data.batch(3).take(1):
  print(batch[0])
  print(vectorize_layer(batch[0]))
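
To make the token ids easier to interpret, the same vocabulary list can map them back to tokens; padded positions show up as the reserved entry at index 0.


In [0]:
vocab = vectorize_layer.get_vocabulary()
for batch in data.batch(3).take(1):
  for ids in vectorize_layer(batch[0]).numpy():
    print([vocab[i] for i in ids])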

Create the encoder-decoder model.


In [0]:
class EncoderDecoder(tf.keras.Model):
  def __init__(self, max_features=5000, embedding_dims=200, rnn_units=1024):
    super().__init__()
    self.max_features = max_features
    self.vectorize_layer = tf.keras.layers.experimental.preprocessing.TextVectorization(
        max_tokens=max_features,
        output_sequence_length=10)
    self.encoder_embedding = tf.keras.layers.Embedding(
        max_features + 1, embedding_dims)
    self.lstm_layer = tf.keras.layers.LSTM(rnn_units, return_state=True)

    self.decoder_embedding = tf.keras.layers.Embedding(
        max_features + 1, embedding_dims)
    sampler = tfa.seq2seq.sampler.TrainingSampler()
    decoder_cell = tf.keras.layers.LSTMCell(rnn_units)
    projection_layer = tf.keras.layers.Dense(max_features)
    self.decoder = tfa.seq2seq.BasicDecoder(
        decoder_cell, sampler, output_layer=projection_layer)
    
    self.attention = tf.keras.layers.Attention()

  def train_step(self, data):
    x, y = data[0], data[1]
    x = self.vectorize_layer(x)
    # The vectorize layer pads to 10 tokens; only the first token is the label.
    y = self.vectorize_layer(y)[:, 0:1]
    y_one_hot = tf.one_hot(y, self.max_features)

    with tf.GradientTape() as tape:
      # Encode the 10-word input window; keep the final hidden and cell states.
      embedded_inputs = self.encoder_embedding(x)
      encoder_outputs, state_h, state_c = self.lstm_layer(embedded_inputs)

      # Apply attention between the encoder output and the final hidden state.
      attn_output = self.attention([encoder_outputs, state_h])
      attn_output = tf.expand_dims(attn_output, axis=1)

      # The decoder runs for a single step: a zero "start" embedding
      # concatenated with the attention output predicts the next word.
      targets = self.decoder_embedding(tf.zeros_like(y))
      concat_output = tf.concat([targets, attn_output], axis=-1)
      outputs, _, _ = self.decoder(
          concat_output, initial_state=[state_h, state_c])

      y_pred = outputs.rnn_output
      
      loss = self.compiled_loss(
          y_one_hot, 
          y_pred, 
          regularization_losses=self.losses)
    
    trainable_variables = self.trainable_variables
    gradients = tape.gradient(loss, trainable_variables)
    self.optimizer.apply_gradients(zip(gradients, trainable_variables))

    self.compiled_metrics.update_state(y_one_hot, y_pred)
    return {m.name: m.result() for m in self.metrics}

  def predict_step(self, data, select_from_top_n=1):
    x = data
    if isinstance(x, tuple) and len(x) == 2:
      x = x[0]
    x = self.vectorize_layer(x)
    embedded_inputs = self.encoder_embedding(x)
    encoder_outputs, state_h, state_c = self.lstm_layer(embedded_inputs)
    attn_output = self.attention([encoder_outputs, state_h])
    attn_output = tf.expand_dims(attn_output, axis=1)
    
    targets = self.decoder_embedding(tf.zeros_like(x[:, -1:]))
    concat_output = tf.concat([targets, attn_output], axis=-1)
    outputs, _, _ = self.decoder(
        concat_output, initial_state=[state_h, state_c])
    
    y_pred = tf.squeeze(outputs.rnn_output, axis=1)
    # Rank the logits, skipping the padding and OOV entries at indices 0 and 1,
    # and keep the indices of the top-n candidates for each example.
    top_n = tf.argsort(
        y_pred[:, 2:], axis=1, direction='DESCENDING')[:, :select_from_top_n]
    # Pick one of the top-n candidates at random for each example in the batch.
    chosen_indices = tf.random.uniform(
        [top_n.shape[0], 1], minval=0, maxval=select_from_top_n,
        dtype=tf.dtypes.int32)
    counter = tf.expand_dims(tf.range(0, top_n.shape[0]), axis=1)
    indices = tf.concat([counter, chosen_indices], axis=1)
    choices = tf.gather_nd(top_n, indices)
    # Offset by 2 to map the sliced indices back to vocabulary positions.
    words = [self.vectorize_layer.get_vocabulary()[i + 2] for i in choices]
    return words

  def predict(self, starting_string, num_steps=50, select_from_top_n=1):
    # Repeatedly predict the next word, feeding the last 10 words of the
    # running text back in as the next input window.
    s = tf.compat.as_bytes(starting_string).split(b' ')
    for _ in range(num_steps):
      windowed = [b' '.join(s[-10:])]
      pred = self.predict_step([windowed], select_from_top_n=select_from_top_n)
      s.append(pred[0])
    return b' '.join(s)

In [0]:
model = EncoderDecoder()
model.compile(
    loss=tf.keras.losses.CategoricalCrossentropy(from_logits=True), 
    optimizer='adam', 
    metrics=['accuracy'])
model.vectorize_layer.adapt(lines.batch(256))

In [0]:
model.fit(data.batch(256), epochs=30, callbacks=[tf.keras.callbacks.ModelCheckpoint('text_gen_ckpt')])

Continue training for a few more epochs, starting from the current weights.


In [0]:
model.fit(data.batch(256), epochs=10, callbacks=[tf.keras.callbacks.ModelCheckpoint('text_gen_ckpt')])

In [0]:
!ls

In [0]:
model.load_weights('text_gen_ckpt')

In [0]:
print(model.predict('The mouse and the rabbit went in together'))

print(model.predict('Once upon a time there was a Queen named Darling'))

print(model.predict('In a city far from here the teacup shook upon the table'))

print(model.predict('It was a strange and quiet theater and the people watched from home'))
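
The custom predict method also accepts select_from_top_n; sampling the next word from the top few candidates, rather than always taking the single most likely one, tends to produce more varied text.


In [0]:
print(model.predict('The mouse and the rabbit went in together', select_from_top_n=5))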

Use keras-tuner to tune the hyperparameters


In [0]:
import kerastuner as kt

def build_model(hp):
  model = EncoderDecoder(
      rnn_units=hp.Int('units', min_value=256, max_value=1200, step=256))
  
  model.compile(
      optimizer=tf.keras.optimizers.Adam(
            hp.Choice('learning_rate', values=[1e-3, 1e-4, 3e-4])),
      loss=tf.keras.losses.CategoricalCrossentropy(from_logits=True),                 
      metrics=['accuracy'])
  
  model.vectorize_layer.adapt(lines.batch(256))
  return model

tuner = kt.tuners.RandomSearch(
    build_model,
    objective='accuracy',
    max_trials=15,
    executions_per_trial=1,
    directory='my_dir',
    project_name='text_generation')

tuner.search(
    data.batch(256), 
    epochs=10, 
    callbacks=[tf.keras.callbacks.ModelCheckpoint('text_gen_ckpt')])
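
When the search finishes, the tuner can summarize the trials and report the best hyperparameters it found; a minimal sketch using the standard keras-tuner accessors:


In [0]:
tuner.results_summary()
best_hp = tuner.get_best_hyperparameters(num_trials=1)[0]
print(best_hp.get('units'), best_hp.get('learning_rate'))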