In [0]:
!pip install tensorflow_text==2.0.1

In [0]:
import tensorflow as tf
import tensorflow_text as text

In [0]:
ragged_input = tf.ragged.constant([[1, 2, 3, 4, 5], [5, 6]])
input_data = tf.data.Dataset.from_tensor_slices(ragged_input).batch(2)

model = tf.keras.Sequential([
  tf.keras.layers.InputLayer(input_shape=(None,), dtype='int32', ragged=True),
  text.keras.layers.ToDense(pad_value=0, mask=True),
  tf.keras.layers.Embedding(100, 16),
  tf.keras.layers.LSTM(32),
  tf.keras.layers.Dense(32, activation='relu'),
  tf.keras.layers.Dense(1, activation='sigmoid')
])

model.compile(
  optimizer="rmsprop",
  loss="binary_crossentropy",
  metrics=["accuracy"])

output = model.predict(input_data)
print(output)


[[0.49998033]
 [0.5012409 ]]

In [0]:
def _CreateTable(vocab, num_oov=1):
  init = tf.lookup.KeyValueTensorInitializer(
      vocab,
      tf.range(tf.size(vocab, out_type=tf.int64), dtype=tf.int64),
      key_dtype=tf.string,
      value_dtype=tf.int64)
  return tf.lookup.StaticVocabularyTable(
      init, num_oov, lookup_key_dtype=tf.string)

reviews_data_array = ['I really liked this movie', 'not my favorite']
reviews_labels_array = [1,0]
train_x = tf.constant(reviews_data_array)
train_y = tf.constant(reviews_labels_array)

a = _CreateTable(['I', 'really', 'liked', 'this', 'movie', 'not', 'my', 'favorite'])

def preprocess(data, labels):
  t = text.WhitespaceTokenizer()
  data = t.tokenize(data)
  # data = data.merge_dims(-2,-1)
  ids = tf.ragged.map_flat_values(a.lookup, data)
  return (ids, labels)

train_dataset = tf.data.Dataset.from_tensor_slices((train_x, train_y)).batch(2)
train_dataset = train_dataset.map(preprocess)

model = tf.keras.Sequential([
  tf.keras.layers.InputLayer(input_shape=(None,), dtype='int64', ragged=True),
  text.keras.layers.ToDense(pad_value=0, mask=True),
  tf.keras.layers.Embedding(100, 16),
  tf.keras.layers.LSTM(32),
  tf.keras.layers.Dense(32, activation='relu'),
  tf.keras.layers.Dense(1, activation='sigmoid')
])

model.compile(
  optimizer="rmsprop",
  loss="binary_crossentropy",
  metrics=["accuracy"])

output = model.fit(train_dataset, epochs=1, verbose=1)
print(output)


1/1 [==============================] - 2s 2s/step - loss: 0.6915 - accuracy: 1.0000
<tensorflow.python.keras.callbacks.History object at 0x7f7d64b5e5f8>