In [18]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras import Model

In [23]:
import numpy as np

In [8]:
sentences = ["i like dog", "i love coffee", "i hate milk"]

In [11]:
word_list = " ".join(sentences).split()
word_list = list(set(word_list))
word_dict = {w: i for i, w in enumerate(word_list)}
number_dict = {i: w for i, w in enumerate(word_list)}
n_class = len(word_dict)  # vocabulary size (7 unique words)
n_embedding = 5  # embedding dimension
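
A quick sanity check on the mappings (a minimal sketch; since `word_list` comes from a `set`, the exact index assigned to each word varies between runs):

In [ ]:
print(n_class)         # 7 unique words: i, like, dog, love, coffee, hate, milk
print(word_dict)       # word -> index, e.g. {'dog': 0, 'i': 1, ...}
print(number_dict[0])  # index -> word (inverse mapping)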

In [10]:
# NNLM parameters
n_step = 2 # context length: two input words per target ('i like', 'i love', 'i hate')
n_hidden = 2 # number of hidden units

In [85]:
def make_batch(sentences):
    input_batch = []
    target_batch = []

    for sen in sentences:
        word = sen.split()
        input_ = [word_dict[n] for n in word[:-1]]
        target = word_dict[word[-1]]

        input_batch.append(input_)
        target_batch.append(target)

    return np.asarray(input_batch), np.asarray(target_batch)
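
For the three sample sentences, `make_batch` yields a (3, 2) array of context-word indices and a length-3 array of target indices (a minimal check under the vocabulary built above):

In [ ]:
input_batch, target_batch = make_batch(sentences)
print(input_batch.shape)   # (3, 2): two context words per sentence
print(target_batch.shape)  # (3,): index of each sentence's final word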

In [70]:
## for professionals: model subclassing with a custom training loop

class MyModel(Model):
    
    def __init__(self):
        super(MyModel, self).__init__()
        self.embedding = layers.Embedding(n_class, n_embedding, input_length=n_step)
        self.reshape = layers.Reshape((n_step*n_embedding,))
        self.d1 = layers.Dense(n_hidden, activation='tanh')
        self.d2 = layers.Dense(n_class, use_bias=False)
        self.d3 = layers.Dense(n_class)
        self.add = layers.Add()
    
    def call(self, x):
        xm = self.embedding(x)
        xm = self.reshape(xm)
        
        o1 = self.d1(xm)
        o1 = self.d2(o1)
        o2 = self.d3(xm)
        
        return self.add([o1, o2])
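
This is the Bengio-style NNLM computation y = b + W·x + U·tanh(d + H·x): `d1` gives tanh(H·x + d), `d2` is the bias-free projection U, and `d3` is the direct connection W·x + b from the concatenated embeddings straight to the logits. A minimal shape check (the `demo` instance and dummy indices below are illustrative only):

In [ ]:
demo = MyModel()
dummy = tf.constant([[0, 1]])  # one sample of n_step word indices
print(demo(dummy).shape)       # (1, n_class): one logit per vocabulary word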

In [90]:
model = MyModel()
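# the model outputs raw logits (no softmax layer), hence from_logits=True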
loss_object = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)

optimizer = tf.keras.optimizers.Adam()

In [91]:
train_loss = tf.keras.metrics.Mean(name='train_loss')
train_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='train_accuracy')

test_loss = tf.keras.metrics.Mean(name='test_loss')
test_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='test_accuracy')

In [93]:
@tf.function
def train_step(x, y):
    with tf.GradientTape() as tape:
        predictions = model(x, training=True)
        loss = loss_object(y, predictions)
    gradients = tape.gradient(loss, model.trainable_variables)
    optimizer.apply_gradients(zip(gradients, model.trainable_variables))

    train_loss(loss)
    train_accuracy(y, predictions)

In [94]:
@tf.function
def test_step(x, y):
    predictions = model(x, training=False)
    t_loss = loss_object(y, predictions)

    test_loss(t_loss)
    test_accuracy(y, predictions)

In [98]:
input_batch, target_batch = make_batch(sentences)

EPOCHS = 500

for epoch in range(EPOCHS):
    # reset the metrics at the start of each epoch
    train_loss.reset_states()
    train_accuracy.reset_states()
    test_loss.reset_states()
    test_accuracy.reset_states()

    train_step(input_batch, target_batch)

    test_step(input_batch, target_batch)
    if epoch % 100 == 0:
        template = 'Epoch {}, Loss: {}, Accuracy: {}, Test Loss: {}, Test Accuracy: {}'
        print(template.format(epoch + 1,
                              train_loss.result(),
                              train_accuracy.result() * 100,
                              test_loss.result(),
                              test_accuracy.result() * 100))

## end for professionals


Epoch 1, Loss: 0.0205826535820961, Accuracy: 100.0, Test Loss: 0.02053370140492916, Test Accuracy: 100.0
Epoch 101, Loss: 0.016421500593423843, Accuracy: 100.0, Test Loss: 0.016386309638619423, Test Accuracy: 100.0
Epoch 201, Loss: 0.01338109653443098, Accuracy: 100.0, Test Loss: 0.013354960829019547, Test Accuracy: 100.0
Epoch 301, Loss: 0.011092644184827805, Accuracy: 100.0, Test Loss: 0.011072690598666668, Test Accuracy: 100.0
Epoch 401, Loss: 0.009327310137450695, Accuracy: 100.0, Test Loss: 0.00931168720126152, Test Accuracy: 100.0

In [67]:
## for beginners

x = layers.Input((n_step,))
xm = layers.Embedding(n_class, n_embedding, input_length=n_step)(x)
xm = layers.Reshape((n_step*n_embedding, ))(xm)

o1 = layers.Dense(n_hidden, activation='tanh')(xm)
o1 = layers.Dense(n_class, use_bias=False)(o1)

o2 = layers.Dense(n_class)(xm)

y = layers.add([o1, o2])

model = Model(inputs=x, outputs=y)

model.compile(optimizer='adam',
              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
              metrics=['accuracy'])

input_batch, target_batch = make_batch(sentences)

model.summary()

model.fit(input_batch, target_batch, epochs=500, verbose=2)

pred = model.predict(input_batch)

print([sen.split()[:2] for sen in sentences], '=>', [number_dict[n] for n in np.argmax(pred, axis=1)])
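
With the fit above converging to 100% training accuracy, this should print each sentence's final word, e.g. [['i', 'like'], ['i', 'love'], ['i', 'hate']] => ['dog', 'coffee', 'milk'].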

## the learned embedding matrix: shape (n_class, n_embedding), i.e. (7, 5) here
print(model.layers[1].get_weights()[0].shape)

## end for beginners
