In [1]:
# Import TensorFlow >= 1.9 and enable eager execution
import tensorflow as tf

# Note: Once you enable eager execution, it cannot be disabled. 
tf.enable_eager_execution()

import numpy as np
import re
import random
import unidecode
import time


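As a quick illustration of what enabling eager execution buys (a minimal sketch added here, not part of the original run): operations return concrete values immediately instead of being added to a graph that runs later.

In [ ]:
# with eager execution on, this prints a concrete 1x1 tensor ([[11.]]) right away
print(tf.matmul([[1.0, 2.0]], [[3.0], [4.0]]))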

In [3]:
path_to_file = r'D:\GitHub\Side-Projects\hmm_demo\data\test_data1.txt'

In [4]:
text = unidecode.unidecode(open(path_to_file).read())
# length of text is the number of characters in it
print (len(text))
# unique contains all the unique characters in the file
unique = sorted(set(text))

# creating a mapping from unique characters to indices
char2idx = {u:i for i, u in enumerate(unique)}
idx2char = {i:u for i, u in enumerate(unique)}
# setting the maximum length (in characters) of a single input sequence
max_length = 120

# length of the vocabulary in chars
vocab_size = len(unique)

# the embedding dimension 
embedding_dim = 128

# number of RNN (here GRU) units
units = 512

# batch size 
BATCH_SIZE = 64

# buffer size to shuffle our dataset
BUFFER_SIZE = 10000


42420

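A small sanity check of the character/index mappings built above (a sketch added for illustration, using the names from the previous cell):

In [ ]:
# encode a snippet with char2idx and decode it back with idx2char
sample = text[:20]
encoded = [char2idx[c] for c in sample]
decoded = ''.join(idx2char[i] for i in encoded)
print(encoded[:10])
print(decoded == sample)  # True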
In [5]:
input_text = []
target_text = []

for f in range(0, len(text)-max_length, max_length):
    inps = text[f:f+max_length]
    targ = text[f+1:f+1+max_length]

    input_text.append([char2idx[i] for i in inps])
    target_text.append([char2idx[t] for t in targ])
    
print (np.array(input_text).shape)
print (np.array(target_text).shape)
dataset = tf.data.Dataset.from_tensor_slices((input_text, target_text)).shuffle(BUFFER_SIZE)
dataset = dataset.batch(batch_size=BATCH_SIZE, drop_remainder=True)


(353, 120)
(353, 120)

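To see what the model is asked to learn, it helps to decode one input/target pair: the target is simply the input shifted one character to the right. A short sketch using the names defined above (not part of the original run):

In [ ]:
# the target sequence is the input sequence shifted by one character
print(''.join(idx2char[i] for i in input_text[0][:40]))
print(''.join(idx2char[i] for i in target_text[0][:40]))

# peek at one shuffled batch; each tensor has shape (BATCH_SIZE, max_length)
for inp, targ in dataset.take(1):
    print(inp.shape, targ.shape)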
In [6]:
class Model(tf.keras.Model):
  def __init__(self, vocab_size, embedding_dim, units, batch_size):
    super(Model, self).__init__()
    self.units = units
    self.batch_sz = batch_size

    self.embedding = tf.keras.layers.Embedding(vocab_size, embedding_dim)

    if tf.test.is_gpu_available():
      self.gru = tf.keras.layers.CuDNNGRU(self.units, 
                                          return_sequences=True, 
                                          return_state=True, 
                                          recurrent_initializer='glorot_uniform')
    else:
      self.gru = tf.keras.layers.GRU(self.units, 
                                     return_sequences=True, 
                                     return_state=True, 
                                     recurrent_activation='sigmoid', 
                                     recurrent_initializer='glorot_uniform')

    self.fc = tf.keras.layers.Dense(vocab_size)
        
  def call(self, x, hidden):
    x = self.embedding(x)

    # output shape == (batch_size, max_length, hidden_size) 
    # states shape == (batch_size, hidden_size)

    # `states` holds the final hidden state of the GRU; during training it is
    # fed back into the model as the initial state for the next step
    output, states = self.gru(x, initial_state=hidden)


    # reshaping the output so that we can pass it to the Dense layer
    # after reshaping the shape is (batch_size * max_length, hidden_size)
    output = tf.reshape(output, (-1, output.shape[2]))

    # The dense layer outputs predictions for every time step (max_length of them)
    # output shape after the dense layer == (batch_size * max_length, vocab_size)
    x = self.fc(output)

    return x, states

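Before training, one forward pass on a dummy batch confirms the shapes documented in call(). This is a throwaway check (assuming the hyperparameters defined earlier), not part of the original notebook:

In [ ]:
# one forward pass on zeros, just to check shapes
check_model = Model(vocab_size, embedding_dim, units, BATCH_SIZE)
dummy_input = tf.zeros((BATCH_SIZE, max_length), dtype=tf.int32)
dummy_hidden = tf.zeros((BATCH_SIZE, units))
preds, state = check_model(dummy_input, dummy_hidden)
print(preds.shape)   # (BATCH_SIZE * max_length, vocab_size)
print(state.shape)   # (BATCH_SIZE, units)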
In [7]:
model = Model(vocab_size, embedding_dim, units, BATCH_SIZE)
optimizer = tf.train.AdamOptimizer()

# using sparse_softmax_cross_entropy so that we don't have to create one-hot vectors
def loss_function(real, preds):
    return tf.losses.sparse_softmax_cross_entropy(labels=real, logits=preds)

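The "sparse" variant accepts integer class ids directly, which is what saves us from building one-hot targets; with explicit one-hot labels the dense variant produces the same value. A tiny illustrative sketch (not part of the original run):

In [ ]:
logits = tf.constant([[2.0, 1.0, 0.1]])
labels = tf.constant([0], dtype=tf.int64)
print(tf.losses.sparse_softmax_cross_entropy(labels=labels, logits=logits).numpy())
print(tf.losses.softmax_cross_entropy(onehot_labels=tf.one_hot(labels, 3), logits=logits).numpy())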
In [10]:
# Training step

EPOCHS = 30

for epoch in range(EPOCHS):
    start = time.time()
    
    # initializing the hidden state at the start of every epoch
    # (reset_states() returns None; passing None as the initial state below
    # makes the GRU start from a zero state)
    hidden = model.reset_states()
    
    for (batch, (inp, target)) in enumerate(dataset):
        with tf.GradientTape() as tape:
            # feeding the hidden state back into the model
            # This is the interesting step
            predictions, hidden = model(inp, hidden)

            # reshaping the target because that's how the
            # loss function expects it
            target = tf.reshape(target, (-1,))
            loss = loss_function(target, predictions)

        grads = tape.gradient(loss, model.variables)
        optimizer.apply_gradients(zip(grads, model.variables),
                                  global_step=tf.train.get_or_create_global_step())

        if batch % 100 == 0:
            print ('Epoch {} Batch {} Loss {:.4f}'.format(epoch+1,
                                                          batch,
                                                          loss))
    
    print ('Epoch {} Loss {:.4f}'.format(epoch+1, loss))
    print('Time taken for 1 epoch {} sec\n'.format(time.time() - start))


Epoch 1 Batch 0 Loss 2.5419
Epoch 1 Loss 2.5568
Time taken for 1 epoch 28.42983388900757 sec

Epoch 2 Batch 0 Loss 2.5338
Epoch 2 Loss 2.4479
Time taken for 1 epoch 27.984103441238403 sec

Epoch 3 Batch 0 Loss 2.4711
Epoch 3 Loss 2.4515
Time taken for 1 epoch 28.62662982940674 sec

Epoch 4 Batch 0 Loss 2.4163
Epoch 4 Loss 2.4052
Time taken for 1 epoch 29.337477207183838 sec

Epoch 5 Batch 0 Loss 2.3668
Epoch 5 Loss 2.3750
Time taken for 1 epoch 28.971940517425537 sec

Epoch 6 Batch 0 Loss 2.3556
Epoch 6 Loss 2.3287
Time taken for 1 epoch 29.04780650138855 sec

Epoch 7 Batch 0 Loss 2.2694
Epoch 7 Loss 2.3117
Time taken for 1 epoch 29.122838020324707 sec

Epoch 8 Batch 0 Loss 2.2595
Epoch 8 Loss 2.2596
Time taken for 1 epoch 29.230953454971313 sec

Epoch 9 Batch 0 Loss 2.2687
Epoch 9 Loss 2.1942
Time taken for 1 epoch 28.901506185531616 sec

Epoch 10 Batch 0 Loss 2.2164
Epoch 10 Loss 2.2007
Time taken for 1 epoch 28.453840494155884 sec

Epoch 11 Batch 0 Loss 2.1686
Epoch 11 Loss 2.1753
Time taken for 1 epoch 29.440170764923096 sec

Epoch 12 Batch 0 Loss 2.0990
Epoch 12 Loss 2.1277
Time taken for 1 epoch 28.800952672958374 sec

Epoch 13 Batch 0 Loss 2.0692
Epoch 13 Loss 2.0759
Time taken for 1 epoch 28.371025323867798 sec

Epoch 14 Batch 0 Loss 2.0714
Epoch 14 Loss 2.0495
Time taken for 1 epoch 29.415270805358887 sec

Epoch 15 Batch 0 Loss 2.0193
Epoch 15 Loss 2.0298
Time taken for 1 epoch 29.214804649353027 sec

Epoch 16 Batch 0 Loss 1.9666
Epoch 16 Loss 1.9937
Time taken for 1 epoch 29.093130350112915 sec

Epoch 17 Batch 0 Loss 1.9519
Epoch 17 Loss 1.9658
Time taken for 1 epoch 28.43707036972046 sec

Epoch 18 Batch 0 Loss 1.9048
Epoch 18 Loss 1.9277
Time taken for 1 epoch 29.72625231742859 sec

Epoch 19 Batch 0 Loss 1.9021
Epoch 19 Loss 1.8893
Time taken for 1 epoch 28.533660411834717 sec

Epoch 20 Batch 0 Loss 1.8581
Epoch 20 Loss 1.8535
Time taken for 1 epoch 29.51002287864685 sec

Epoch 21 Batch 0 Loss 1.8595
Epoch 21 Loss 1.8073
Time taken for 1 epoch 28.49074363708496 sec

Epoch 22 Batch 0 Loss 1.8476
Epoch 22 Loss 1.7899
Time taken for 1 epoch 29.82550573348999 sec

Epoch 23 Batch 0 Loss 1.7815
Epoch 23 Loss 1.8252
Time taken for 1 epoch 29.13753628730774 sec

Epoch 24 Batch 0 Loss 1.7837
Epoch 24 Loss 1.7554
Time taken for 1 epoch 30.100502729415894 sec

Epoch 25 Batch 0 Loss 1.7249
Epoch 25 Loss 1.7344
Time taken for 1 epoch 30.595205068588257 sec

Epoch 26 Batch 0 Loss 1.6713
Epoch 26 Loss 1.7076
Time taken for 1 epoch 30.557610273361206 sec

Epoch 27 Batch 0 Loss 1.7000
Epoch 27 Loss 1.6734
Time taken for 1 epoch 30.194127798080444 sec

Epoch 28 Batch 0 Loss 1.6688
Epoch 28 Loss 1.6353
Time taken for 1 epoch 30.46005606651306 sec

Epoch 29 Batch 0 Loss 1.6302
Epoch 29 Loss 1.6048
Time taken for 1 epoch 30.652449131011963 sec

Epoch 30 Batch 0 Loss 1.6106
Epoch 30 Loss 1.5948
Time taken for 1 epoch 30.591050624847412 sec


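The training loop above relies on tf.GradientTape to record the forward pass so gradients can be taken afterwards. A minimal, self-contained illustration of that mechanism (added as a sketch, unrelated to the model's variables):

In [ ]:
# the tape records operations on watched tensors; gradient() replays them backwards
w = tf.constant(3.0)
with tf.GradientTape() as tape:
    tape.watch(w)
    y = w * w
print(tape.gradient(y, w))  # 6.0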
In [14]:
# Evaluation step (generating text using the trained model)

# number of characters to generate
num_generate = 1000

# You can change the start string to experiment
start_string = 'A'
# converting our start string to numbers (vectorizing)
input_eval = [char2idx[s] for s in start_string]
input_eval = tf.expand_dims(input_eval, 0)

# empty string to store our results
text_generated = ''

# a low temperature results in more predictable text;
# a higher temperature results in more surprising text.
# experiment to find the best setting
temperature = 2.0

# hidden state shape == (batch_size, number of rnn units); here batch size == 1
hidden = [tf.zeros((1, units))]
for i in range(num_generate):
    predictions, hidden = model(input_eval, hidden)

    # using a multinomial distribution to predict the character returned by the model
    predictions = predictions / temperature
    predicted_id = tf.multinomial(tf.exp(predictions), num_samples=1)[0][0].numpy()
    
    # We pass the predicted character as the next input to the model
    # along with the previous hidden state
    input_eval = tf.expand_dims([predicted_id], 0)
    
    text_generated += idx2char[predicted_id]

print (start_string + text_generated)


Aed a bug where the versions of the sement to 4: que the Elyound in the sement for the can now can seme to the will a bug where the Gement for the Parant Crantor Stite 
You could the skill gem to from the Kith from 20% increased the mant (down from 10%). The sement for the real the mant for the damage and the Oriand -ppere the Abyss now deal now crapter to now cranted to the Stare Acting a lighte skills which can now can now be the deal now seal damage the man increased the skill gem to the mant for the can now be the damage of the level requirement for the can now can now and the Elyor all Fill no longer can now ald the revel requirement for the atem a dumage of the sement for the 1tom for the /and a now deal sumport to the kill now grants while have been redured the pard dife and a dimate of the atem could to the sement to the best in the real damage the damage of the EtemVity (zor the skill gem to the pard now can now area to 70.
Liget and a now the partion in the Level String and th

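The effect of the temperature knob is easiest to see on a toy distribution: dividing the logits by a temperature below 1 sharpens the sampling distribution (more predictable text), while a temperature above 1 flattens it (more surprising text). A small numpy sketch of that scaling, separate from the generation loop above:

In [ ]:
logits = np.array([2.0, 1.0, 0.5])
for t in (0.5, 1.0, 2.0):
    p = np.exp(logits / t) / np.sum(np.exp(logits / t))
    print(t, np.round(p, 3))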
In [ ]: